Skip to content

Scalability Patterns

Scalability Overview

Scalability Fundamentals


Load Balancing

Load Balancing Patterns


Caching Patterns

Caching Strategies

Cache Implementation

@Service
public class UserService {

    private final UserRepository userRepository;
    private final RedisTemplate<String, User> redisTemplate;
    private static final Duration CACHE_TTL = Duration.ofMinutes(15);

    /** Builds the Redis key under which a user record is cached. */
    private String cacheKeyFor(Long userId) {
        return "user:" + userId;
    }

    /**
     * Cache-Aside read: serve from Redis when present, otherwise load from
     * the database and populate the cache for subsequent reads.
     *
     * @throws UserNotFoundException if no user exists for {@code userId}
     */
    public User getUser(Long userId) {
        String key = cacheKeyFor(userId);

        // Fast path: cache hit.
        User fromCache = redisTemplate.opsForValue().get(key);
        if (fromCache != null) {
            return fromCache;
        }

        // Cache miss: the database is the source of truth.
        User loaded = userRepository.findById(userId)
            .orElseThrow(() -> new UserNotFoundException(userId));

        // Populate the cache so the next read within the TTL is a hit.
        redisTemplate.opsForValue().set(key, loaded, CACHE_TTL);
        return loaded;
    }

    /**
     * Write-Through update: persist the change first, then refresh the
     * cached copy so readers see the new state immediately.
     *
     * @throws UserNotFoundException if no user exists for {@code userId}
     */
    public User updateUser(Long userId, UserUpdateRequest request) {
        User existing = userRepository.findById(userId)
            .orElseThrow(() -> new UserNotFoundException(userId));

        existing.setName(request.getName());
        existing.setEmail(request.getEmail());

        // Database write happens before the cache write.
        User saved = userRepository.save(existing);
        redisTemplate.opsForValue().set(cacheKeyFor(userId), saved, CACHE_TTL);
        return saved;
    }

    /** Cache invalidation: delete from the database, then drop the cached copy. */
    public void deleteUser(Long userId) {
        userRepository.deleteById(userId);
        redisTemplate.delete(cacheKeyFor(userId));
    }
}

Multi-Level Caching

The caching strategies diagram above shows the multi-level cache architecture with L1 (local), L2 (distributed), and database layers.

@Service
public class MultiLevelCacheService<K, V> {

    private final Cache<K, V> localCache;              // L1: in-process (Caffeine)
    private final RedisTemplate<K, V> redisTemplate;   // L2: shared, distributed
    private final Duration localTtl = Duration.ofMinutes(5);
    private final Duration redisTtl = Duration.ofMinutes(30);

    /**
     * BUG FIX: the original no-arg constructor never assigned the final
     * {@code redisTemplate} field, which does not compile. Inject it here
     * (single-constructor Spring injection needs no {@code @Autowired}).
     */
    public MultiLevelCacheService(RedisTemplate<K, V> redisTemplate) {
        this.redisTemplate = redisTemplate;
        this.localCache = Caffeine.newBuilder()
            .maximumSize(10_000)
            .expireAfterWrite(localTtl)
            .build();
    }

    /**
     * Read-through lookup: L1 (local) first, then L2 (Redis), finally the
     * {@code loader}. Each hit backfills the faster tiers.
     *
     * @param key    cache key
     * @param loader invoked on a full miss to load from the source of truth;
     *               a {@code null} result is not cached
     * @return the cached or freshly loaded value, or {@code null} if the
     *         loader produced none
     */
    public V get(K key, Function<K, V> loader) {
        // L1: check the local cache.
        V value = localCache.getIfPresent(key);
        if (value != null) {
            return value;
        }

        // L2: check Redis; on a hit, backfill L1 only (L2 TTL still governs).
        value = redisTemplate.opsForValue().get(key);
        if (value != null) {
            localCache.put(key, value);
            return value;
        }

        // Full miss: load from the source and populate both tiers.
        value = loader.apply(key);
        if (value != null) {
            put(key, value);
        }

        return value;
    }

    /** Writes the value into both tiers; Redis gets the longer TTL. */
    public void put(K key, V value) {
        localCache.put(key, value);
        redisTemplate.opsForValue().set(key, value, redisTtl);
    }

    /**
     * Removes the key from both tiers and broadcasts the invalidation so
     * other instances can evict their own L1 copies.
     */
    public void invalidate(K key) {
        localCache.invalidate(key);
        redisTemplate.delete(key);
        // Publish invalidation event for other instances' local caches.
        redisTemplate.convertAndSend("cache-invalidation", key.toString());
    }
}

Database Scaling

Database Scaling Patterns

Read/Write Splitting

@Configuration
public class DataSourceConfig {

    /**
     * Primary DataSource that routes each connection request to either the
     * writer or the reader instance, keyed by {@code DataSourceType}.
     */
    @Bean
    @Primary
    public DataSource routingDataSource(
            @Qualifier("writerDataSource") DataSource writer,
            @Qualifier("readerDataSource") DataSource reader) {

        RoutingDataSource routing = new RoutingDataSource();
        routing.setTargetDataSources(Map.<Object, Object>of(
            DataSourceType.WRITER, writer,
            DataSourceType.READER, reader));
        // Unrouted lookups fall back to the writer — the safe default.
        routing.setDefaultTargetDataSource(writer);
        return routing;
    }
}

/**
 * Routing DataSource that picks the writer or reader target per operation.
 * The lookup key comes from a context holder, which is populated by the
 * routing aspect before the connection is acquired.
 */
public class RoutingDataSource extends AbstractRoutingDataSource {

    @Override
    protected Object determineCurrentLookupKey() {
        // Returns the DataSourceType for the current thread; a null return
        // makes AbstractRoutingDataSource use its configured default target.
        return DataSourceContextHolder.getDataSourceType();
    }
}

// Aspect for automatic routing
// Aspect for automatic routing
@Aspect
@Component
// BUG FIX: without an explicit order, this aspect is not guaranteed to run
// BEFORE Spring's transaction advice — and the routing key must be set before
// the transaction manager acquires a connection, or routing has no effect.
@Order(Ordered.HIGHEST_PRECEDENCE)
public class ReadOnlyRoutingAspect {

    /**
     * Routes {@code @Transactional(readOnly = true)} methods to the reader
     * data source and everything else to the writer.
     *
     * @param joinPoint the intercepted transactional method invocation
     * @return whatever the intercepted method returns
     * @throws Throwable propagated from the intercepted method
     */
    @Around("@annotation(org.springframework.transaction.annotation.Transactional)")
    public Object route(ProceedingJoinPoint joinPoint) throws Throwable {
        Transactional transactional = getTransactionalAnnotation(joinPoint);

        // BUG FIX: remember the outer routing so nested advised calls do not
        // wipe it; the original cleared unconditionally in finally.
        DataSourceType previous = DataSourceContextHolder.getDataSourceType();

        if (transactional != null && transactional.readOnly()) {
            DataSourceContextHolder.setDataSourceType(DataSourceType.READER);
        } else {
            DataSourceContextHolder.setDataSourceType(DataSourceType.WRITER);
        }

        try {
            return joinPoint.proceed();
        } finally {
            // Restore the caller's routing, or clear if we were outermost.
            if (previous != null) {
                DataSourceContextHolder.setDataSourceType(previous);
            } else {
                DataSourceContextHolder.clear();
            }
        }
    }
}

Message Queue Scaling

Queue-Based Load Leveling

Competing Consumers Pattern


Connection Pooling

Connection Pooling

# HikariCP Configuration
spring:
  datasource:
    hikari:
      minimum-idle: 10             # keep this many idle connections warm
      maximum-pool-size: 50        # hard cap; see sizing formula below
      connection-timeout: 20000    # 20 seconds
      idle-timeout: 300000         # 5 minutes
      max-lifetime: 1200000        # 20 minutes (keep below DB-side timeout)
      pool-name: MyAppPool

# Sizing formula:
# connections = (core_count * 2) + effective_spindle_count
# For SSD with 4 cores: (4 * 2) + 1 = 9 connections minimum

Rate Limiting

Rate Limiting Algorithms

Rate Limiter Implementation

@Component
public class RedisRateLimiter {

    private final RedisTemplate<String, String> redisTemplate;
    private final int maxRequests;   // default per-window request budget
    private final Duration window;   // default sliding-window length

    /**
     * BUG FIX: the original class declared these fields {@code final} but had
     * no constructor assigning them, which does not compile. Configure the
     * default limit and window here.
     */
    public RedisRateLimiter(RedisTemplate<String, String> redisTemplate,
                            int maxRequests, Duration window) {
        this.redisTemplate = redisTemplate;
        this.maxRequests = maxRequests;
        this.window = window;
    }

    /** Sliding-window check using the limiter's default limit and window. */
    public boolean isAllowed(String key) {
        return isAllowed(key, maxRequests, window);
    }

    /**
     * Generalized sliding-window check so callers (e.g. an annotation aspect)
     * can supply per-call limits. Uses a Redis ZSET scored by timestamp and a
     * Lua script so trim/count/record happen atomically.
     *
     * @param key        logical client/endpoint identifier
     * @param limit      maximum requests allowed within the window
     * @param windowSize sliding-window length
     * @return true if this request fits in the window and was recorded
     */
    public boolean isAllowed(String key, int limit, Duration windowSize) {
        String redisKey = "ratelimit:" + key;
        long now = System.currentTimeMillis();
        long windowStart = now - windowSize.toMillis();

        // BUG FIX: the original used the timestamp as both score and member,
        // so requests landing in the same millisecond collapsed into one ZSET
        // entry and were under-counted. Use a (practically) unique member.
        String member = now + "-" + System.nanoTime();

        // Lua script for atomic trim-count-record.
        String script = """
            -- Remove entries that fell out of the window
            redis.call('ZREMRANGEBYSCORE', KEYS[1], '-inf', ARGV[1])
            -- Count what remains in the window
            local count = redis.call('ZCARD', KEYS[1])
            if count < tonumber(ARGV[2]) then
                -- Record this request: score = timestamp, member = unique id
                redis.call('ZADD', KEYS[1], ARGV[3], ARGV[4])
                redis.call('EXPIRE', KEYS[1], ARGV[5])
                return 1
            end
            return 0
            """;

        Long result = redisTemplate.execute(
            new DefaultRedisScript<>(script, Long.class),
            List.of(redisKey),
            String.valueOf(windowStart),
            String.valueOf(limit),
            String.valueOf(now),
            member,
            // BUG FIX: EXPIRE 0 deletes the key immediately for sub-second
            // windows; clamp the TTL to at least one second.
            String.valueOf(Math.max(1, windowSize.getSeconds()))
        );

        return result != null && result == 1;
    }
}

// Annotation-based rate limiting
/**
 * Marks a method as rate limited. Intended contract: at most {@link #requests()}
 * invocations per {@link #seconds()} per caller key.
 */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface RateLimit {
    // Maximum number of requests permitted within the window.
    int requests() default 100;
    // Window length in seconds.
    int seconds() default 60;
}

/**
 * Enforces {@code @RateLimit} on annotated methods by consulting the Redis
 * rate limiter before proceeding; rejected calls throw
 * {@link RateLimitExceededException} instead of invoking the method.
 */
@Aspect
@Component
public class RateLimitAspect {

    @Autowired
    private RedisRateLimiter rateLimiter;

    @Around("@annotation(rateLimit)")
    public Object checkRateLimit(ProceedingJoinPoint joinPoint,
                                  RateLimit rateLimit) throws Throwable {
        // extractKey is defined elsewhere; presumably derives a per-client
        // key (e.g. user id or IP) from the join point — verify against caller.
        String key = extractKey(joinPoint);

        // NOTE(review): rateLimit.requests() and rateLimit.seconds() are never
        // read — the limiter applies its own fixed configuration, so per-method
        // annotation values are silently ignored. Pass them through to the
        // limiter to honor the annotation's contract.
        if (!rateLimiter.isAllowed(key)) {
            throw new RateLimitExceededException("Rate limit exceeded");
        }

        return joinPoint.proceed();
    }
}

Auto-Scaling Patterns

Auto-Scaling Strategies

# Kubernetes HPA
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 2        # never scale to fewer than 2 pods (availability floor)
  maxReplicas: 20       # hard ceiling regardless of load
  metrics:
    # Scale on whichever metric demands the most replicas.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70     # % of pod CPU request
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80     # % of pod memory request
    - type: Pods
      pods:
        metric:
          name: http_requests_per_second   # custom metric from a metrics adapter
        target:
          type: AverageValue
          averageValue: 1000               # target RPS per pod
  behavior:
    # Scale down slowly to avoid flapping after traffic spikes.
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10          # shed at most 10% of pods per minute
          periodSeconds: 60
    # Scale up aggressively to absorb sudden load.
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100         # may double the pod count every 15 seconds
          periodSeconds: 15

CDN & Edge Computing

CDN Architecture


Common Interview Questions

  1. How to scale a read-heavy application?
     - Add read replicas
     - Implement caching (Redis, CDN)
     - Use async processing
     - Consider CQRS

  2. How to handle thundering herd?
     - Cache stampede protection
     - Request coalescing
     - Probabilistic early expiration
     - Circuit breakers

  3. Vertical vs horizontal scaling?
     - Vertical: simpler, but has hard limits and is a single point of failure
     - Horizontal: more complex, but scales without a fixed ceiling and is fault tolerant

  4. How to choose a sharding key?
     - High cardinality
     - Even distribution
     - Matches query patterns
     - Avoids cross-shard queries

  5. How to handle hot spots?
     - Add salt to keys
     - Split hot partitions
     - Use write sharding
     - Cache hot data