Skip to content

Scalability Patterns

Scalability Overview

Scalability Fundamentals


Load Balancing

Load Balancing Patterns


Caching Patterns

Caching Strategies

Cache Implementation

@Service
public class UserService {

    private final UserRepository userRepository;
    private final RedisTemplate<String, User> redisTemplate;
    private static final Duration CACHE_TTL = Duration.ofMinutes(15);

    /** Builds the Redis key under which a user record is cached. */
    private String cacheKeyFor(Long userId) {
        return "user:" + userId;
    }

    /**
     * Cache-Aside read: serve from Redis when present, otherwise load from
     * the database and populate the cache for subsequent reads.
     *
     * @throws UserNotFoundException if no user exists for {@code userId}
     */
    public User getUser(Long userId) {
        String key = cacheKeyFor(userId);

        // Fast path: cache hit.
        User fromCache = redisTemplate.opsForValue().get(key);
        if (fromCache != null) {
            return fromCache;
        }

        // Cache miss: the database is the source of truth.
        User loaded = userRepository.findById(userId)
            .orElseThrow(() -> new UserNotFoundException(userId));

        // Populate the cache so the next read within the TTL is a hit.
        redisTemplate.opsForValue().set(key, loaded, CACHE_TTL);
        return loaded;
    }

    /**
     * Write-Through update: persist the change first, then refresh the
     * cached copy so readers see the new state immediately.
     *
     * @throws UserNotFoundException if no user exists for {@code userId}
     */
    public User updateUser(Long userId, UserUpdateRequest request) {
        User existing = userRepository.findById(userId)
            .orElseThrow(() -> new UserNotFoundException(userId));

        existing.setName(request.getName());
        existing.setEmail(request.getEmail());

        // Database write happens before the cache write.
        User saved = userRepository.save(existing);
        redisTemplate.opsForValue().set(cacheKeyFor(userId), saved, CACHE_TTL);
        return saved;
    }

    /** Cache invalidation: delete from the database, then drop the cached copy. */
    public void deleteUser(Long userId) {
        userRepository.deleteById(userId);
        redisTemplate.delete(cacheKeyFor(userId));
    }
}

Multi-Level Caching

The caching strategies diagram above shows the multi-level cache architecture with L1 (local), L2 (distributed), and database layers.

@Service
public class MultiLevelCacheService<K, V> {

    private final Cache<K, V> localCache;              // L1: in-process (Caffeine)
    private final RedisTemplate<K, V> redisTemplate;   // L2: shared, distributed
    private final Duration localTtl = Duration.ofMinutes(5);
    private final Duration redisTtl = Duration.ofMinutes(30);

    /**
     * BUG FIX: the original no-arg constructor never assigned the final
     * {@code redisTemplate} field, which does not compile. Inject it here
     * (single-constructor Spring injection needs no {@code @Autowired}).
     */
    public MultiLevelCacheService(RedisTemplate<K, V> redisTemplate) {
        this.redisTemplate = redisTemplate;
        this.localCache = Caffeine.newBuilder()
            .maximumSize(10_000)
            .expireAfterWrite(localTtl)
            .build();
    }

    /**
     * Read-through lookup: L1 (local) first, then L2 (Redis), finally the
     * {@code loader}. Each hit backfills the faster tiers.
     *
     * @param key    cache key
     * @param loader invoked on a full miss to load from the source of truth;
     *               a {@code null} result is not cached
     * @return the cached or freshly loaded value, or {@code null} if the
     *         loader produced none
     */
    public V get(K key, Function<K, V> loader) {
        // L1: check the local cache.
        V value = localCache.getIfPresent(key);
        if (value != null) {
            return value;
        }

        // L2: check Redis; on a hit, backfill L1 only (L2 TTL still governs).
        value = redisTemplate.opsForValue().get(key);
        if (value != null) {
            localCache.put(key, value);
            return value;
        }

        // Full miss: load from the source and populate both tiers.
        value = loader.apply(key);
        if (value != null) {
            put(key, value);
        }

        return value;
    }

    /** Writes the value into both tiers; Redis gets the longer TTL. */
    public void put(K key, V value) {
        localCache.put(key, value);
        redisTemplate.opsForValue().set(key, value, redisTtl);
    }

    /**
     * Removes the key from both tiers and broadcasts the invalidation so
     * other instances can evict their own L1 copies.
     */
    public void invalidate(K key) {
        localCache.invalidate(key);
        redisTemplate.delete(key);
        // Publish invalidation event for other instances' local caches.
        redisTemplate.convertAndSend("cache-invalidation", key.toString());
    }
}

Database Scaling

Database Scaling Patterns

Read/Write Splitting

@Configuration
public class DataSourceConfig {

    /**
     * Primary DataSource that routes each connection request to either the
     * writer or the reader instance, keyed by {@code DataSourceType}.
     */
    @Bean
    @Primary
    public DataSource routingDataSource(
            @Qualifier("writerDataSource") DataSource writer,
            @Qualifier("readerDataSource") DataSource reader) {

        RoutingDataSource routing = new RoutingDataSource();
        routing.setTargetDataSources(Map.<Object, Object>of(
            DataSourceType.WRITER, writer,
            DataSourceType.READER, reader));
        // Unrouted lookups fall back to the writer — the safe default.
        routing.setDefaultTargetDataSource(writer);
        return routing;
    }
}

/**
 * Routing DataSource that picks the writer or reader target per operation.
 * The lookup key comes from a context holder, which is populated by the
 * routing aspect before the connection is acquired.
 */
public class RoutingDataSource extends AbstractRoutingDataSource {

    @Override
    protected Object determineCurrentLookupKey() {
        // Returns the DataSourceType for the current thread; a null return
        // makes AbstractRoutingDataSource use its configured default target.
        return DataSourceContextHolder.getDataSourceType();
    }
}

// Aspect for automatic routing
// Aspect for automatic routing
@Aspect
@Component
// BUG FIX: without an explicit order, this aspect is not guaranteed to run
// BEFORE Spring's transaction advice — and the routing key must be set before
// the transaction manager acquires a connection, or routing has no effect.
@Order(Ordered.HIGHEST_PRECEDENCE)
public class ReadOnlyRoutingAspect {

    /**
     * Routes {@code @Transactional(readOnly = true)} methods to the reader
     * data source and everything else to the writer.
     *
     * @param joinPoint the intercepted transactional method invocation
     * @return whatever the intercepted method returns
     * @throws Throwable propagated from the intercepted method
     */
    @Around("@annotation(org.springframework.transaction.annotation.Transactional)")
    public Object route(ProceedingJoinPoint joinPoint) throws Throwable {
        Transactional transactional = getTransactionalAnnotation(joinPoint);

        // BUG FIX: remember the outer routing so nested advised calls do not
        // wipe it; the original cleared unconditionally in finally.
        DataSourceType previous = DataSourceContextHolder.getDataSourceType();

        if (transactional != null && transactional.readOnly()) {
            DataSourceContextHolder.setDataSourceType(DataSourceType.READER);
        } else {
            DataSourceContextHolder.setDataSourceType(DataSourceType.WRITER);
        }

        try {
            return joinPoint.proceed();
        } finally {
            // Restore the caller's routing, or clear if we were outermost.
            if (previous != null) {
                DataSourceContextHolder.setDataSourceType(previous);
            } else {
                DataSourceContextHolder.clear();
            }
        }
    }
}

Message Queue Scaling

Queue-Based Load Leveling

Competing Consumers Pattern


Connection Pooling

Connection Pooling

# HikariCP Configuration
spring:
  datasource:
    hikari:
      minimum-idle: 10             # keep this many idle connections warm
      maximum-pool-size: 50        # hard cap; see sizing formula below
      connection-timeout: 20000    # 20 seconds
      idle-timeout: 300000         # 5 minutes
      max-lifetime: 1200000        # 20 minutes (keep below DB-side timeout)
      pool-name: MyAppPool

# Sizing formula:
# connections = (core_count * 2) + effective_spindle_count
# For SSD with 4 cores: (4 * 2) + 1 = 9 connections minimum

Rate Limiting

Rate Limiting Algorithms

Rate Limiter Implementation

@Component
public class RedisRateLimiter {

    private final RedisTemplate<String, String> redisTemplate;
    private final int maxRequests;   // default per-window request budget
    private final Duration window;   // default sliding-window length

    /**
     * BUG FIX: the original class declared these fields {@code final} but had
     * no constructor assigning them, which does not compile. Configure the
     * default limit and window here.
     */
    public RedisRateLimiter(RedisTemplate<String, String> redisTemplate,
                            int maxRequests, Duration window) {
        this.redisTemplate = redisTemplate;
        this.maxRequests = maxRequests;
        this.window = window;
    }

    /** Sliding-window check using the limiter's default limit and window. */
    public boolean isAllowed(String key) {
        return isAllowed(key, maxRequests, window);
    }

    /**
     * Generalized sliding-window check so callers (e.g. an annotation aspect)
     * can supply per-call limits. Uses a Redis ZSET scored by timestamp and a
     * Lua script so trim/count/record happen atomically.
     *
     * @param key        logical client/endpoint identifier
     * @param limit      maximum requests allowed within the window
     * @param windowSize sliding-window length
     * @return true if this request fits in the window and was recorded
     */
    public boolean isAllowed(String key, int limit, Duration windowSize) {
        String redisKey = "ratelimit:" + key;
        long now = System.currentTimeMillis();
        long windowStart = now - windowSize.toMillis();

        // BUG FIX: the original used the timestamp as both score and member,
        // so requests landing in the same millisecond collapsed into one ZSET
        // entry and were under-counted. Use a (practically) unique member.
        String member = now + "-" + System.nanoTime();

        // Lua script for atomic trim-count-record.
        String script = """
            -- Remove entries that fell out of the window
            redis.call('ZREMRANGEBYSCORE', KEYS[1], '-inf', ARGV[1])
            -- Count what remains in the window
            local count = redis.call('ZCARD', KEYS[1])
            if count < tonumber(ARGV[2]) then
                -- Record this request: score = timestamp, member = unique id
                redis.call('ZADD', KEYS[1], ARGV[3], ARGV[4])
                redis.call('EXPIRE', KEYS[1], ARGV[5])
                return 1
            end
            return 0
            """;

        Long result = redisTemplate.execute(
            new DefaultRedisScript<>(script, Long.class),
            List.of(redisKey),
            String.valueOf(windowStart),
            String.valueOf(limit),
            String.valueOf(now),
            member,
            // BUG FIX: EXPIRE 0 deletes the key immediately for sub-second
            // windows; clamp the TTL to at least one second.
            String.valueOf(Math.max(1, windowSize.getSeconds()))
        );

        return result != null && result == 1;
    }
}

// Annotation-based rate limiting
/**
 * Marks a method as rate limited. Intended contract: at most {@link #requests()}
 * invocations per {@link #seconds()} per caller key.
 */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface RateLimit {
    // Maximum number of requests permitted within the window.
    int requests() default 100;
    // Window length in seconds.
    int seconds() default 60;
}

/**
 * Enforces {@code @RateLimit} on annotated methods by consulting the Redis
 * rate limiter before proceeding; rejected calls throw
 * {@link RateLimitExceededException} instead of invoking the method.
 */
@Aspect
@Component
public class RateLimitAspect {

    @Autowired
    private RedisRateLimiter rateLimiter;

    @Around("@annotation(rateLimit)")
    public Object checkRateLimit(ProceedingJoinPoint joinPoint,
                                  RateLimit rateLimit) throws Throwable {
        // extractKey is defined elsewhere; presumably derives a per-client
        // key (e.g. user id or IP) from the join point — verify against caller.
        String key = extractKey(joinPoint);

        // NOTE(review): rateLimit.requests() and rateLimit.seconds() are never
        // read — the limiter applies its own fixed configuration, so per-method
        // annotation values are silently ignored. Pass them through to the
        // limiter to honor the annotation's contract.
        if (!rateLimiter.isAllowed(key)) {
            throw new RateLimitExceededException("Rate limit exceeded");
        }

        return joinPoint.proceed();
    }
}

Auto-Scaling Patterns

Auto-Scaling Strategies

# Kubernetes HPA
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 2        # never scale to fewer than 2 pods (availability floor)
  maxReplicas: 20       # hard ceiling regardless of load
  metrics:
    # Scale on whichever metric demands the most replicas.
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70     # % of pod CPU request
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80     # % of pod memory request
    - type: Pods
      pods:
        metric:
          name: http_requests_per_second   # custom metric from a metrics adapter
        target:
          type: AverageValue
          averageValue: 1000               # target RPS per pod
  behavior:
    # Scale down slowly to avoid flapping after traffic spikes.
    scaleDown:
      stabilizationWindowSeconds: 300
      policies:
        - type: Percent
          value: 10          # shed at most 10% of pods per minute
          periodSeconds: 60
    # Scale up aggressively to absorb sudden load.
    scaleUp:
      stabilizationWindowSeconds: 0
      policies:
        - type: Percent
          value: 100         # may double the pod count every 15 seconds
          periodSeconds: 15

CDN & Edge Computing

CDN Architecture


Common Interview Questions

  1. How to scale a read-heavy application?
     - Add read replicas
     - Implement caching (Redis, CDN)
     - Use async processing
     - Consider CQRS

  2. How to handle thundering herd?
     - Cache stampede protection
     - Request coalescing
     - Probabilistic early expiration
     - Circuit breakers

  3. Vertical vs horizontal scaling?
     - Vertical: simpler, but has hard limits and is a single point of failure
     - Horizontal: more complex, but scales without a fixed ceiling and is fault tolerant

  4. How to choose a sharding key?
     - High cardinality
     - Even distribution
     - Matches query patterns
     - Avoids cross-shard queries

  5. How to handle hot spots?
     - Add salt to keys
     - Split hot partitions
     - Use write sharding
     - Cache hot data