Scalability Patterns¶
Scalability Overview¶
Load Balancing¶
Caching Patterns¶
Cache Implementation¶
@Service
public class UserService {

    private final UserRepository userRepository;
    private final RedisTemplate<String, User> redisTemplate;

    /** How long a cached user entry stays valid before readers go back to the DB. */
    private static final Duration CACHE_TTL = Duration.ofMinutes(15);

    /**
     * Constructor injection. Required fix: both fields are {@code final} but the
     * original class had no constructor, so they were never assigned and the
     * class could not compile.
     */
    public UserService(UserRepository userRepository,
                       RedisTemplate<String, User> redisTemplate) {
        this.userRepository = userRepository;
        this.redisTemplate = redisTemplate;
    }

    /**
     * Cache-Aside read: check the cache, fall back to the database on a miss,
     * then populate the cache for subsequent readers.
     *
     * @param userId id of the user to load
     * @return the cached or freshly loaded user
     * @throws UserNotFoundException if no user exists for {@code userId}
     */
    public User getUser(Long userId) {
        String cacheKey = "user:" + userId;
        // Check cache first
        User cached = redisTemplate.opsForValue().get(cacheKey);
        if (cached != null) {
            return cached;
        }
        // Cache miss - fetch from DB. NOTE(review): misses for nonexistent ids are
        // never cached, so repeated lookups of a missing user always hit the DB.
        User user = userRepository.findById(userId)
                .orElseThrow(() -> new UserNotFoundException(userId));
        // Populate cache with a TTL so stale entries eventually expire on their own
        redisTemplate.opsForValue().set(cacheKey, user, CACHE_TTL);
        return user;
    }

    /**
     * Write-Through update: persist to the database first, then refresh the
     * cache with the saved entity so readers immediately see the new state.
     *
     * @throws UserNotFoundException if no user exists for {@code userId}
     */
    public User updateUser(Long userId, UserUpdateRequest request) {
        User user = userRepository.findById(userId)
                .orElseThrow(() -> new UserNotFoundException(userId));
        user.setName(request.getName());
        user.setEmail(request.getEmail());
        // Write to DB
        user = userRepository.save(user);
        // Write to cache
        String cacheKey = "user:" + userId;
        redisTemplate.opsForValue().set(cacheKey, user, CACHE_TTL);
        return user;
    }

    /**
     * Cache invalidation on delete: remove the row, then the cache entry.
     * NOTE(review): a reader racing between the two calls can repopulate the
     * cache with the deleted user for up to CACHE_TTL; if that matters, a
     * delayed second delete ("double delete") closes the window.
     */
    public void deleteUser(Long userId) {
        userRepository.deleteById(userId);
        redisTemplate.delete("user:" + userId);
    }
}
Multi-Level Caching¶
The caching strategies diagram above shows the multi-level cache architecture with L1 (local), L2 (distributed), and database layers.
@Service
public class MultiLevelCacheService<K, V> {

    private final Cache<K, V> localCache;            // L1: in-process (Caffeine)
    private final RedisTemplate<K, V> redisTemplate; // L2: shared across instances
    private final Duration localTtl = Duration.ofMinutes(5);
    private final Duration redisTtl = Duration.ofMinutes(30);

    /**
     * Required fix: {@code redisTemplate} is {@code final} but the original
     * no-arg constructor never assigned it, so the class could not compile.
     * Inject the L2 template here.
     */
    public MultiLevelCacheService(RedisTemplate<K, V> redisTemplate) {
        this.redisTemplate = redisTemplate;
        // Field initializers (localTtl) run before this body, so localTtl is set.
        this.localCache = Caffeine.newBuilder()
                .maximumSize(10_000)
                .expireAfterWrite(localTtl)
                .build();
    }

    /**
     * Reads through the cache hierarchy: L1 (local) first, then L2 (Redis),
     * finally the supplied loader. Values found in L2 are promoted into L1;
     * values produced by the loader are written to both levels.
     *
     * @param key    cache key
     * @param loader invoked only on a full miss; a {@code null} result is not cached
     * @return the cached or loaded value, possibly {@code null}
     */
    public V get(K key, Function<K, V> loader) {
        // L1: Check local cache
        V value = localCache.getIfPresent(key);
        if (value != null) {
            return value;
        }
        // L2: Check Redis
        value = redisTemplate.opsForValue().get(key);
        if (value != null) {
            localCache.put(key, value); // promote to L1 for subsequent reads
            return value;
        }
        // Load from source
        value = loader.apply(key);
        if (value != null) {
            put(key, value);
        }
        return value;
    }

    /** Writes the value to both cache levels (L1 with localTtl via Caffeine, L2 with redisTtl). */
    public void put(K key, V value) {
        localCache.put(key, value);
        redisTemplate.opsForValue().set(key, value, redisTtl);
    }

    /**
     * Removes the entry from both levels and broadcasts the key so other
     * instances can drop their L1 copy.
     * NOTE(review): this instance's subscription to "cache-invalidation" is not
     * shown here — confirm a listener exists that calls localCache.invalidate.
     */
    public void invalidate(K key) {
        localCache.invalidate(key);
        redisTemplate.delete(key);
        // Publish invalidation event for other instances
        redisTemplate.convertAndSend("cache-invalidation", key.toString());
    }
}
Database Scaling¶
Read/Write Splitting¶
@Configuration
public class DataSourceConfig {

    /**
     * Primary DataSource bean that routes each JDBC call to either the writer
     * or the reader pool, based on the lookup key supplied by
     * {@code RoutingDataSource}. The writer is the default target.
     */
    @Bean
    @Primary
    public DataSource routingDataSource(
            @Qualifier("writerDataSource") DataSource writer,
            @Qualifier("readerDataSource") DataSource reader) {
        RoutingDataSource routing = new RoutingDataSource();
        Map<Object, Object> targets = new HashMap<>();
        targets.put(DataSourceType.WRITER, writer);
        targets.put(DataSourceType.READER, reader);
        routing.setTargetDataSources(targets);
        routing.setDefaultTargetDataSource(writer);
        return routing;
    }
}
/**
 * Spring routing DataSource: picks the writer or reader pool per operation.
 * The lookup key returned here is matched against the map registered via
 * setTargetDataSources in DataSourceConfig.
 */
public class RoutingDataSource extends AbstractRoutingDataSource {
@Override
protected Object determineCurrentLookupKey() {
// Reads the thread-bound type set by ReadOnlyRoutingAspect; null falls back
// to the default target (the writer).
return DataSourceContextHolder.getDataSourceType();
}
}
// Aspect for automatic routing
@Aspect
@Component
public class ReadOnlyRoutingAspect {

    /**
     * Binds the datasource type for the current thread before a
     * {@code @Transactional} method runs: read-only transactions go to the
     * reader pool, everything else to the writer.
     * NOTE(review): this advice must execute BEFORE Spring's transaction
     * advisor acquires a connection (e.g. give the aspect a higher precedence
     * via {@code @Order}) — confirm advisor ordering in this application.
     */
    @Around("@annotation(org.springframework.transaction.annotation.Transactional)")
    public Object route(ProceedingJoinPoint joinPoint) throws Throwable {
        Transactional transactional = getTransactionalAnnotation(joinPoint);
        if (transactional != null && transactional.readOnly()) {
            DataSourceContextHolder.setDataSourceType(DataSourceType.READER);
        } else {
            DataSourceContextHolder.setDataSourceType(DataSourceType.WRITER);
        }
        try {
            return joinPoint.proceed();
        } finally {
            // Always clear so the thread-bound type does not leak into the next call
            DataSourceContextHolder.clear();
        }
    }

    /**
     * Required fix: the original called this helper without defining it, so the
     * class could not compile. Resolves the {@code @Transactional} annotation
     * on the intercepted method (returns null if it is only on the class).
     */
    private Transactional getTransactionalAnnotation(ProceedingJoinPoint joinPoint) {
        MethodSignature signature = (MethodSignature) joinPoint.getSignature();
        return signature.getMethod().getAnnotation(Transactional.class);
    }
}
Message Queue Scaling¶
Connection Pooling¶
# HikariCP Configuration
# (indentation restored — the snippet had been flattened and was invalid YAML)
spring:
  datasource:
    hikari:
      minimum-idle: 10
      maximum-pool-size: 50
      connection-timeout: 20000 # 20 seconds
      idle-timeout: 300000 # 5 minutes
      max-lifetime: 1200000 # 20 minutes; keep below any DB/LB idle-kill timeout
      pool-name: MyAppPool

# Sizing formula:
# connections = (core_count * 2) + effective_spindle_count
# For SSD with 4 cores: (4 * 2) + 1 = 9 connections minimum
# NOTE(review): the formula suggests ~9 connections, yet minimum-idle is 10 and
# the pool can grow to 50 — confirm the larger pool is intentional for this load.
Rate Limiting¶
Rate Limiter Implementation¶
@Component
public class RedisRateLimiter {

    private final RedisTemplate<String, String> redisTemplate;
    private final int maxRequests; // max requests allowed inside one window
    private final Duration window; // sliding window length

    /**
     * Required fix: all three fields are {@code final} but the original class
     * had no constructor, so they were never assigned and the class could not
     * compile.
     */
    public RedisRateLimiter(RedisTemplate<String, String> redisTemplate,
                            int maxRequests,
                            Duration window) {
        this.redisTemplate = redisTemplate;
        this.maxRequests = maxRequests;
        this.window = window;
    }

    /**
     * Sliding-window rate limiter backed by a Redis sorted set (score = request
     * timestamp). Prune, count, and add happen atomically in a Lua script.
     *
     * @param key caller identity (e.g. user id or API key)
     * @return true if this request is within the limit and was recorded
     */
    public boolean isAllowed(String key) {
        String redisKey = "ratelimit:" + key;
        long now = System.currentTimeMillis();
        long windowStart = now - window.toMillis();
        // Required fix: the original used the raw timestamp as the ZSET member,
        // so concurrent requests in the same millisecond collapsed into a single
        // entry and the limiter under-counted. Use a unique member per request.
        String member = now + ":" + UUID.randomUUID();
        // Lua script for atomic operation
        String script = """
                -- Remove entries older than the window
                redis.call('ZREMRANGEBYSCORE', KEYS[1], '-inf', ARGV[1])
                -- Count current entries
                local count = redis.call('ZCARD', KEYS[1])
                if count < tonumber(ARGV[2]) then
                    -- Record this request: score = timestamp, member = unique id
                    redis.call('ZADD', KEYS[1], ARGV[3], ARGV[5])
                    redis.call('EXPIRE', KEYS[1], ARGV[4])
                    return 1
                end
                return 0
                """;
        Long result = redisTemplate.execute(
                new DefaultRedisScript<>(script, Long.class),
                List.of(redisKey),
                String.valueOf(windowStart),
                String.valueOf(maxRequests),
                String.valueOf(now),
                String.valueOf(window.getSeconds()),
                member
        );
        return result != null && result == 1;
    }
}
// Annotation-based rate limiting
/**
 * Marks a method as rate-limited; enforced by RateLimitAspect.
 * NOTE(review): as written in this file, RateLimitAspect never passes
 * requests()/seconds() to the limiter, so these values are currently unused —
 * confirm whether per-annotation limits are intended.
 */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface RateLimit {
// Maximum number of requests permitted per window.
int requests() default 100;
// Window length in seconds.
int seconds() default 60;
}
@Aspect
@Component
public class RateLimitAspect {

    // Constructor injection instead of field @Autowired: the dependency can be
    // final and the aspect is constructible in tests without a Spring context.
    private final RedisRateLimiter rateLimiter;

    public RateLimitAspect(RedisRateLimiter rateLimiter) {
        this.rateLimiter = rateLimiter;
    }

    /**
     * Rejects the call before it proceeds when the limiter denies it.
     * NOTE(review): rateLimit.requests()/seconds() are never forwarded to
     * isAllowed(), so the limiter's own configured limits apply instead of the
     * per-annotation values — confirm whether that is intended.
     *
     * @throws RateLimitExceededException when the rate limit is exhausted
     */
    @Around("@annotation(rateLimit)")
    public Object checkRateLimit(ProceedingJoinPoint joinPoint,
                                 RateLimit rateLimit) throws Throwable {
        String key = extractKey(joinPoint);
        if (!rateLimiter.isAllowed(key)) {
            throw new RateLimitExceededException("Rate limit exceeded");
        }
        return joinPoint.proceed();
    }

    /**
     * Required fix: the original called extractKey without defining it, so the
     * class could not compile. Uses the intercepted method's short signature
     * as the rate-limit bucket key (one bucket per annotated method).
     */
    private String extractKey(ProceedingJoinPoint joinPoint) {
        return joinPoint.getSignature().toShortString();
    }
}
Auto-Scaling Patterns¶
# Kubernetes HPA
# (indentation restored — the snippet had been flattened and was invalid YAML)
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: myapp-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: myapp
  minReplicas: 2
  maxReplicas: 20
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
    - type: Resource
      resource:
        name: memory
        target:
          type: Utilization
          averageUtilization: 80
    - type: Pods
      pods:
        metric:
          name: http_requests_per_second
        target:
          type: AverageValue
          averageValue: 1000
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300 # wait 5 min before shrinking (avoids flapping)
      policies:
        - type: Percent
          value: 10 # remove at most 10% of pods per minute
          periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 0 # react to load spikes immediately
      policies:
        - type: Percent
          value: 100 # may double the pod count every 15 seconds
          periodSeconds: 15
CDN & Edge Computing¶
Common Interview Questions¶
- How to scale a read-heavy application?
  - Add read replicas
  - Implement caching (Redis, CDN)
  - Use async processing
  - Consider CQRS
- How to handle thundering herd?
  - Cache stampede protection
  - Request coalescing
  - Probabilistic early expiration
  - Circuit breakers
- Vertical vs horizontal scaling?
  - Vertical: simpler, but hits hardware limits and remains a single point of failure
  - Horizontal: more complex, but scales much further and is fault tolerant
- How to choose a sharding key?
  - High cardinality
  - Even distribution
  - Matches the query patterns
  - Avoids cross-shard queries
- How to handle hot spots?
  - Add salt to keys
  - Split hot partitions
  - Use write sharding
  - Cache hot data