Rate Limiting
Definition

Algorithms

Implementation
// TOKEN BUCKET (Redis-based for distributed)
// Each client key gets a bucket holding up to `capacity` tokens, refilled at
// `refillRate` tokens/second; one token is spent per request. Refill + consume
// run inside a single Lua script so the check is atomic across app instances.
public class TokenBucketRateLimiter {

    // KEYS[1] = token count, KEYS[2] = last-refill timestamp (seconds).
    // ARGV[1] = capacity, ARGV[2] = now (seconds), ARGV[3] = refill rate.
    // First use: bucket defaults to full (capacity) with lastRefill = now.
    private static final String SCRIPT = """
        local tokens = tonumber(redis.call('get', KEYS[1]) or ARGV[1])
        local lastRefill = tonumber(redis.call('get', KEYS[2]) or ARGV[2])
        local now = tonumber(ARGV[2])
        local rate = tonumber(ARGV[3])
        local capacity = tonumber(ARGV[1])
        -- Refill tokens based on elapsed seconds, capped at capacity
        local elapsed = now - lastRefill
        tokens = math.min(capacity, tokens + elapsed * rate)
        if tokens >= 1 then
        tokens = tokens - 1
        redis.call('set', KEYS[1], tokens)
        redis.call('set', KEYS[2], now)
        return 1
        else
        return 0
        end
        """;

    private final RedisTemplate<String, String> redis;
    private final int capacity;   // max tokens a bucket can hold
    private final int refillRate; // tokens added per second

    public TokenBucketRateLimiter(RedisTemplate<String, String> redis,
                                  int capacity, int refillRate) {
        this.redis = redis;
        this.capacity = capacity;
        this.refillRate = refillRate;
    }

    /**
     * Attempts to consume one token for {@code key}.
     *
     * @param key client identifier (API key, user id, IP, ...)
     * @return true if a token was available and the request is allowed
     */
    public boolean tryConsume(String key) {
        String bucketKey = "rate_limit:" + key;
        long nowSeconds = System.currentTimeMillis() / 1000; // script works in seconds
        Long result = redis.execute(
            new DefaultRedisScript<>(SCRIPT, Long.class),
            List.of(bucketKey, bucketKey + ":last_refill"),
            String.valueOf(capacity),
            String.valueOf(nowSeconds),
            String.valueOf(refillRate));
        // Null-safe compare: `result == 1` would NPE (unboxing) if the
        // script execution returned null.
        return Long.valueOf(1L).equals(result);
    }
}
// SLIDING WINDOW (simpler implementation)
// In-memory sliding-window log: stores one timestamp per accepted request and
// counts those still inside the window. Exact, but O(maxRequests) memory per
// client, and idle clients' (empty) deques are never evicted — fine for a
// single-node example, not for unbounded client populations.
public class SlidingWindowRateLimiter {
    private final Map<String, Deque<Long>> requestLog = new ConcurrentHashMap<>();
    private final int maxRequests;  // max accepted requests per window
    private final Duration window;  // sliding window length

    public SlidingWindowRateLimiter(int maxRequests, Duration window) {
        if (maxRequests <= 0) {
            throw new IllegalArgumentException("maxRequests must be > 0: " + maxRequests);
        }
        this.maxRequests = maxRequests;
        this.window = Objects.requireNonNull(window, "window");
    }

    /**
     * Records and allows the request if {@code clientId} is under its limit.
     * Synchronized on the whole instance: simple and correct; use per-client
     * or striped locking if this becomes a contention hotspot.
     *
     * @return true if the request is allowed
     */
    public synchronized boolean tryAcquire(String clientId) {
        long now = System.currentTimeMillis();
        long windowStart = now - window.toMillis();
        // ArrayDeque over LinkedList: same Deque API, less per-node overhead.
        Deque<Long> timestamps = requestLog.computeIfAbsent(
            clientId, k -> new ArrayDeque<>());
        // Evict timestamps that have slid out of the window.
        while (!timestamps.isEmpty() && timestamps.peekFirst() < windowStart) {
            timestamps.pollFirst();
        }
        if (timestamps.size() < maxRequests) {
            timestamps.addLast(now);
            return true;
        }
        return false;
    }
}
// Spring Boot with Resilience4j
// Declarative rate limiting: the limits for the "api" limiter (limitForPeriod,
// limitRefreshPeriod, timeoutDuration) come from configuration / the
// RateLimiterRegistry, not from code.
@RateLimiter(name = "api", fallbackMethod = "rateLimitFallback")
public Response callApi() {
return apiClient.call();
}
// Invoked when the limiter rejects the call. Resilience4j requires the
// fallback to match the original method's signature plus a trailing
// exception parameter (here RequestNotPermitted).
public Response rateLimitFallback(RequestNotPermitted e) {
return Response.status(429).body("Too many requests");
}
Distributed Rate Limiting
// DISTRIBUTED CHALLENGES:
// • Multiple instances need shared state
// • Network latency affects accuracy
// • Need atomic operations
// SOLUTION 1: Centralized (Redis)
// All instances check against Redis
// Pros: Accurate
// Cons: Redis is a single point of failure; every check adds a network round trip
// SOLUTION 2: Local + Sync
// Local rate limiter, periodic sync
// Pros: Low latency
// Cons: Less accurate, can exceed limit briefly
// SOLUTION 3: Sticky Sessions
// Route same client to same instance
// Pros: Simple, accurate
// Cons: Uneven load distribution
// Redis-based distributed rate limiter (sliding-window log in a sorted set)
@Component
public class DistributedRateLimiter {
    @Autowired
    private StringRedisTemplate redis;

    /**
     * Sliding-window check shared by all app instances via Redis.
     * Entries are ZSet members scored by request timestamp (ms).
     *
     * NOTE(review): prune / count / add is three round trips, not atomic —
     * concurrent callers can briefly exceed the limit. For strict enforcement,
     * move the sequence into a Lua script (as the token-bucket example does).
     *
     * @param clientId client identifier
     * @param limit    max requests allowed inside the window
     * @param window   sliding window length
     * @return true if the request is allowed
     */
    public boolean isAllowed(String clientId, int limit, Duration window) {
        String key = "rate:" + clientId;
        long now = System.currentTimeMillis();
        // Drop entries that have slid out of the window.
        redis.opsForZSet().removeRangeByScore(key, 0, now - window.toMillis());
        Long count = redis.opsForZSet().zCard(key);
        long current = (count == null) ? 0L : count; // null-safe unboxing
        if (current < limit) {
            // Member must be unique: two requests in the same millisecond with
            // member String.valueOf(now) would collapse into one ZSet entry
            // (ZADD overwrites the score), undercounting requests.
            String member = now + ":" + java.util.UUID.randomUUID();
            redis.opsForZSet().add(key, member, now);
            redis.expire(key, window); // self-clean keys for idle clients
            return true;
        }
        return false;
    }
}
// API Gateway rate limiting (Kong, nginx)
// Offload to gateway, cleaner architecture
// Kong plugin:
// plugins:
// - name: rate-limiting
// config:
// minute: 100
// policy: redis
Best Practices

Tips & Tricks
