Rate Limiting
Definition

Algorithms

Implementation
// TOKEN BUCKET (Redis-based for distributed)
// Each client key gets a bucket holding up to `capacity` tokens, refilled at
// `refillRate` tokens/second; one token is spent per request. Refill + consume
// run inside a single Lua script so the check is atomic across app instances.
public class TokenBucketRateLimiter {

    // KEYS[1] = token count, KEYS[2] = last-refill timestamp (seconds).
    // ARGV[1] = capacity, ARGV[2] = now (seconds), ARGV[3] = refill rate.
    // First use: bucket defaults to full (capacity) with lastRefill = now.
    private static final String SCRIPT = """
        local tokens = tonumber(redis.call('get', KEYS[1]) or ARGV[1])
        local lastRefill = tonumber(redis.call('get', KEYS[2]) or ARGV[2])
        local now = tonumber(ARGV[2])
        local rate = tonumber(ARGV[3])
        local capacity = tonumber(ARGV[1])
        -- Refill tokens based on elapsed seconds, capped at capacity
        local elapsed = now - lastRefill
        tokens = math.min(capacity, tokens + elapsed * rate)
        if tokens >= 1 then
        tokens = tokens - 1
        redis.call('set', KEYS[1], tokens)
        redis.call('set', KEYS[2], now)
        return 1
        else
        return 0
        end
        """;

    private final RedisTemplate<String, String> redis;
    private final int capacity;   // max tokens a bucket can hold
    private final int refillRate; // tokens added per second

    public TokenBucketRateLimiter(RedisTemplate<String, String> redis,
                                  int capacity, int refillRate) {
        this.redis = redis;
        this.capacity = capacity;
        this.refillRate = refillRate;
    }

    /**
     * Attempts to consume one token for {@code key}.
     *
     * @param key client identifier (API key, user id, IP, ...)
     * @return true if a token was available and the request is allowed
     */
    public boolean tryConsume(String key) {
        String bucketKey = "rate_limit:" + key;
        long nowSeconds = System.currentTimeMillis() / 1000; // script works in seconds
        Long result = redis.execute(
            new DefaultRedisScript<>(SCRIPT, Long.class),
            List.of(bucketKey, bucketKey + ":last_refill"),
            String.valueOf(capacity),
            String.valueOf(nowSeconds),
            String.valueOf(refillRate));
        // Null-safe compare: `result == 1` would NPE (unboxing) if the
        // script execution returned null.
        return Long.valueOf(1L).equals(result);
    }
}
// SLIDING WINDOW (simpler implementation)
// In-memory sliding-window log: stores one timestamp per accepted request and
// counts those still inside the window. Exact, but O(maxRequests) memory per
// client, and idle clients' (empty) deques are never evicted — fine for a
// single-node example, not for unbounded client populations.
public class SlidingWindowRateLimiter {
    private final Map<String, Deque<Long>> requestLog = new ConcurrentHashMap<>();
    private final int maxRequests;  // max accepted requests per window
    private final Duration window;  // sliding window length

    public SlidingWindowRateLimiter(int maxRequests, Duration window) {
        if (maxRequests <= 0) {
            throw new IllegalArgumentException("maxRequests must be > 0: " + maxRequests);
        }
        this.maxRequests = maxRequests;
        this.window = Objects.requireNonNull(window, "window");
    }

    /**
     * Records and allows the request if {@code clientId} is under its limit.
     * Synchronized on the whole instance: simple and correct; use per-client
     * or striped locking if this becomes a contention hotspot.
     *
     * @return true if the request is allowed
     */
    public synchronized boolean tryAcquire(String clientId) {
        long now = System.currentTimeMillis();
        long windowStart = now - window.toMillis();
        // ArrayDeque over LinkedList: same Deque API, less per-node overhead.
        Deque<Long> timestamps = requestLog.computeIfAbsent(
            clientId, k -> new ArrayDeque<>());
        // Evict timestamps that have slid out of the window.
        while (!timestamps.isEmpty() && timestamps.peekFirst() < windowStart) {
            timestamps.pollFirst();
        }
        if (timestamps.size() < maxRequests) {
            timestamps.addLast(now);
            return true;
        }
        return false;
    }
}
// Spring Boot with Resilience4j
// Declarative rate limiting: the limits for the "api" limiter (limitForPeriod,
// limitRefreshPeriod, timeoutDuration) come from configuration / the
// RateLimiterRegistry, not from code.
@RateLimiter(name = "api", fallbackMethod = "rateLimitFallback")
public Response callApi() {
return apiClient.call();
}
// Invoked when the limiter rejects the call. Resilience4j requires the
// fallback to match the original method's signature plus a trailing
// exception parameter (here RequestNotPermitted).
public Response rateLimitFallback(RequestNotPermitted e) {
return Response.status(429).body("Too many requests");
}
Distributed Rate Limiting
// DISTRIBUTED CHALLENGES:
// • Multiple instances need shared state
// • Network latency affects accuracy
// • Need atomic operations
// SOLUTION 1: Centralized (Redis)
// All instances check against Redis
// Pros: Accurate
// Cons: Redis is a single point of failure; every check adds a network round trip
// SOLUTION 2: Local + Sync
// Local rate limiter, periodic sync
// Pros: Low latency
// Cons: Less accurate, can exceed limit briefly
// SOLUTION 3: Sticky Sessions
// Route same client to same instance
// Pros: Simple, accurate
// Cons: Uneven load distribution
// Redis-based distributed rate limiter (sliding-window log in a sorted set)
@Component
public class DistributedRateLimiter {
    @Autowired
    private StringRedisTemplate redis;

    /**
     * Sliding-window check shared by all app instances via Redis.
     * Entries are ZSet members scored by request timestamp (ms).
     *
     * NOTE(review): prune / count / add is three round trips, not atomic —
     * concurrent callers can briefly exceed the limit. For strict enforcement,
     * move the sequence into a Lua script (as the token-bucket example does).
     *
     * @param clientId client identifier
     * @param limit    max requests allowed inside the window
     * @param window   sliding window length
     * @return true if the request is allowed
     */
    public boolean isAllowed(String clientId, int limit, Duration window) {
        String key = "rate:" + clientId;
        long now = System.currentTimeMillis();
        // Drop entries that have slid out of the window.
        redis.opsForZSet().removeRangeByScore(key, 0, now - window.toMillis());
        Long count = redis.opsForZSet().zCard(key);
        long current = (count == null) ? 0L : count; // null-safe unboxing
        if (current < limit) {
            // Member must be unique: two requests in the same millisecond with
            // member String.valueOf(now) would collapse into one ZSet entry
            // (ZADD overwrites the score), undercounting requests.
            String member = now + ":" + java.util.UUID.randomUUID();
            redis.opsForZSet().add(key, member, now);
            redis.expire(key, window); // self-clean keys for idle clients
            return true;
        }
        return false;
    }
}
// API Gateway rate limiting (Kong, nginx)
// Offload to gateway, cleaner architecture
// Kong plugin:
// plugins:
// - name: rate-limiting
// config:
// minute: 100
// policy: redis
Best Practices

Tips & Tricks
