Unnamed Skill
API rate limiting and quota management implementation. Use when implementing request throttling, API quotas, backpressure handling, or protection against abuse. Keywords: rate limiting, throttling, token bucket, sliding window, leaky bucket, quota, Redis, backpressure, API limits, DDoS protection.
$ Installer
git clone https://github.com/cosmix/claude-code-setup /tmp/claude-code-setup && cp -r /tmp/claude-code-setup/skills/rate-limiting ~/.claude/skills/claude-code-setup// tip: Run this command in your terminal to install the skill
name: rate-limiting description: API rate limiting and quota management implementation. Use when implementing request throttling, API quotas, backpressure handling, or protection against abuse. Keywords: rate limiting, throttling, token bucket, sliding window, leaky bucket, quota, Redis, backpressure, API limits, DDoS protection.
Rate Limiting
Overview
Rate limiting is a technique to control the rate of requests a client can make to an API. It protects services from abuse, ensures fair usage, and maintains system stability. This skill covers algorithms, implementation patterns, and best practices for distributed rate limiting.
Key Concepts
Rate Limiting Algorithms
Token Bucket Algorithm:
The token bucket allows bursts while maintaining an average rate.
class TokenBucket {
private tokens: number;
private lastRefill: number;
constructor(
private capacity: number, // Maximum tokens
private refillRate: number, // Tokens per second
) {
this.tokens = capacity;
this.lastRefill = Date.now();
}
private refill(): void {
const now = Date.now();
const elapsed = (now - this.lastRefill) / 1000;
const tokensToAdd = elapsed * this.refillRate;
this.tokens = Math.min(this.capacity, this.tokens + tokensToAdd);
this.lastRefill = now;
}
consume(tokens: number = 1): boolean {
this.refill();
if (this.tokens >= tokens) {
this.tokens -= tokens;
return true;
}
return false;
}
getState(): { tokens: number; capacity: number } {
this.refill();
return { tokens: this.tokens, capacity: this.capacity };
}
}
// Usage: 100 requests/minute with burst of 10
const bucket = new TokenBucket(10, 100 / 60);
Sliding Window Log Algorithm:
Precise rate limiting by tracking individual request timestamps.
class SlidingWindowLog {
private requests: number[] = [];
constructor(
private windowMs: number, // Window size in milliseconds
private maxRequests: number, // Max requests per window
) {}
isAllowed(): boolean {
const now = Date.now();
const windowStart = now - this.windowMs;
// Remove expired entries
this.requests = this.requests.filter((ts) => ts > windowStart);
if (this.requests.length < this.maxRequests) {
this.requests.push(now);
return true;
}
return false;
}
getRemainingRequests(): number {
const now = Date.now();
const windowStart = now - this.windowMs;
this.requests = this.requests.filter((ts) => ts > windowStart);
return Math.max(0, this.maxRequests - this.requests.length);
}
getResetTime(): number {
if (this.requests.length === 0) return 0;
return this.requests[0] + this.windowMs;
}
}
Sliding Window Counter Algorithm:
Memory-efficient approximation using weighted counters.
class SlidingWindowCounter {
private previousCount: number = 0;
private currentCount: number = 0;
private windowStart: number;
constructor(
private windowMs: number,
private maxRequests: number,
) {
this.windowStart = Date.now();
}
isAllowed(): boolean {
const now = Date.now();
const elapsed = now - this.windowStart;
// Check if we've moved to a new window
if (elapsed >= this.windowMs) {
const windowsPassed = Math.floor(elapsed / this.windowMs);
if (windowsPassed === 1) {
this.previousCount = this.currentCount;
} else {
this.previousCount = 0;
}
this.currentCount = 0;
this.windowStart = now - (elapsed % this.windowMs);
}
// Calculate weighted count
const windowProgress = (now - this.windowStart) / this.windowMs;
const weightedCount =
this.previousCount * (1 - windowProgress) + this.currentCount;
if (weightedCount < this.maxRequests) {
this.currentCount++;
return true;
}
return false;
}
}
Leaky Bucket Algorithm:
Smooths out bursts by processing requests at a constant rate.
class LeakyBucket {
private queue: Array<() => void> = [];
private processing: boolean = false;
constructor(
private capacity: number, // Queue size
private leakRate: number, // Requests processed per second
) {}
async add(request: () => Promise<void>): Promise<boolean> {
if (this.queue.length >= this.capacity) {
return false; // Queue full, reject
}
return new Promise((resolve) => {
this.queue.push(async () => {
await request();
resolve(true);
});
this.processQueue();
});
}
private async processQueue(): Promise<void> {
if (this.processing || this.queue.length === 0) return;
this.processing = true;
while (this.queue.length > 0) {
const request = this.queue.shift();
if (request) {
await request();
await this.delay(1000 / this.leakRate);
}
}
this.processing = false;
}
private delay(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}
}
API Quota Management
Tiered Quota System:
interface QuotaTier {
name: string;
limits: {
requestsPerMinute: number;
requestsPerDay: number;
burstSize: number;
};
features: string[];
}
const quotaTiers: Record<string, QuotaTier> = {
free: {
name: "Free",
limits: {
requestsPerMinute: 10,
requestsPerDay: 1000,
burstSize: 5,
},
features: ["basic-api"],
},
pro: {
name: "Professional",
limits: {
requestsPerMinute: 100,
requestsPerDay: 50000,
burstSize: 20,
},
features: ["basic-api", "advanced-api", "webhooks"],
},
enterprise: {
name: "Enterprise",
limits: {
requestsPerMinute: 1000,
requestsPerDay: 1000000,
burstSize: 100,
},
features: ["basic-api", "advanced-api", "webhooks", "bulk-api"],
},
};
class QuotaManager {
constructor(private redis: Redis) {}
async checkQuota(
userId: string,
tier: string,
): Promise<{ allowed: boolean; remaining: number; resetAt: number }> {
const config = quotaTiers[tier];
if (!config) throw new Error(`Unknown tier: ${tier}`);
const minuteKey = `quota:${userId}:minute`;
const dayKey = `quota:${userId}:day`;
const [minuteCount, dayCount] = await Promise.all([
this.redis.get(minuteKey),
this.redis.get(dayKey),
]);
const currentMinute = parseInt(minuteCount || "0");
const currentDay = parseInt(dayCount || "0");
if (currentMinute >= config.limits.requestsPerMinute) {
const ttl = await this.redis.ttl(minuteKey);
return { allowed: false, remaining: 0, resetAt: Date.now() + ttl * 1000 };
}
if (currentDay >= config.limits.requestsPerDay) {
const ttl = await this.redis.ttl(dayKey);
return { allowed: false, remaining: 0, resetAt: Date.now() + ttl * 1000 };
}
// Increment counters
const pipeline = this.redis.pipeline();
pipeline.incr(minuteKey);
pipeline.expire(minuteKey, 60);
pipeline.incr(dayKey);
pipeline.expire(dayKey, 86400);
await pipeline.exec();
return {
allowed: true,
remaining: config.limits.requestsPerMinute - currentMinute - 1,
resetAt: Date.now() + 60000,
};
}
}
Per-User vs Per-IP Limiting
Combined Strategy:
interface RateLimitConfig {
authenticated: {
requestsPerMinute: number;
requestsPerHour: number;
};
anonymous: {
requestsPerMinute: number;
requestsPerHour: number;
};
ipBased: {
requestsPerMinute: number;
maxConnectionsPerIP: number;
};
}
class HybridRateLimiter {
constructor(
private redis: Redis,
private config: RateLimitConfig,
) {}
async check(
ip: string,
userId?: string,
): Promise<{ allowed: boolean; retryAfter?: number }> {
// Always check IP-based limits first (DDoS protection)
const ipResult = await this.checkIPLimit(ip);
if (!ipResult.allowed) {
return ipResult;
}
// Then check user or anonymous limits
if (userId) {
return this.checkUserLimit(userId);
} else {
return this.checkAnonymousLimit(ip);
}
}
private async checkIPLimit(
ip: string,
): Promise<{ allowed: boolean; retryAfter?: number }> {
const key = `ratelimit:ip:${ip}`;
const count = await this.redis.incr(key);
if (count === 1) {
await this.redis.expire(key, 60);
}
if (count > this.config.ipBased.requestsPerMinute) {
const ttl = await this.redis.ttl(key);
return { allowed: false, retryAfter: ttl };
}
return { allowed: true };
}
private async checkUserLimit(
userId: string,
): Promise<{ allowed: boolean; retryAfter?: number }> {
const minuteKey = `ratelimit:user:${userId}:minute`;
const hourKey = `ratelimit:user:${userId}:hour`;
const [minuteCount, hourCount] = await Promise.all([
this.incrementWithExpiry(minuteKey, 60),
this.incrementWithExpiry(hourKey, 3600),
]);
if (minuteCount > this.config.authenticated.requestsPerMinute) {
const ttl = await this.redis.ttl(minuteKey);
return { allowed: false, retryAfter: ttl };
}
if (hourCount > this.config.authenticated.requestsPerHour) {
const ttl = await this.redis.ttl(hourKey);
return { allowed: false, retryAfter: ttl };
}
return { allowed: true };
}
private async incrementWithExpiry(
key: string,
expiry: number,
): Promise<number> {
const count = await this.redis.incr(key);
if (count === 1) {
await this.redis.expire(key, expiry);
}
return count;
}
}
Distributed Rate Limiting (Redis)
Redis-Based Sliding Window:
import Redis from "ioredis";
class DistributedRateLimiter {
private redis: Redis;
constructor(redisUrl: string) {
this.redis = new Redis(redisUrl);
}
async isAllowed(
key: string,
limit: number,
windowSeconds: number,
): Promise<{ allowed: boolean; remaining: number; resetAt: number }> {
const now = Date.now();
const windowStart = now - windowSeconds * 1000;
const redisKey = `ratelimit:${key}`;
// Use Lua script for atomic operation
const luaScript = `
local key = KEYS[1]
local now = tonumber(ARGV[1])
local window_start = tonumber(ARGV[2])
local limit = tonumber(ARGV[3])
local window_seconds = tonumber(ARGV[4])
-- Remove old entries
redis.call('ZREMRANGEBYSCORE', key, '-inf', window_start)
-- Count current entries
local count = redis.call('ZCARD', key)
if count < limit then
-- Add new entry
redis.call('ZADD', key, now, now .. '-' .. math.random())
redis.call('EXPIRE', key, window_seconds)
return {1, limit - count - 1}
else
-- Get oldest entry for reset time
local oldest = redis.call('ZRANGE', key, 0, 0, 'WITHSCORES')
local reset_at = oldest[2] and (oldest[2] + window_seconds * 1000) or (now + window_seconds * 1000)
return {0, 0, reset_at}
end
`;
const result = (await this.redis.eval(
luaScript,
1,
redisKey,
now.toString(),
windowStart.toString(),
limit.toString(),
windowSeconds.toString(),
)) as number[];
return {
allowed: result[0] === 1,
remaining: result[1],
resetAt: result[2] || now + windowSeconds * 1000,
};
}
}
Redis Cluster Support:
class ClusterRateLimiter {
private cluster: Redis.Cluster;
constructor(nodes: { host: string; port: number }[]) {
this.cluster = new Redis.Cluster(nodes, {
redisOptions: {
password: process.env.REDIS_PASSWORD,
},
scaleReads: "slave",
});
}
async checkLimit(
identifier: string,
limit: number,
windowMs: number,
): Promise<boolean> {
// Use hash tags to ensure all keys for a user go to same slot
const key = `{ratelimit:${identifier}}:counter`;
const count = await this.cluster.incr(key);
if (count === 1) {
await this.cluster.pexpire(key, windowMs);
}
return count <= limit;
}
}
Backpressure Patterns
Circuit Breaker with Rate Limiting:
enum CircuitState {
CLOSED = "CLOSED",
OPEN = "OPEN",
HALF_OPEN = "HALF_OPEN",
}
class CircuitBreaker {
private state: CircuitState = CircuitState.CLOSED;
private failures: number = 0;
private lastFailure: number = 0;
private successCount: number = 0;
constructor(
private failureThreshold: number = 5,
private resetTimeout: number = 30000,
private halfOpenSuccessThreshold: number = 3,
) {}
async execute<T>(fn: () => Promise<T>): Promise<T> {
if (this.state === CircuitState.OPEN) {
if (Date.now() - this.lastFailure >= this.resetTimeout) {
this.state = CircuitState.HALF_OPEN;
this.successCount = 0;
} else {
throw new Error("Circuit breaker is OPEN");
}
}
try {
const result = await fn();
this.onSuccess();
return result;
} catch (error) {
this.onFailure();
throw error;
}
}
private onSuccess(): void {
if (this.state === CircuitState.HALF_OPEN) {
this.successCount++;
if (this.successCount >= this.halfOpenSuccessThreshold) {
this.state = CircuitState.CLOSED;
this.failures = 0;
}
} else {
this.failures = 0;
}
}
private onFailure(): void {
this.failures++;
this.lastFailure = Date.now();
if (this.failures >= this.failureThreshold) {
this.state = CircuitState.OPEN;
}
}
getState(): CircuitState {
return this.state;
}
}
Adaptive Rate Limiting:
class AdaptiveRateLimiter {
private currentLimit: number;
private successRate: number = 1;
private window: { success: boolean; timestamp: number }[] = [];
constructor(
private minLimit: number,
private maxLimit: number,
private targetSuccessRate: number = 0.95,
private windowSize: number = 100,
) {
this.currentLimit = maxLimit;
}
recordResult(success: boolean): void {
const now = Date.now();
this.window.push({ success, timestamp: now });
// Keep window size manageable
if (this.window.length > this.windowSize) {
this.window.shift();
}
this.adjustLimit();
}
private adjustLimit(): void {
if (this.window.length < 10) return;
const successes = this.window.filter((r) => r.success).length;
this.successRate = successes / this.window.length;
if (this.successRate < this.targetSuccessRate) {
// Reduce limit when success rate drops
this.currentLimit = Math.max(
this.minLimit,
Math.floor(this.currentLimit * 0.9),
);
} else if (this.successRate > this.targetSuccessRate + 0.02) {
// Slowly increase limit when stable
this.currentLimit = Math.min(
this.maxLimit,
Math.ceil(this.currentLimit * 1.05),
);
}
}
getCurrentLimit(): number {
return this.currentLimit;
}
getSuccessRate(): number {
return this.successRate;
}
}
Rate Limit Headers and Client Communication
Standard Headers:
interface RateLimitInfo {
limit: number;
remaining: number;
reset: number; // Unix timestamp
retryAfter?: number; // Seconds
}
function setRateLimitHeaders(res: Response, info: RateLimitInfo): void {
// Standard headers
res.setHeader("X-RateLimit-Limit", info.limit.toString());
res.setHeader("X-RateLimit-Remaining", info.remaining.toString());
res.setHeader("X-RateLimit-Reset", info.reset.toString());
// Draft IETF standard headers
res.setHeader("RateLimit-Limit", info.limit.toString());
res.setHeader("RateLimit-Remaining", info.remaining.toString());
res.setHeader("RateLimit-Reset", info.reset.toString());
if (info.retryAfter !== undefined) {
res.setHeader("Retry-After", info.retryAfter.toString());
}
}
// Response body for 429 errors
interface RateLimitErrorResponse {
error: {
code: "RATE_LIMIT_EXCEEDED";
message: string;
retryAfter: number;
limit: number;
resetAt: string;
};
}
function createRateLimitError(info: RateLimitInfo): RateLimitErrorResponse {
return {
error: {
code: "RATE_LIMIT_EXCEEDED",
message: `Rate limit exceeded. Please retry after ${info.retryAfter} seconds.`,
retryAfter: info.retryAfter!,
limit: info.limit,
resetAt: new Date(info.reset * 1000).toISOString(),
},
};
}
Express Middleware:
import { Request, Response, NextFunction } from "express";
interface RateLimiterOptions {
windowMs: number;
max: number;
keyGenerator?: (req: Request) => string;
skip?: (req: Request) => boolean;
handler?: (req: Request, res: Response) => void;
}
function createRateLimiter(options: RateLimiterOptions) {
const limiter = new DistributedRateLimiter(process.env.REDIS_URL!);
return async (req: Request, res: Response, next: NextFunction) => {
// Skip if configured
if (options.skip?.(req)) {
return next();
}
const key = options.keyGenerator?.(req) || req.ip;
const result = await limiter.isAllowed(
key,
options.max,
options.windowMs / 1000,
);
const info: RateLimitInfo = {
limit: options.max,
remaining: result.remaining,
reset: Math.ceil(result.resetAt / 1000),
};
setRateLimitHeaders(res, info);
if (!result.allowed) {
info.retryAfter = Math.ceil((result.resetAt - Date.now()) / 1000);
if (options.handler) {
return options.handler(req, res);
}
return res.status(429).json(createRateLimitError(info));
}
next();
};
}
// Usage
app.use(
"/api/",
createRateLimiter({
windowMs: 60 * 1000, // 1 minute
max: 100,
keyGenerator: (req) => req.user?.id || req.ip,
skip: (req) => req.path === "/api/health",
}),
);
Graceful Degradation
Priority-Based Degradation:
enum RequestPriority {
CRITICAL = 1, // Health checks, auth
HIGH = 2, // User-initiated actions
NORMAL = 3, // Regular API calls
LOW = 4, // Background tasks
BATCH = 5, // Bulk operations
}
class PriorityRateLimiter {
private limiters: Map<RequestPriority, TokenBucket> = new Map();
private systemLoad: number = 0;
constructor() {
// Different limits per priority
this.limiters.set(RequestPriority.CRITICAL, new TokenBucket(1000, 100));
this.limiters.set(RequestPriority.HIGH, new TokenBucket(500, 50));
this.limiters.set(RequestPriority.NORMAL, new TokenBucket(200, 20));
this.limiters.set(RequestPriority.LOW, new TokenBucket(50, 5));
this.limiters.set(RequestPriority.BATCH, new TokenBucket(10, 1));
}
setSystemLoad(load: number): void {
this.systemLoad = Math.max(0, Math.min(1, load));
}
canProcess(priority: RequestPriority): boolean {
// Under high load, reject low-priority requests
if (this.systemLoad > 0.8 && priority > RequestPriority.HIGH) {
return false;
}
if (this.systemLoad > 0.9 && priority > RequestPriority.CRITICAL) {
return false;
}
const limiter = this.limiters.get(priority);
return limiter?.consume() ?? false;
}
}
Feature Flag Integration:
class DegradationManager {
private degradedFeatures: Set<string> = new Set();
async checkAndDegrade(
feature: string,
fallback: () => Promise<any>,
): Promise<any> {
if (this.degradedFeatures.has(feature)) {
return fallback();
}
return null; // Continue with normal execution
}
enableDegradedMode(feature: string): void {
this.degradedFeatures.add(feature);
console.log(`Degraded mode enabled for: ${feature}`);
}
disableDegradedMode(feature: string): void {
this.degradedFeatures.delete(feature);
console.log(`Degraded mode disabled for: ${feature}`);
}
isDegraded(feature: string): boolean {
return this.degradedFeatures.has(feature);
}
}
Best Practices
Algorithm Selection
- Token Bucket: Best for allowing bursts while maintaining average rate
- Sliding Window: Best for precise rate limiting without bursts
- Leaky Bucket: Best for smoothing out traffic to downstream services
Redis Configuration
- Use Redis Cluster for high availability
- Set appropriate memory limits and eviction policies
- Use Lua scripts for atomic operations
- Monitor Redis latency and connection pool
Client Experience
- Always return rate limit headers
- Provide clear error messages with retry timing
- Consider implementing client-side rate limiting
- Document rate limits in API documentation
Monitoring
- Track rate limit hits by endpoint and user
- Alert on sudden spikes in rate limiting
- Monitor for distributed attack patterns
- Log rate limit events for debugging
Security
- Implement IP-based limits as first line of defense
- Use authenticated rate limits for legitimate users
- Consider geographic rate limiting for region-specific abuse
- Implement CAPTCHA for suspicious patterns
Examples
Complete Express Rate Limiting Setup
import express from "express";
import Redis from "ioredis";
const app = express();
const redis = new Redis(process.env.REDIS_URL);
// Rate limiter factory
function rateLimiter(config: {
points: number;
duration: number;
keyPrefix: string;
}) {
return async (
req: express.Request,
res: express.Response,
next: express.NextFunction,
) => {
const key = `${config.keyPrefix}:${req.user?.id || req.ip}`;
try {
const current = await redis.incr(key);
if (current === 1) {
await redis.expire(key, config.duration);
}
const ttl = await redis.ttl(key);
const remaining = Math.max(0, config.points - current);
res.set({
"X-RateLimit-Limit": config.points.toString(),
"X-RateLimit-Remaining": remaining.toString(),
"X-RateLimit-Reset": (Math.floor(Date.now() / 1000) + ttl).toString(),
});
if (current > config.points) {
res.set("Retry-After", ttl.toString());
return res.status(429).json({
error: "Too Many Requests",
retryAfter: ttl,
});
}
next();
} catch (error) {
// Fail open if Redis is unavailable
console.error("Rate limiter error:", error);
next();
}
};
}
// Apply different limits to different endpoints
app.use(
"/api/auth",
rateLimiter({ points: 5, duration: 60, keyPrefix: "auth" }),
);
app.use(
"/api/search",
rateLimiter({ points: 30, duration: 60, keyPrefix: "search" }),
);
app.use("/api", rateLimiter({ points: 100, duration: 60, keyPrefix: "api" }));
app.listen(3000);
Client-Side Rate Limit Handling
class APIClient {
private retryAfter: number = 0;
async request<T>(url: string, options?: RequestInit): Promise<T> {
// Check if we're in a rate-limited state
if (this.retryAfter > Date.now()) {
const waitTime = this.retryAfter - Date.now();
throw new Error(
`Rate limited. Retry after ${Math.ceil(waitTime / 1000)}s`,
);
}
const response = await fetch(url, options);
// Update rate limit state from headers
const remaining = response.headers.get("X-RateLimit-Remaining");
const reset = response.headers.get("X-RateLimit-Reset");
if (response.status === 429) {
const retryAfter = response.headers.get("Retry-After");
this.retryAfter = Date.now() + parseInt(retryAfter || "60") * 1000;
const body = await response.json();
throw new RateLimitError(body.error, parseInt(retryAfter || "60"));
}
if (!response.ok) {
throw new Error(`HTTP ${response.status}`);
}
return response.json();
}
async requestWithRetry<T>(
url: string,
options?: RequestInit,
maxRetries: number = 3,
): Promise<T> {
let lastError: Error | null = null;
for (let i = 0; i < maxRetries; i++) {
try {
return await this.request<T>(url, options);
} catch (error) {
lastError = error as Error;
if (error instanceof RateLimitError) {
// Wait for the retry-after period
await this.delay(error.retryAfter * 1000);
} else {
// Exponential backoff for other errors
await this.delay(Math.pow(2, i) * 1000);
}
}
}
throw lastError;
}
private delay(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}
}
class RateLimitError extends Error {
constructor(
message: string,
public retryAfter: number,
) {
super(message);
this.name = "RateLimitError";
}
}
Repository
