Counter } from 'prom-client';
// Label names shared by both HTTP metrics; each metric receives its own copy.
const httpMetricLabelNames = ['method', 'route', 'status_code'] as const;

// Histogram of HTTP request durations in seconds, bucketed from 10 ms up to 5 s.
const httpRequestDuration = new Histogram({
  name: 'http_request_duration_seconds',
  help: 'Duration of HTTP requests in seconds',
  labelNames: [...httpMetricLabelNames],
  buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5],
});

// Counter of HTTP requests, labelled the same way as the duration histogram.
const httpRequestTotal = new Counter({
  name: 'http_requests_total',
  help: 'Total number of HTTP requests',
  labelNames: [...httpMetricLabelNames],
});
/**
 * Express middleware that records one duration observation and one counter
 * increment per request, once the response has finished.
 *
 * Labels are `method`, `route` and `status_code`. The `route` label is the
 * matched route pattern (e.g. `/users/:id`). Requests that matched no route
 * are grouped under the constant `unmatched` instead of their raw URL, so
 * arbitrary request paths cannot create an unbounded number of time series.
 */
export const metricsMiddleware = (req: Request, res: Response, next: NextFunction) => {
  const startedAt = process.hrtime();
  res.on('finish', () => {
    // process.hrtime(start) returns the elapsed [seconds, nanoseconds] pair.
    const [seconds, nanoseconds] = process.hrtime(startedAt);
    const durationSeconds = seconds + nanoseconds / 1e9;

    // req.route is only populated when a route handler matched the request.
    const matchedPath = req.route?.path;
    const route = matchedPath ? String(matchedPath) : 'unmatched';
    const statusCode = res.statusCode.toString();

    // Compute the label values once so both metrics always agree.
    httpRequestDuration.labels(req.method, route, statusCode).observe(durationSeconds);
    httpRequestTotal.labels(req.method, route, statusCode).inc();
  });
  next();
};
#### 2. Solving the N+1 Problem with Batching
The N+1 query pattern is the most common performance killer in ORMs and GraphQL resolvers. Fetching related resources individually per item causes database connection exhaustion and latency spikes. The solution is batching via the DataLoader pattern.
```typescript
import DataLoader from 'dataloader';
import { Pool } from 'pg';
/**
 * Builds a fresh set of DataLoaders backed by the given pg pool.
 *
 * DataLoader batches the keys requested via `load` and caches results per
 * loader instance, so create one set of loaders per incoming request.
 *
 * @param dbPool pg connection pool used to run the batched queries
 * @returns an object with a `users` loader keyed by user id (as a string)
 */
export const createBatchLoaders = (dbPool: Pool) => ({
  users: new DataLoader(
    async (userIds: readonly string[]) => {
      // Single query for all requested IDs
      const result = await dbPool.query(
        'SELECT id, name, email FROM users WHERE id = ANY($1)',
        [userIds],
      );
      // Index rows by the string form of their id: loader keys are strings, but
      // the driver may return a numeric id, which would never match a string key.
      const usersById = new Map(result.rows.map((user) => [String(user.id), user]));
      // The batch function must return one entry per key, in key order;
      // ids with no matching row resolve to null.
      return userIds.map((id) => usersById.get(id) ?? null);
    },
    { maxBatchSize: 100, cacheKeyFn: (key: string) => key },
  ),
});
// Usage in resolver/controller
/**
 * Looks up one user through the request's `users` loader.
 *
 * @param userId id of the user to load
 * @param loaders loader set produced by `createBatchLoaders`
 * @returns whatever the `users` loader resolves to for that id
 */
const getUser = async (userId: string, loaders: ReturnType<typeof createBatchLoaders>) => {
  const { users } = loaders;
  return users.load(userId);
};
```
#### 3. Payload Optimization: Compression and Serialization
Network transfer time is a significant component of latency. Enabling compression and optimizing serialization formats reduces payload size, lowering bandwidth usage and parsing time.
- Compression: Use Brotli for text-based payloads. It offers better compression ratios than Gzip with comparable decompression speed.
- Serialization: For internal microservice communication, consider Protocol Buffers or MessagePack over JSON to reduce serialization overhead and payload size.
import compression from 'compression';
import { Request, Response, NextFunction } from 'express';
import { constants as zlibConstants } from 'zlib';

/**
 * Response-compression middleware.
 *
 * - Responses below 1 KB (`threshold`) are left uncompressed.
 * - A request carrying an `x-no-compression` header is never compressed;
 *   every other request defers to the library's default `compression.filter`.
 * - Brotli output uses quality 6, the same setting as the gzip/deflate `level`.
 */
export const compressionMiddleware = compression({
  level: 6, // Balanced speed/compression ratio
  threshold: 1024, // Only compress responses > 1KB
  filter: (req: Request, res: Response) => {
    // Let clients opt out of compression explicitly
    if (req.headers['x-no-compression']) {
      return false;
    }
    return compression.filter(req, res);
  },
  brotli: {
    params: {
      // Imported constant instead of an inline require(): typed, and valid under native ESM.
      [zlibConstants.BROTLI_PARAM_QUALITY]: 6,
    },
  },
});
#### 4. Caching Strategies
Implement a multi-tier caching strategy to reduce compute load and database hits.
- Edge/CDN Caching: For static or semi-static resources, cache at the edge.
- Application Cache: Use in-memory caching (e.g., Redis) for frequently accessed data with short TTLs.
- Cache Invalidation: Implement cache-aside or write-through patterns. Avoid cache stampedes by using probabilistic early expiration or mutex locks.
import { createClient } from 'redis';
// Shared Redis client. The URL can be overridden with REDIS_URL and falls back
// to a local instance.
const redisClient = createClient({ url: process.env.REDIS_URL ?? 'redis://localhost:6379' });
// Register an 'error' listener before connecting: an 'error' event emitted with
// no listener attached is thrown by Node's EventEmitter.
redisClient.on('error', (err: Error) => {
  console.error('Redis client error', err);
});
await redisClient.connect();
/**
 * Cache-aside read: returns the cached JSON value for `key` when present,
 * otherwise calls `fetchFn`, stores its result and returns it.
 *
 * The stored TTL is reduced by up to 10% of random jitter so that keys written
 * at the same moment do not all expire together. This does not coalesce
 * concurrent misses: each caller that misses the same key calls `fetchFn`.
 *
 * @param key cache key
 * @param fetchFn loader invoked on a cache miss
 * @param ttl nominal time-to-live in seconds
 * @returns the cached or freshly fetched value
 */
export const getCachedData = async (key: string, fetchFn: () => Promise<any>, ttl: number) => {
  const cached = await redisClient.get(key);
  if (cached) {
    try {
      return JSON.parse(cached);
    } catch {
      // Corrupt or non-JSON entry: treat it as a miss and overwrite it below.
    }
  }

  const data = await fetchFn();

  // JSON.stringify(undefined) yields undefined, which cannot be stored.
  const serialized = JSON.stringify(data);
  if (serialized === undefined) {
    return data;
  }

  // Add jitter to the TTL so keys written together expire at different times.
  const jitter = Math.floor(Math.random() * (ttl * 0.1));
  // EX must be a positive integer number of seconds.
  const effectiveTtl = Math.max(1, Math.floor(ttl - jitter));
  await redisClient.set(key, serialized, { EX: effectiveTtl });
  return data;
};
#### 5. Async Offloading
For operations that do not require immediate response, offload processing to a message queue. This decouples the API response time from heavy computation or third-party integrations.
import { RabbitMQ } from './mq-client';
/**
 * Accepts an order for asynchronous processing.
 *
 * The order is published to the `order-processing` queue first, and the client
 * is told 202 Accepted only once that publish has succeeded. If publishing
 * fails, the handler logs the error and answers 503, rather than acknowledging
 * an order that was never queued and leaving a rejected promise unhandled.
 * The heavy processing itself still happens in the queue consumer.
 */
export const processOrder = async (req: Request, res: Response) => {
  const { orderId, payload } = req.body;

  try {
    // Hand the heavy work to the queue.
    await RabbitMQ.publish('order-processing', {
      orderId,
      payload,
      timestamp: Date.now(),
    });
  } catch (err) {
    console.error('Failed to enqueue order', orderId, err);
    res.status(503).json({ status: 'unavailable', orderId });
    return;
  }

  // Acknowledge receipt once the order is queued.
  res.status(202).json({ status: 'accepted', orderId });
};
Pitfall Guide
- Optimizing the Wrong Layer: Developers often optimize database queries while ignoring network latency or serialization overhead. Always profile the full request lifecycle. Use APM tools to identify the true bottleneck before applying fixes.
- Ignoring P99 Latency: Average latency masks tail latency issues. A system with 50ms average latency can still have 1% of requests taking 2000ms, which may trigger timeouts in downstream services. Monitor and alert on P99/P999 metrics.
- Cache Stampedes: When a popular cache key expires, multiple concurrent requests may trigger simultaneous database fetches, causing a thundering herd. Mitigate this with mutex locks, probabilistic early expiration, or background refresh patterns.
- Over-fetching and Under-fetching: REST APIs often return excessive data or require multiple round-trips. Implement field selection (via GraphQL or REST projections) to return only requested fields. Use DataLoader or batch endpoints to reduce round-trips.
- Blocking the Event Loop: In Node.js environments, CPU-intensive operations or synchronous I/O block the event loop, degrading throughput for all concurrent requests. Offload CPU-heavy tasks to worker threads or separate services.
- Missing Connection Pooling: Failing to configure database connection pools leads to connection exhaustion under load. Ensure pool sizes are tuned based on concurrency and query duration. Use connection pooling middleware and monitor pool utilization.
- Lack of Backpressure: APIs that accept requests faster than they can process them will eventually crash. Implement rate limiting, queueing, and backpressure mechanisms to handle traffic bursts gracefully.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| High Read, Low Write | CDN + Read Replicas | Offloads traffic from primary DB; reduces latency globally. | Low infra cost; high cache hit ratio. |
| Complex Queries, High Latency | GraphQL / Field Selection | Reduces payload size and eliminates over-fetching. | Dev cost; lower bandwidth and compute. |
| Bursty Traffic | Async Queue + Rate Limit | Smooths traffic spikes; prevents resource exhaustion. | Queue service cost; improved stability. |
| Mobile Clients | Protocol Buffers / Brotli | Minimizes bandwidth usage; faster parsing on device. | Higher CPU for compression; lower data costs. |
| Real-time Updates | WebSockets / Server-Sent Events | Eliminates polling overhead; reduces latency. | Connection management overhead. |
Configuration Template
// express-server.ts
import express from 'express';
import type { NextFunction, Request, Response } from 'express';
import { compressionMiddleware } from './middleware/compression';
import { metricsMiddleware } from './middleware/metrics';
import { rateLimit } from 'express-rate-limit';
import { createBatchLoaders } from './loaders';
import { Pool } from 'pg';

// Tell the type checker about the request-scoped loaders attached below.
declare global {
  namespace Express {
    interface Request {
      loaders: ReturnType<typeof createBatchLoaders>;
    }
  }
}

const app = express();

const dbPool = new Pool({
  connectionString: process.env.DATABASE_URL,
  max: 20, // Tuned based on load test
  idleTimeoutMillis: 30000,
});

// Global middleware
app.use(metricsMiddleware);
app.use(compressionMiddleware);

// Rate limiting
const limiter = rateLimit({
  windowMs: 15 * 60 * 1000, // 15 minutes
  max: 100, // Limit each IP to 100 requests per windowMs
  standardHeaders: true,
  legacyHeaders: false,
});
app.use('/api/', limiter);

// Request-scoped loaders: a fresh set per request
app.use((req, _res, next) => {
  req.loaders = createBatchLoaders(dbPool);
  next();
});

// Routes
app.get('/api/users/:id', async (req, res, next) => {
  try {
    const user = await req.loaders.users.load(req.params.id);
    if (user == null) {
      // The loader yields null for ids with no matching row.
      res.status(404).json({ error: 'User not found' });
      return;
    }
    res.json(user);
  } catch (err) {
    // Forward failures from the async handler to the error middleware below.
    next(err);
  }
});

// Error handling (Express identifies error middleware by its four parameters)
app.use((err: Error, _req: Request, res: Response, _next: NextFunction) => {
  console.error(err.stack);
  res.status(500).json({ error: 'Internal Server Error' });
});

const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});
Quick Start Guide
- Initialize Project: Create a new TypeScript project with `express`, `compression`, `dataloader`, and `prom-client`.
- Add Metrics: Implement the `metricsMiddleware` to record request durations and request counts, from which P99 latency can be derived. Export the metrics for Prometheus scraping.
- Enable Compression: Add `compressionMiddleware` with Brotli configuration to reduce payload sizes.
- Implement Batching: Create DataLoader instances for your primary data entities to eliminate N+1 queries. Integrate into resolvers/controllers.
- Load Test: Run a load test using `k6` or Artillery to validate P99 latency improvements and throughput gains. Monitor metrics and adjust configurations as needed.