ng;
plan: 'free' | 'pro' | 'enterprise';
region: 'us' | 'eu' | 'ap';
}
// Global augmentation: merges a `tenant` property of type TenantContext into
// the Express `Request` interface so handlers can read `req.tenant` with types.
declare global {
namespace Express {
interface Request {
// Populated by tenantMiddleware (it assigns `req.tenant`).
tenant: TenantContext;
}
}
}
/**
 * Express middleware that resolves the calling tenant from a bearer token.
 *
 * - Responds 401 when the Authorization header is absent or is not a
 *   `Bearer ` credential.
 * - Responds 403 when token verification (or reading the payload) throws.
 * - Otherwise stores `{ tenantId, plan, region }` on `req.tenant` and calls
 *   `next()`. `region` falls back to `'us'` when the payload has none.
 */
export function tenantMiddleware(req: Request, res: Response, next: NextFunction) {
  const authHeader = req.headers.authorization;
  if (!authHeader?.startsWith('Bearer ')) {
    return res.status(401).json({ error: 'Missing token' });
  }

  try {
    const token = authHeader.split(' ')[1];
    // Verify the extracted token. The previous code passed `payload` to its
    // own initializer, which always threw (temporal dead zone) and turned
    // every request into a 403.
    const payload = verifyToken(token); // JWT verification stub
    req.tenant = {
      tenantId: payload.tenant_id,
      plan: payload.plan,
      region: payload.region || 'us'
    };
  } catch {
    res.status(403).json({ error: 'Invalid tenant token' });
    return;
  }

  // Called outside the try block so that a synchronous error thrown by a
  // downstream handler is not misreported as an invalid token.
  next();
}
### Step 2: Streaming Usage Metering
Batch metering introduces billing delays and reconciliation overhead. Event-driven metering captures consumption at the source and pushes it to a streaming layer for real-time aggregation.
```typescript
import { Kafka, Producer } from 'kafkajs';
import { Span, trace } from '@opentelemetry/api';
// Kafka client configured with a single broker read from the KAFKA_BROKER
// environment variable. The `!` only silences the type checker; nothing here
// validates that the variable is actually set.
const kafka = new Kafka({ brokers: [process.env.KAFKA_BROKER!] });
// Module-level producer used by emitUsageEvent.
const producer: Producer = kafka.producer();
// Tracer registered under the name 'metering'; used to create usage spans.
const tracer = trace.getTracer('metering');
/**
 * Publishes one usage record to the `usage.events` Kafka topic inside an
 * `emit.usage` span.
 *
 * The message value is a JSON object with `tenant_id`, `metric`, `quantity`,
 * a millisecond `timestamp` (Date.now()) and the span's `trace_id`.
 *
 * @param tenantId   Tenant the usage is attributed to.
 * @param metric     Name of the metered metric.
 * @param quantity   Amount consumed.
 * @param parentSpan Accepted for API compatibility. NOTE(review): it is not
 *                   used to parent the new span; the span is created in the
 *                   currently active context.
 * @throws Re-throws any error raised while connecting or sending, after
 *         recording it on the span.
 */
export async function emitUsageEvent(
  tenantId: string,
  metric: string,
  quantity: number,
  parentSpan?: Span
) {
  return tracer.startActiveSpan('emit.usage', async (span) => {
    try {
      // Connect is awaited on every call, as in the original snippet.
      await producer.connect();
      await producer.send({
        topic: 'usage.events',
        messages: [{
          value: JSON.stringify({
            tenant_id: tenantId,
            metric,
            quantity,
            timestamp: Date.now(),
            trace_id: span.spanContext().traceId
          })
        }]
      });
      // 1 === SpanStatusCode.OK in @opentelemetry/api.
      span.setStatus({ code: 1 });
    } catch (err) {
      // Previously a failed send ended the span without any error status,
      // hiding lost billing events from tracing. Record it, then rethrow so
      // callers still see the failure.
      span.recordException(err instanceof Error ? err : String(err));
      // 2 === SpanStatusCode.ERROR in @opentelemetry/api.
      span.setStatus({
        code: 2,
        message: err instanceof Error ? err.message : String(err)
      });
      throw err;
    } finally {
      span.end();
    }
  });
}
```
### Step 3: Cost Attribution & Optimization
Cloud costs must be mapped to tenants using OpenTelemetry semantic conventions and infrastructure tagging. This enables automated right-sizing and chargeback models.
import { metrics } from '@opentelemetry/api';
// Meter registered under the name 'saaS.cost'.
const meter = metrics.getMeter('saaS.cost');
// Counter instrument 'cloud.cost.by_tenant'; recordTenantCost adds to it.
const costCounter = meter.createCounter('cloud.cost.by_tenant', {
description: 'Infrastructure cost attributed per tenant'
});
/**
 * Adds `costUsd` to the per-tenant cost counter.
 *
 * The data point is tagged with the tenant id, the service name, a fixed
 * currency of 'USD' and an attribution mode of 'auto'.
 */
export function recordTenantCost(tenantId: string, service: string, costUsd: number) {
  const attributes = {
    tenant_id: tenantId,
    service: service,
    currency: 'USD',
    attribution: 'auto'
  };
  costCounter.add(costUsd, attributes);
}
### Step 4: Automated Resource Governance
Static scaling creates waste. Predictive auto-scaling combined with tenant-tier quotas prevents noisy neighbors and caps marginal cost.
/**
 * Decides whether a tenant may keep consuming a resource, based on a fixed
 * usage ceiling per plan (free: 100, pro: 1000, enterprise: unlimited).
 */
export class TenantQuotaEnforcer {
  private readonly limits: Record<string, number> = {
    free: 100,
    pro: 1000,
    enterprise: Infinity
  };

  /**
   * Checks `currentUsage` against the ceiling for `plan`.
   *
   * @param tenantId     Tenant being checked; only used for the notification.
   * @param plan         Plan name. Unknown plans get a ceiling of 0, so they
   *                     are always denied.
   * @param currentUsage Usage consumed so far.
   * @returns `true` when usage is below the ceiling; `false` (after
   *          triggering the upgrade notification) once it is reached.
   */
  async enforce(tenantId: string, plan: string, currentUsage: number): Promise<boolean> {
    const limit = this.limitFor(plan);
    if (currentUsage >= limit) {
      await this.notifyUpgrade(tenantId, plan);
      return false;
    }
    return true;
  }

  /** Returns the ceiling for `plan`, or 0 when the plan is not configured. */
  private limitFor(plan: string): number {
    // Own-property check: a plain index lookup would resolve inherited keys
    // such as "constructor" or "__proto__" to non-numeric values, making the
    // `>=` comparison false and silently granting unlimited quota.
    const configured = Object.prototype.hasOwnProperty.call(this.limits, plan)
      ? this.limits[plan]
      : undefined;
    return configured ?? 0;
  }

  private async notifyUpgrade(tenantId: string, plan: string) {
    // Trigger webhook or message queue for upgrade flow
    console.log(`[QUOTA] Tenant ${tenantId} on ${plan} approaching limit. Trigger upgrade.`);
  }
}
Architecture Decisions & Rationale
- Row-Level Security + Partitioning over DB-per-tenant: Reduces operational overhead, enables cross-tenant analytics, and cuts database licensing costs by ~40%. Partitioning by `tenant_id` maintains query performance.
- Event Streaming over Polling: Kafka/PubSub decouples metering from core transactional load. Guarantees at-least-once delivery for billing accuracy without blocking user requests.
- OpenTelemetry Cost Tagging: Standardizes telemetry across services. Enables direct mapping of CPU, memory, and I/O to tenant IDs, transforming observability into a financial control plane.
- Predictive Scaling over Reactive: Uses historical usage patterns and plan limits to provision resources ahead of demand spikes, reducing cold-start latency and avoiding over-provisioning during low-activity windows.
Pitfall Guide
- Over-Isolating Tenants: Provisioning separate databases or Kubernetes namespaces for every tenant multiplies operational overhead and defeats economies of scale. Use row-level security with partitioning until compliance or data residency mandates strict isolation.
- Ignoring Noisy Neighbor Effects: Without quota enforcement and request throttling, a single high-usage tenant can degrade performance for others. Implement tenant-aware rate limiting at the API gateway and database connection pool level.
- Batch Metering for Usage-Based Pricing: Processing metering nightly creates billing discrepancies, customer disputes, and revenue leakage. Stream events at ingestion time and aggregate in real-time using windowed queries.
- Missing Cost Attribution in Observability: Logging CPU or memory without tenant tags makes it impossible to calculate marginal cost per tenant. Instrument all services with `tenant_id` and `service` labels in OpenTelemetry.
- Hardcoded Pricing Tiers: Static plans break when usage patterns evolve. Build a dynamic metering engine that supports tiered, volume, and usage-based pricing without code deployments.
- Neglecting Data Residency Routing: Global SaaS platforms must route data to compliant regions. Failing to enforce region-aware routing triggers GDPR/CCPA violations and forces costly data migrations later.
- Skipping Graceful Degradation: When metering or billing services degrade, core product functionality should continue. Implement fallback queues and circuit breakers to prevent billing outages from blocking user workflows.
Best Practices from Production:
- Tag every infrastructure resource (EC2, RDS, Lambda, Load Balancer) with `tenant_id` and `environment`.
- Use idempotent event consumers to prevent double-billing during retries.
- Run cost attribution dashboards alongside SLOs. Treat margin per tenant as a first-class operational metric.
- Implement tenant-level feature flags to roll out metering changes safely.
- Audit data access patterns quarterly to prune unused indexes and reduce storage costs.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| <500 tenants, compliance light | Schema-per-tenant + RLS | Simplifies queries, reduces DB count, lowers licensing | -35% infra cost |
| 500-5000 tenants, usage-based pricing | Event streaming + real-time aggregation | Eliminates billing latency, supports dynamic pricing | +12% margin retention |
| >5000 tenants, strict data residency | Region-sharded databases + tenant-aware routing | Ensures compliance, reduces cross-region egress fees | -28% network cost |
| Enterprise SLA required | Dedicated compute pool + predictive scaling | Guarantees performance, avoids noisy neighbor impact | +18% infra cost, +25% NRR |
Configuration Template
// config/metering.ts
// Kafka connection and consumer settings; the broker falls back to a local
// default when KAFKA_BROKER is unset or empty.
const kafkaSettings = {
  brokers: [process.env.KAFKA_BROKER || 'localhost:9092'],
  topic: 'usage.events',
  groupId: 'saaS-metering-consumer',
  retry: { retries: 3, initialRetryTime: 1000 }
};

// Per-plan ceilings for API calls, storage (GB) and compute units.
const planQuotas = {
  free: { apiCalls: 1000, storageGB: 5, computeCU: 0.5 },
  pro: { apiCalls: 10000, storageGB: 50, computeCU: 2.0 },
  enterprise: { apiCalls: Infinity, storageGB: Infinity, computeCU: Infinity }
};

// OpenTelemetry service identity, export interval and cost-attribution labels.
const otelSettings = {
  serviceName: 'saaS-core',
  exportInterval: 5000,
  costLabels: ['tenant_id', 'service', 'region', 'plan']
};

// Fallback behaviour when billing/metering is degraded.
const degradationSettings = {
  billingTimeout: 3000,
  fallbackQueue: 'usage.fallback',
  circuitBreakerThreshold: 0.5
};

/** Aggregated metering configuration, assembled from the sections above. */
export const meteringConfig = {
  kafka: kafkaSettings,
  quotas: planQuotas,
  otel: otelSettings,
  degradation: degradationSettings
};
Quick Start Guide
- Initialize Tenant Middleware: Add the `tenantMiddleware` to your Express/Fastify router. Verify that the JWT payload contains `tenant_id`, `plan`, and `region`.
- Deploy Streaming Consumer: Spin up a Kafka consumer group subscribed to `usage.events`. Implement idempotent upsert logic to aggregate daily/weekly usage.
- Instrument Cost Labels: Add OpenTelemetry meters to your core services. Ensure every span and metric includes `tenant_id` and `service` attributes.
- Configure Quota Enforcement: Load `meteringConfig` into your API gateway. Apply sliding window rate limits based on plan thresholds.
- Validate End-to-End: Simulate a tenant request, verify event emission, check consumer aggregation, and confirm cost attribution appears in your observability dashboard. Total setup: <5 minutes.