t { Pool, PoolClient } from 'pg';
import { context, trace } from '@opentelemetry/api';
export class InstrumentedPool extends Pool {
private queryDuration: any;
private queryErrors: any;
private poolWaitTime: any;
constructor(meterProvider: MeterProvider, config: any) {
super(config);
const meter = meterProvider.getMeter('database-metrics');
this.queryDuration = meter.createHistogram('db.client.query.duration', {
description: 'Duration of database queries in milliseconds',
unit: 'ms',
});
this.queryErrors = meter.createCounter('db.client.query.errors', {
description: 'Number of failed database queries',
});
this.poolWaitTime = meter.createHistogram('db.client.pool.wait_time', {
description: 'Time clients wait for a connection from the pool',
unit: 'ms',
});
}
async connect(): Promise<PoolClient> {
const startTime = Date.now();
const client = await super.connect();
const waitTime = Date.now() - startTime;
this.poolWaitTime.record(waitTime, {
'db.pool.name': this.options.database || 'default'
});
// Instrument query execution
const originalQuery = client.query.bind(client);
client.query = async (text: string, values?: any[]) => {
const queryStart = Date.now();
const span = trace.getSpan(context.active())?.addEvent('db.query');
try {
const result = await originalQuery(text, values);
const duration = Date.now() - queryStart;
this.queryDuration.record(duration, {
'db.system': 'postgresql',
'db.operation': this.extractOperation(text),
'db.statement': this.sanitize(text), // Avoid high cardinality
});
return result;
} catch (error) {
this.queryErrors.add(1, {
'db.system': 'postgresql',
'error.type': error instanceof Error ? error.constructor.name : 'Unknown',
});
throw error;
}
};
return client;
}
private extractOperation(query: string): string {
const match = query.trim().match(/^(SELECT|INSERT|UPDATE|DELETE|CREATE|DROP)/i);
return match ? match[1].toUpperCase() : 'OTHER';
}
private sanitize(query: string): string {
// Replace literals with placeholders to prevent high cardinality
return query.replace(/'[^']*'/g, "'?'").replace(/\b\d+\b/g, '?');
}
}
**Server-Side Instrumentation:**
Enable native statistics collectors. For PostgreSQL, `pg_stat_statements` is non-negotiable. It aggregates query statistics, allowing you to identify top consumers of time and I/O.
```sql
-- Enable extension (requires superuser)
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
-- Critical view for monitoring top queries
SELECT
query,
calls,
total_exec_time,
mean_exec_time,
rows,
shared_blks_hit,
shared_blks_read
FROM pg_stat_statements
ORDER BY total_exec_time DESC
LIMIT 20;
2. Metric Selection and Aggregation
Adopt the RED method for client-side metrics and USE method for server-side resources.
- Rate: Queries per second.
- Errors: Failed queries, connection refusals, deadlocks.
- Duration: Histogram of query latencies. Focus on P99 and P999, not averages.
- Utilization: CPU, Memory, Disk I/O.
- Saturation: Connection pool usage, queue depth, lock waits.
3. Architecture Decisions
- Pull vs. Push: Use a pull-based model (Prometheus) for server metrics to ensure reliability; use a push-based model (OTEL Collector) for application-level database metrics to capture context.
- Storage: Store high-resolution histograms for 7 days and aggregate summaries for 90 days. Avoid storing raw query text in metric labels; use fingerprints or normalized templates.
- Rationale: Separating application metrics from infrastructure metrics prevents metric explosion and allows independent scaling of monitoring components.
Pitfall Guide
1. Monitoring Averages Instead of Percentiles
Mistake: Alerting on average query latency.
Impact: Averages hide tail latency. If 99% of queries take 10ms and 1% take 5000ms, the average might be 60ms, triggering no alert, while a significant portion of users experience timeouts.
Best Practice: Always configure alerts on P95 or P99 latency. Use histograms to calculate percentiles dynamically.
2. Ignoring Connection Pool Saturation
Mistake: Only monitoring active connections.
Impact: Connection pools often have a queue. If the pool is exhausted, clients wait. This wait time is invisible if you only count active connections but is a primary cause of application latency spikes.
Best Practice: Monitor pool.waiting_clients or equivalent queue metrics. Alert when waiting clients exceed a threshold or wait time exceeds 500ms.
3. High Cardinality in Labels
Mistake: Including raw SQL text or user IDs in metric labels.
Impact: This causes metric explosion, crashing the monitoring backend and incurring massive storage costs.
Best Practice: Normalize queries by replacing literals with placeholders. Never include user-specific data in metric labels.
4. Lack of Correlation with Traces
Mistake: Database metrics exist in isolation from application traces.
Impact: When a latency spike occurs, engineers must manually correlate timestamps between monitoring dashboards and trace explorers, slowing down diagnosis.
Best Practice: Inject trace IDs into database queries where supported, or ensure the OTEL instrumentation propagates context so database spans are children of application spans.
5. Alerting on Transient Spikes
Mistake: Alerting on CPU or I/O spikes that last seconds.
Impact: Modern databases and cloud instances handle bursty workloads gracefully. Alerting on every micro-spike causes fatigue.
Best Practice: Use evaluation windows (e.g., "CPU > 80% for 5 minutes") and hysteresis to filter noise.
6. Overlooking Lock Contention
Mistake: Focusing only on query execution time.
Impact: A query may be efficient but stuck waiting for a lock held by a long-running transaction. This manifests as high latency but low CPU usage.
Best Practice: Monitor lock_waits, deadlocks, and long_running_transactions. Alert on lock wait time exceeding SLO thresholds.
7. Static Thresholds for Dynamic Workloads
Mistake: Hardcoding thresholds like "Alert if connections > 100".
Impact: As the application scales, thresholds become obsolete, leading to missed alerts or constant noise.
Best Practice: Use anomaly detection or relative thresholds (e.g., "Connections > 2x the moving average of the last 24 hours").
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|
| PostgreSQL on Kubernetes | Prometheus + postgres_exporter + OTEL Client | Standard ecosystem, rich metrics via exporter, low overhead. | Low (Open source). |
| MongoDB Sharded Cluster | MongoDB Cloud Manager / Atlas + OTEL | Native tools provide sharding-aware metrics; OTEL adds app context. | Medium (Managed tool licensing). |
| Legacy MySQL 5.7 | mysqld_exporter + ProxySQL Metrics | ProxySQL provides query-level insights that native MySQL lacks. | Low. |
| High-Throughput OLTP | eBPF-based Monitoring (e.g., Pixie) | Captures network and query data without code changes or DB overhead. | Medium (Compute overhead). |
| Multi-Cloud Hybrid | VictoriaMetrics / Grafana Cloud | Vendor-agnostic storage, handles high cardinality well, unified view. | High (SaaS costs). |
Configuration Template
Prometheus Scrape Configuration for PostgreSQL:
global:
scrape_interval: 15s
evaluation_interval: 15s
scrape_configs:
- job_name: 'postgresql'
static_configs:
- targets: ['db-primary:9187', 'db-replica:9187']
metrics_path: /metrics
params:
# Collect extended metrics
collect[]:
- postmaster
- pg_stat_database
- pg_stat_statements
- pg_replication_lag
metric_relabel_configs:
# Sanitize query labels to prevent cardinality explosion
- source_labels: [datname]
target_label: datname
replacement: '${1}'
- source_labels: [query]
regex: '(.{50}).*'
target_label: query_template
replacement: '${1}...'
action: replace
OpenTelemetry Collector Config Snippet:
receivers:
otlp:
protocols:
grpc:
http:
processors:
batch:
timeout: 5s
send_batch_size: 1000
attributes/database:
actions:
- key: db.statement
action: hash
# Hash SQL to prevent high cardinality in traces
exporters:
prometheus:
endpoint: "0.0.0.0:8889"
namespace: "db"
otlp/jaeger:
endpoint: "jaeger-collector:14250"
tls:
insecure: true
service:
pipelines:
metrics:
receivers: [otlp]
processors: [batch]
exporters: [prometheus]
traces:
receivers: [otlp]
processors: [attributes/database, batch]
exporters: [otlp/jaeger]
Quick Start Guide
- Deploy Exporter: Run the
postgres_exporter container connected to your database.
docker run -d --name pg-exporter \
-e DATA_SOURCE_NAME="postgresql://user:pass@db-host:5432/postgres?sslmode=disable" \
-p 9187:9187 prometheuscommunity/postgres-exporter
- Configure Prometheus: Add the exporter target to your
prometheus.yml and restart Prometheus. Verify metrics are scraped at http://localhost:9187/metrics.
- Import Dashboard: Import the community PostgreSQL dashboard (ID 9628) into Grafana. Connect it to your Prometheus data source.
- Validate Alerts: Simulate a slow query (
SELECT pg_sleep(10);) and verify that the P99 latency metric updates and alerts trigger if thresholds are breached.
- Instrument App: Integrate the TypeScript
InstrumentedPool class into your application startup sequence, pointing to your OTEL collector. Verify RED metrics appear in Grafana.