t when a large number of distinct accounts receive exactly one or two attempts from distinct IPs within a defined timeframe.
import { Redis } from 'ioredis';
/** One authentication attempt, in the shape accepted by PopulationCorrelator.recordAttempt. */
interface LoginAttempt {
/** Account identifier; forms the part before the ':' in the stored sorted-set member. */
accountId: string;
/** Source IP address; forms the part after the ':' in the stored sorted-set member. */
ipAddress: string;
/**
 * Stored as the sorted-set score.
 * NOTE(review): presumably epoch milliseconds, because analyzeWindow filters scores
 * against Date.now() — confirm with callers.
 */
timestamp: number;
/** Result of the attempt. Not read by the correlator code shown in this document. */
outcome: 'success' | 'failure';
}
/**
 * Correlates login attempts across the whole user population instead of per IP
 * or per account, to surface many accounts each touched once or twice from many
 * distinct IPs.
 *
 * Attempts are stored in Redis sorted sets, one per fixed time bucket of
 * `windowHours`. Each member is `"<accountId>:<ipAddress>"` and its score is the
 * attempt timestamp. Because members are unique, repeated attempts from the same
 * account/IP pair are stored once (the latest timestamp wins).
 */
export class PopulationCorrelator {
  private readonly redis: Redis;
  private readonly windowMs: number;

  /**
   * @param redisClient Connected ioredis client used for all reads and writes.
   * @param windowHours Length of the correlation window in hours (default 24).
   * @throws RangeError if `windowHours` is not a positive, finite number; such a
   *   value would otherwise produce `Infinity`/`NaN` bucket keys.
   */
  constructor(redisClient: Redis, windowHours: number = 24) {
    if (!Number.isFinite(windowHours) || windowHours <= 0) {
      throw new RangeError(
        `windowHours must be a positive, finite number (received ${windowHours})`,
      );
    }
    this.redis = redisClient;
    this.windowMs = windowHours * 3600 * 1000;
  }

  /**
   * Records one attempt in the bucket that is current at write time.
   *
   * @param attempt The attempt to store. `accountId` must not contain ':' because
   *   the first ':' in the member is treated as the account/IP delimiter when the
   *   window is analyzed.
   */
  async recordAttempt(attempt: LoginAttempt): Promise<void> {
    const windowKey = this.bucketKey(Math.floor(Date.now() / this.windowMs));
    // Store unique account-IP pairs with timestamp
    await this.redis.zadd(
      windowKey,
      attempt.timestamp,
      `${attempt.accountId}:${attempt.ipAddress}`,
    );
    // Auto-expire: one full window plus an hour after the last write, so the
    // bucket is still readable for the whole following window.
    await this.redis.expire(windowKey, Math.ceil(this.windowMs / 1000) + 3600);
  }

  /**
   * Summarizes the attempts whose score lies in the last `windowHours`.
   *
   * Both the current and the previous bucket are read: the look-back period
   * usually straddles a bucket boundary, and reading only the current bucket
   * would hide almost all activity right after a rollover.
   *
   * @returns `accountCount` and `ipCount` are the numbers of distinct accounts
   *   and IPs seen; `riskScore` is 0.85 when more than 500 accounts are involved,
   *   the IP/account ratio exceeds 0.8 and accounts average at most two distinct
   *   pairs each, otherwise 0.1.
   */
  async analyzeWindow(): Promise<{ accountCount: number; ipCount: number; riskScore: number }> {
    // Sample the clock once so the bucket index and score range agree.
    const now = Date.now();
    const windowStart = now - this.windowMs;
    const currentBucket = Math.floor(now / this.windowMs);

    const [previousEntries, currentEntries] = await Promise.all([
      this.redis.zrangebyscore(this.bucketKey(currentBucket - 1), windowStart, now),
      this.redis.zrangebyscore(this.bucketKey(currentBucket), windowStart, now),
    ]);

    const pairs = new Set<string>();
    const accounts = new Set<string>();
    const ips = new Set<string>();
    for (const entry of [...previousEntries, ...currentEntries]) {
      // Split on the FIRST ':' only: IPv6 addresses contain colons themselves,
      // so a plain split(':') would truncate them to their first hextet.
      const separator = entry.indexOf(':');
      if (separator <= 0 || separator === entry.length - 1) {
        continue; // malformed member: missing account or IP part
      }
      pairs.add(entry); // de-duplicates pairs present in both buckets
      accounts.add(entry.slice(0, separator));
      ips.add(entry.slice(separator + 1));
    }

    // Risk increases when many accounts have exactly 1-2 attempts from unique IPs
    const accountDivisor = Math.max(accounts.size, 1);
    const avgAttemptsPerAccount = pairs.size / accountDivisor;
    const ipDiversityRatio = ips.size / accountDivisor;
    const riskScore =
      accounts.size > 500 && ipDiversityRatio > 0.8 && avgAttemptsPerAccount <= 2 ? 0.85 : 0.1;

    return { accountCount: accounts.size, ipCount: ips.size, riskScore };
  }

  /** Builds the Redis key of the sorted set that holds the given bucket index. */
  private bucketKey(bucket: number): string {
    return `auth:population:${bucket}`;
  }
}
Architecture Rationale: Redis sorted sets provide O(log N) insertion and efficient time-range queries. The sliding window approach avoids storing raw logs indefinitely while preserving correlation capability. The risk score formula weights account count, IP diversity, and attempt frequency. This replaces per-IP counters with a population heuristic that directly matches low-velocity attack signatures.
Step 2: Contextual Session Validation
When a login succeeds, validate the session context before granting full access. Check device fingerprint consistency, ASN classification, and geographic plausibility.
/** Context of the login being evaluated, as consumed by ContextValidator.evaluate. */
interface SessionContext {
/** Device identifier of the current login; compared with lastSuccessfulLogin.deviceId. */
deviceId: string;
/** ASN of the current login; passed to the ASN classification lookup. */
asn: string;
/** Country of the current login; compared with lastSuccessfulLogin.country. */
country: string;
/** Previous successful login, if any. When absent, no device or country comparison is made. */
lastSuccessfulLogin?: { deviceId: string; asn: string; country: string };
}
/**
 * Compares the context of a login against the previous successful login and
 * flags device, geography and residential/mobile-ASN anomalies.
 */
export class ContextValidator {
  /**
   * @param asnLookup Optional provider that resolves an ASN to metadata with a
   *   `type` field (e.g. backed by an ASN classification service). Required
   *   unless a subclass overrides `fetchAsnMetadata`.
   */
  constructor(
    private readonly asnLookup?: (asn: string) => Promise<{ type: string }>,
  ) {}

  /**
   * Evaluates one login context.
   *
   * @param context Current login plus, optionally, the last successful login.
   * @returns `flags` lists every anomaly found (`DEVICE_MISMATCH`, `GEO_SHIFT`,
   *   `RESIDENTIAL_PROXY_SUSPECT`); `isAnomalous` is true when at least two
   *   flags were raised.
   * @throws Error if an ASN lookup is needed and no provider is configured, or
   *   if the provider itself rejects.
   */
  async evaluate(context: SessionContext): Promise<{ isAnomalous: boolean; flags: string[] }> {
    const flags: string[] = [];
    const prev = context.lastSuccessfulLogin;
    if (prev) {
      if (prev.deviceId !== context.deviceId) flags.push('DEVICE_MISMATCH');
      if (prev.country !== context.country) flags.push('GEO_SHIFT');
    }
    // Residential proxy ASNs typically lack enterprise infrastructure.
    // The ASN type only matters in combination with another flag, so the
    // (remote) lookup is skipped entirely for logins that raised none.
    if (flags.length > 0 && (await this.checkAsnType(context.asn))) {
      flags.push('RESIDENTIAL_PROXY_SUSPECT');
    }
    return { isAnomalous: flags.length >= 2, flags };
  }

  /** Returns true when the ASN is classified as `residential` or `mobile`. */
  private async checkAsnType(asn: string): Promise<boolean> {
    // Integrate with IP2Location, MaxMind, or similar ASN classification service
    const asnData = await this.fetchAsnMetadata(asn);
    return asnData.type === 'residential' || asnData.type === 'mobile';
  }

  /**
   * Resolves ASN metadata through the injected provider. Subclasses may
   * override this to call a classification service directly.
   *
   * @throws Error when no provider was passed to the constructor.
   */
  protected async fetchAsnMetadata(asn: string): Promise<{ type: string }> {
    if (!this.asnLookup) {
      throw new Error('ContextValidator: no ASN metadata provider configured');
    }
    return this.asnLookup(asn);
  }
}
Architecture Rationale: Successful logins are no longer trusted by default. Device fingerprinting (using libraries like fingerprintjs-pro or custom canvas/WebGL hashing) combined with ASN classification creates a baseline of expected behavior. Residential proxy networks are heavily utilized in credential stuffing; flagging them when combined with device or geographic shifts creates a high-signal anomaly without blocking legitimate users.
Step 3: Breach Corpus Integration
Validate credentials against known breach datasets at authentication time. Use the k-anonymity API to preserve privacy while checking password exposure.
import { createHash } from 'crypto';
/**
 * Checks passwords against the Pwned Passwords range API using k-anonymity:
 * only the first five hex characters of the password's SHA-1 digest are sent.
 */
export class BreachChecker {
  private readonly hibpEndpoint = 'https://api.pwnedpasswords.com/range/';

  /**
   * Reports whether the password appears in the breach corpus.
   *
   * No timeout or caching is applied here; callers that need a latency bound
   * or a fallback policy must wrap this call.
   *
   * @param password Plain-text password to check.
   * @returns true when the response lists the digest suffix with a count above
   *   zero, false when it is absent or listed with a zero count.
   * @throws Error when the API answers with a non-2xx status. Without this
   *   check an error body (rate limit, outage) would be parsed as "no match"
   *   and a compromised password would silently pass.
   */
  async isCompromised(password: string): Promise<boolean> {
    const hash = createHash('sha1').update(password, 'utf8').digest('hex').toUpperCase();
    const prefix = hash.slice(0, 5);
    const suffix = hash.slice(5);

    const response = await fetch(`${this.hibpEndpoint}${prefix}`);
    if (!response.ok) {
      throw new Error(`HIBP range lookup failed with HTTP ${response.status}`);
    }

    const text = await response.text();
    // Lines have the form "<SUFFIX>:<COUNT>"; accept both LF and CRLF endings.
    for (const line of text.split(/\r?\n/)) {
      const separator = line.indexOf(':');
      if (separator === -1) continue; // blank or malformed line
      if (line.slice(0, separator).trim().toUpperCase() !== suffix) continue;
      if (Number.parseInt(line.slice(separator + 1), 10) > 0) {
        return true;
      }
    }
    return false;
  }
}
Architecture Rationale: The HIBP k-anonymity model sends only the first 5 characters of the SHA-1 hash, preventing the full password from ever leaving your infrastructure. Checking at login time catches reused credentials that originated from unrelated breaches. This is a low-latency, high-impact control that directly addresses the credential supply chain problem.
Step 4: Adaptive Step-Up Authentication
Replace account lockouts with friction. When population correlation, context validation, or breach checking returns elevated risk, require step-up authentication instead of denying access.
/** Maps a risk score and context flags to an access decision. */
export class StepUpOrchestrator {
  /**
   * Chooses between allowing the login and requiring a step-up challenge.
   *
   * @param accountId Account being logged in to (not used by the decision).
   * @param riskScore Population-level risk score.
   * @param contextFlags Flags raised by context validation.
   * @returns `allow` only when the score is below 0.3 and no flag is set;
   *   otherwise `step_up` with `TOTP_OR_WEBAUTHN` for a score above 0.7 or a
   *   `RESIDENTIAL_PROXY_SUSPECT` flag, and `EMAIL_OTP` for everything else.
   */
  async handleAnomalousLogin(
    accountId: string,
    riskScore: number,
    contextFlags: string[]
  ): Promise<{ action: 'allow' | 'step_up' | 'deny'; challengeType?: string }> {
    const isLowRisk = riskScore < 0.3;
    const hasNoFlags = contextFlags.length === 0;
    if (isLowRisk && hasNoFlags) {
      return { action: 'allow' };
    }

    // Strong challenge for high scores or a suspected residential proxy;
    // moderate risk falls back to email verification.
    const needsStrongChallenge =
      riskScore > 0.7 || contextFlags.includes('RESIDENTIAL_PROXY_SUSPECT');
    const challengeType = needsStrongChallenge ? 'TOTP_OR_WEBAUTHN' : 'EMAIL_OTP';
    return { action: 'step_up', challengeType };
  }
}
Architecture Rationale: Lockouts punish legitimate users and provide attackers with enumeration data. Step-up authentication adds friction that automated stuffing operations cannot scale. TOTP, WebAuthn, or email OTP challenges are trivial for humans but break automated pipelines. The risk score thresholds should be tuned to your user base's typical behavior.
Pitfall Guide
1. Per-IP Throttling as Primary Defense
Explanation: Rate limiting per IP address assumes attackers concentrate requests from single endpoints. Low-velocity operations distribute across thousands of residential proxies, ensuring each IP stays well below thresholds.
Fix: Deprecate per-IP blocking as a primary control. Implement population-level sliding windows that correlate distinct accounts and IPs across time. Use per-IP limits only as a secondary noise filter.
2. Relying on Account Lockouts
Explanation: Locking accounts after N failures assumes attackers will retry the same account. Credential stuffing tests each account exactly once. Lockouts rarely trigger, and when they do, they create support overhead and enable account enumeration.
Fix: Replace lockouts with risk-scoring and step-up authentication. Track failure patterns at the population level, not the account level. Allow failed attempts but flag accounts that appear in high-risk correlation windows.
3. Ignoring ASN and Proxy Classification
Explanation: Most telemetry pipelines treat all IPs equally. Residential and mobile ASNs are heavily leveraged by credential stuffing tooling because they blend with organic traffic. Without ASN classification, anomalous logins appear legitimate.
Fix: Integrate an ASN classification service (MaxMind, IP2Location, or commercial proxy detection APIs). Flag sessions originating from residential/mobile ranges when combined with device or geographic anomalies. Do not block residential IPs outright; use them as risk multipliers.
4. Treating Successful Logins as Authorized
Explanation: Traditional auth pipelines assume a correct password equals a legitimate user. Credential stuffing exploits this by using valid credentials from unrelated breaches. The password is correct; the entity is not.
Fix: Implement contextual validation on every successful login. Check device fingerprint consistency, geographic plausibility, and ASN type. Apply step-up authentication when context deviates from historical baselines, regardless of password correctness.
5. Skipping HIBP Checks Due to Latency Concerns
Explanation: Developers often skip breach checking at login, fearing API latency will degrade UX. The HIBP k-anonymity endpoint responds in <200ms and only requires the first 5 hash characters.
Fix: Implement HIBP checks asynchronously or with a short timeout fallback. Cache results for frequently tested password hashes. The security value of catching reused credentials far outweighs the minimal latency impact. Use a circuit breaker pattern to prevent API outages from blocking logins.
6. Honeypot Account Misconfiguration
Explanation: Seeding the user database with canary accounts is an effective signal for credential dump circulation. However, if these accounts are exposed in user search, password reset flows, or error messages, attackers can identify and avoid them.
Fix: Store honeypot accounts in a separate table with no public-facing references. Ensure they cannot be discovered via email lookup, username search, or password reset APIs. Any authentication attempt against a honeypot account should immediately trigger a high-severity alert and temporary credential corpus quarantine.
7. Static Risk Thresholds Without Tuning
Explanation: Hardcoding risk scores or population thresholds leads to false positives during legitimate traffic spikes (e.g., marketing campaigns, regional outages) or false negatives when attackers adapt pacing.
Fix: Implement dynamic thresholding based on rolling baselines. Use exponential moving averages to adjust population correlation windows. Log all risk decisions and review weekly to calibrate sensitivity. Provide a feedback loop where security analysts can mark false positives to retrain the scoring model.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| High-volume consumer SaaS (>100k MAU) | Population correlation + HIBP + step-up TOTP | Scales efficiently; catches distributed attacks without blocking legitimate users | Moderate (Redis cluster, HIBP API, MFA infrastructure) |
| Internal enterprise tool (<5k users) | Strict device fingerprinting + geo-fencing + immediate step-up | Lower traffic volume allows tighter controls; internal users have predictable patterns | Low (Fingerprinting library, geo-IP database) |
| Regulated financial/healthcare app | HIBP + WebAuthn mandatory + population correlation + honeypots | Compliance requires strong authentication; breach detection is mandatory | High (WebAuthn deployment, compliance auditing, dedicated security team) |
| Legacy monolith with limited infra | HIBP k-anonymity + basic population window + email OTP fallback | Minimal architectural changes; leverages existing email service for step-up | Low-Moderate (API integration, simple Redis/ZSet setup) |
Configuration Template
# auth-risk-config.yaml
# NOTE(review): nesting indentation appears to have been lost in this copy; the keys
# after each section header are presumably its children — re-indent before use.
# NOTE(review): the TypeScript classes shown in this document use literal values and
# do not read this file; wiring it in is left to the integrator.

# Population-level correlation. The values below match the literals in
# PopulationCorrelator (24-hour default window, 500 accounts, 2 attempts, 0.8 ratio).
population_correlation:
window_hours: 24
min_accounts_threshold: 500
max_attempts_per_account: 2
min_ip_diversity_ratio: 0.8
# Context checks applied to a login.
# NOTE(review): geo_shift_tolerance_hours and max_concurrent_devices are not
# referenced by the ContextValidator code shown in this document.
context_validation:
require_device_fingerprint: true
flag_residential_asn: true
geo_shift_tolerance_hours: 48
max_concurrent_devices: 3
# Breach-corpus lookup settings.
# NOTE(review): the BreachChecker code shown in this document applies no timeout,
# cache, or fallback itself; these settings presumably belong to a wrapper.
breach_detection:
provider: hibp_k_anonymity
timeout_ms: 200
cache_ttl_seconds: 3600
fallback_action: allow_with_flag
# Step-up decision thresholds (0.3 / 0.7, as in StepUpOrchestrator).
# NOTE(review): the totp_or_webauthn condition uses "risk >= 0.7" while
# StepUpOrchestrator tests "riskScore > 0.7", and the email_otp condition requires
# exactly one flag while the code uses email OTP for every remaining case — confirm
# which behavior is intended.
step_up_policy:
risk_threshold_low: 0.3
risk_threshold_high: 0.7
challenges:
- type: email_otp
condition: "risk < 0.7 && flags.length == 1"
- type: totp_or_webauthn
condition: "risk >= 0.7 || flags.includes('RESIDENTIAL_PROXY_SUSPECT')"
lockout_enabled: false
Quick Start Guide
- Initialize Redis Sliding Window: Deploy a Redis instance and configure the `PopulationCorrelator` class from the Core Solution. Set the window to 24 hours and begin logging all authentication attempts to the sorted set.
- Add HIBP Check: Integrate the `BreachChecker` into your authentication middleware. Call it after password verification but before session creation. Implement a 200ms timeout and cache results for 1 hour.
- Configure Step-Up Flow: Replace account lockout logic with the `StepUpOrchestrator`. Route anomalous sessions to an email OTP or TOTP challenge. Ensure your frontend handles the challenge redirect gracefully.
- Seed Canary Accounts: Create 50 dummy accounts in your database with no public references. Add a database trigger or event listener that fires a high-priority alert if any authentication attempt targets these accounts.
- Validate & Tune: Run the system in monitoring-only mode for 7 days. Review population correlation logs, HIBP hit rates, and step-up challenge completion rates. Adjust thresholds based on false positive/negative ratios before enabling automatic blocking or session termination.