SENSITIVE_PII = 'sensitive_pii',
NON_PII = 'non_pii'
}
/**
 * Governance metadata describing a single schema field: how it is classified,
 * how long it is retained, whether it requires consent, and where it comes from.
 */
export interface DataFieldMetadata {
  /** Name of the field this entry describes. */
  fieldName: string;

  /** Classification assigned to the field. */
  classification: DataClassification;

  /** Retention period for the field, in days. */
  retentionDays: number;

  /** Whether the field requires consent. */
  requiresConsent: boolean;

  /** Source the field originates from, e.g., 'auth_service', 'analytics_sdk'. */
  lineageSource: string;
}
/**
 * Field-level metadata for the user schema, keyed by field name.
 * Every entry below is classified as PII; retention, consent requirement, and
 * lineage source differ per field.
 */
export const USER_SCHEMA_METADATA: Record<string, DataFieldMetadata> = {
  email: {
    fieldName: 'email',
    classification: DataClassification.PII,
    retentionDays: 365,
    requiresConsent: true,
    lineageSource: 'auth_service',
  },
  ip_address: {
    fieldName: 'ip_address',
    classification: DataClassification.PII,
    retentionDays: 30,
    requiresConsent: false,
    lineageSource: 'web_server',
  },
  device_id: {
    fieldName: 'device_id',
    classification: DataClassification.PII,
    retentionDays: 90,
    requiresConsent: true,
    lineageSource: 'mobile_sdk',
  },
};
### Step 2: Consent Orchestration
Consent is not a boolean. It is a granular, time-bound, auditable state. Implement a consent service that records purpose-specific flags, versioning, and withdrawal events. Store consent as an immutable append-only log.
```typescript
// consent-engine.ts
/**
 * One entry in the consent log: a grant or withdrawal of consent by a user
 * for a single purpose at a point in time.
 */
export interface ConsentRecord {
  /** Identifier of the user the record belongs to. */
  userId: string;

  /** Purpose the consent applies to, e.g., 'marketing', 'analytics', 'functional'. */
  purpose: string;

  /** `true` when consent is granted; `false` otherwise (e.g. a withdrawal). */
  granted: boolean;

  /** When the consent decision was made. */
  timestamp: Date;

  /** Version label of the consent the user responded to. */
  consentVersion: string;

  /** Identifier used to reference this record in audits. */
  auditId: string;
}
/**
 * In-memory, append-only consent ledger.
 *
 * Records are only ever appended; the current consent state for a
 * (user, purpose) pair is derived from the most recent matching record.
 */
export class ConsentOrchestrator {
  private readonly auditLog: Readonly<ConsentRecord>[] = [];

  /**
   * Appends a consent decision to the ledger.
   *
   * A frozen snapshot is stored (with its own copy of the timestamp) so that
   * later mutation of the caller's object cannot rewrite history.
   *
   * @param record The consent decision to append.
   */
  recordConsent(record: ConsentRecord): void {
    const snapshot = Object.freeze({
      ...record,
      timestamp: new Date(record.timestamp.getTime()),
    });
    this.auditLog.push(snapshot);
    // Persist to append-only ledger or immutable storage
  }

  /**
   * Reports whether the user's latest decision for the purpose is a grant.
   *
   * @param userId User whose consent is being checked.
   * @param purpose Purpose the consent must cover.
   * @returns The `granted` flag of the most recent matching record, or
   *   `false` when there is no matching record.
   */
  validateConsent(userId: string, purpose: string): boolean {
    let latest: Readonly<ConsentRecord> | undefined;
    let latestTime = Number.NEGATIVE_INFINITY;

    for (const entry of this.auditLog) {
      if (entry.userId !== userId || entry.purpose !== purpose) {
        continue;
      }
      const time = entry.timestamp.getTime();
      // An invalid Date cannot be ordered; ignore it so the check fails closed.
      if (Number.isNaN(time)) {
        continue;
      }
      // `>=` makes the later-appended record win a timestamp tie, so a
      // withdrawal logged in the same millisecond as a grant is honored.
      if (time >= latestTime) {
        latest = entry;
        latestTime = time;
      }
    }

    return latest?.granted ?? false;
  }
}
```

### Step 3: Cryptographic Enforcement & Pseudonymization
Encrypt PII at rest and in transit. Use field-level encryption for sensitive attributes. Implement pseudonymization by replacing direct identifiers with reversible tokens stored in a separate, access-controlled vault. Key management must follow KMS best practices with automatic rotation.
// crypto-enforcement.ts
import { createCipheriv, createDecipheriv, randomBytes } from 'crypto';
/** Authenticated cipher used for field-level encryption. */
const ALGORITHM = 'aes-256-gcm';

/**
 * Field-level encryptor built on AES-256-GCM.
 *
 * All values cross the API boundary as hex strings so they can be stored in
 * text columns alongside the ciphertext.
 */
export class PIIEncryptor {
  /** 96-bit nonce, the IV size recommended for GCM. */
  private static readonly IV_BYTES = 12;

  /** Full-length 128-bit authentication tag; shorter tags are rejected. */
  private static readonly AUTH_TAG_BYTES = 16;

  private readonly key: Buffer;

  /**
   * @param kmsKey Symmetric key material. AES-256 requires exactly 32 bytes;
   *   the underlying cipher throws on first use if the length is wrong.
   */
  constructor(kmsKey: Buffer) {
    this.key = kmsKey;
  }

  /**
   * Encrypts a UTF-8 string under a fresh random IV.
   *
   * @param plaintext Value to encrypt.
   * @returns Hex-encoded ciphertext, IV, and authentication tag. All three
   *   are required to decrypt.
   */
  encrypt(plaintext: string): { ciphertext: string; iv: string; authTag: string } {
    // A new random IV per call: reusing an IV with the same key breaks GCM.
    const iv = randomBytes(PIIEncryptor.IV_BYTES);
    const cipher = createCipheriv(ALGORITHM, this.key, iv, {
      authTagLength: PIIEncryptor.AUTH_TAG_BYTES,
    });
    const ciphertext = cipher.update(plaintext, 'utf8', 'hex') + cipher.final('hex');
    return {
      ciphertext,
      iv: iv.toString('hex'),
      authTag: cipher.getAuthTag().toString('hex'),
    };
  }

  /**
   * Decrypts and authenticates a value produced by {@link encrypt}.
   *
   * The IV length is taken from the input, so values encrypted earlier with
   * a 16-byte IV still decrypt.
   *
   * @param ciphertext Hex-encoded ciphertext.
   * @param iv Hex-encoded IV used at encryption time.
   * @param authTag Hex-encoded 16-byte authentication tag.
   * @returns The original UTF-8 plaintext.
   * @throws If the tag is not exactly 16 bytes, or if authentication fails
   *   (wrong key, or tampered ciphertext/IV/tag).
   */
  decrypt(ciphertext: string, iv: string, authTag: string): string {
    // Pinning authTagLength makes setAuthTag reject truncated tags, which
    // would otherwise weaken the forgery resistance of GCM.
    const decipher = createDecipheriv(ALGORITHM, this.key, Buffer.from(iv, 'hex'), {
      authTagLength: PIIEncryptor.AUTH_TAG_BYTES,
    });
    decipher.setAuthTag(Buffer.from(authTag, 'hex'));
    return decipher.update(ciphertext, 'hex', 'utf8') + decipher.final('utf8');
  }
}
### Step 4: DSAR Automation Pipeline
Data Subject Access Requests require idempotent, auditable processing. Build an event-driven pipeline that aggregates data across services, applies consent filters, formats exports, and executes erasure. Use a message broker to handle async processing and retry logic.
// dsar-pipeline.ts
/**
 * A single Data Subject Access Request as submitted to the pipeline.
 */
export interface DSARPayload {
  /** Identifier of this request. */
  requestId: string;

  /** Identifier of the data subject the request concerns. */
  userId: string;

  /** Kind of request being made. */
  type: 'access' | 'erasure' | 'portability';

  /** When the request was made. */
  requestedAt: Date;
}
/**
 * Handles one DSAR end to end: collects the subject's records, then either
 * erases them or exports them, and finally writes an audit entry.
 *
 * NOTE(review): `fetchPIILineage`, `notifyThirdParties`,
 * `formatDataPortability`, `deliverToSubject`, and `logAuditTrail` are called
 * here but not defined in this snippet; they must be supplied by the full
 * implementation.
 */
export class DSARProcessor {
  /**
   * Processes a single request.
   *
   * 'erasure' requests delete the subject's records and notify third
   * parties; every other request type ('access' and 'portability') produces
   * an export delivered to the subject. An audit entry with status
   * 'completed' is written once the chosen path finishes.
   *
   * @param payload The request to process.
   */
  async process(payload: DSARPayload): Promise<void> {
    const subjectRecords = await this.fetchPIILineage(payload.userId);

    switch (payload.type) {
      case 'erasure':
        await this.executeSecureDeletion(subjectRecords);
        await this.notifyThirdParties(payload.userId);
        break;
      default: {
        // 'access' and 'portability' share the export path.
        const bundle = this.formatDataPortability(subjectRecords);
        await this.deliverToSubject(payload.userId, bundle);
      }
    }

    await this.logAuditTrail(payload, 'completed');
  }

  /**
   * Placeholder for the erasure step; intentionally empty in this snippet.
   *
   * @param records Records to erase.
   */
  private async executeSecureDeletion(records: any[]): Promise<void> {
    // Overwrite storage, purge caches, trigger backup retention expiry
    // Implement cryptographic erasure for encrypted fields
  }
}
### Step 5: Automated Retention & Secure Deletion
Retention policies must be dynamic and tied to consent state. Implement TTL-based cleanup with cryptographic verification. Soft deletes are insufficient; overwrite or cryptographically erase PII after retention expiry. Schedule automated purging with idempotent execution and audit logging.
Architecture Decisions & Rationale:
- Schema-Level Classification: Enforces data minimization at ingestion. Prevents untagged PII from entering pipelines.
- Append-Only Consent Ledger: Guarantees auditability. Supports versioning and withdrawal tracking without mutation risks.
- Event-Driven DSAR Pipeline: Decouples request handling from core services. Enables parallel processing, retry safety, and SLA tracking.
- Field-Level Encryption + KMS Rotation: Limits blast radius. Ensures compliance with integrity/confidentiality requirements without degrading system performance.
- Immutable Audit Trails: Satisfies accountability principle. Enables regulatory verification without manual reconstruction.
Pitfall Guide
- **Treating Consent as a Boolean Flag**
  Consent is purpose-specific, versioned, and revocable. A single `consent_given: boolean` field fails GDPR granularity requirements and breaks portability/erasure logic. Implement a consent state machine with purpose mapping and immutable audit records.
- **Ignoring Indirect Data Collection**
Logs, analytics SDKs, CDN headers, and third-party pixels collect PII without explicit schema definitions. These become untracked data silos. Enforce DLP (Data Loss Prevention) hooks at network egress, scan raw logs for PII patterns, and route third-party data through a consent-aware proxy.
- **Relying on Soft Deletes for Erasure**
  Setting `is_deleted = true` leaves PII in database pages, backups, and search indices. GDPR requires secure erasure. Implement cryptographic deletion (destroy encryption keys for PII fields), overwrite storage blocks, and purge cache/CDN layers. Verify with automated data scanning post-deletion.
- **Hardcoding Retention Policies**
Static retention periods ignore consent withdrawal, legal holds, and user-initiated deletion. Retention must be dynamic, tied to consent state and purpose limitation. Use policy-as-code engines (e.g., Open Policy Agent) to evaluate TTL based on real-time consent and regulatory context.
- **Manual DSAR Processing**
Spreadsheet-driven or ticket-based DSAR handling fails at scale and introduces audit risk. Automate lineage traversal, consent filtering, and export formatting. Implement idempotent request IDs, async processing, and SLA monitoring to guarantee 30-day compliance windows.
- **Assuming Encryption Equals Compliance**
Encryption satisfies integrity/confidentiality but ignores purpose limitation, data minimization, consent orchestration, and portability. A fully encrypted database with unconstrained PII proliferation and missing audit trails remains non-compliant. Treat cryptography as one layer within a broader privacy architecture.
- **Failing to Handle Data Portability Correctly**
Portability requires machine-readable formats, schema mapping, and inclusion of derived data. Exporting raw database dumps violates minimization and exposes unrelated PII. Implement structured JSON/CSV exports with consent-filtered fields, lineage annotations, and versioned schema definitions.
Best Practices from Production:
- Implement policy-as-code for retention and consent validation.
- Use event sourcing for data lineage to enable deterministic DSAR reconstruction.
- Deploy automated PII scanning in CI/CD pipelines to catch schema violations early.
- Maintain separate PII vaults with strict RBAC and audit logging.
- Conduct quarterly DSAR simulation drills to validate pipeline latency and completeness.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Legacy monolith with untagged PII | Implement schema migration + automated PII scanner + retroactive consent capture | Prevents uncontrolled data proliferation; establishes baseline for DSAR automation | High initial engineering cost; reduces long-term audit/fine risk |
| Microservices with fragmented consent | Centralize consent orchestration via event bus + append-only ledger | Eliminates inconsistent consent states across services; guarantees auditability | Medium integration cost; accelerates compliance reporting |
| High-volume DSAR requests | Deploy async DSAR pipeline with parallel lineage traversal + idempotent request IDs | Meets 30-day SLA at scale; prevents processing collisions and data duplication | Low infrastructure cost; eliminates manual processing overhead |
| Third-party analytics SDKs collecting PII | Route through consent-aware proxy + DLP scanning + field-level pseudonymization | Blocks uncontrolled data egress; satisfies purpose limitation and minimization | Medium network overhead; avoids vendor compliance liability |
| Backup retention compliance | Implement cryptographic erasure + backup TTL policies + automated purge verification | Ensures GDPR erasure extends to disaster recovery layers; prevents backup recovery of deleted PII | Low storage cost; requires backup system integration |
Configuration Template
# gdpr-policy-config.yaml
data_classification:
  pii_fields:
    - email
    - phone_number
    - ip_address
    - device_id
  sensitive_pii_fields:
    - national_id
    - health_data
    - biometric_template
consent:
  ledger_type: append_only
  granularity: purpose_specific
  purposes:
    - marketing
    - analytics
    - functional
    - research
  versioning: true
  audit_retention_days: 730
retention:
  default_ttl_days: 365
  dynamic_policy: true
  override_on_consent_withdrawal: true
  backup_expiry_days: 90
encryption:
  algorithm: aes-256-gcm
  key_management: kms
  rotation_interval_days: 90
  field_level: true
dsar:
  sla_days: 30
  processing_mode: async
  idempotency: true
  export_format: json
  include_derived_data: true
  audit_logging: immutable
Quick Start Guide
- **Tag Schema Metadata:** Add `DataFieldMetadata` interfaces to all database models. Classify fields as PII, sensitive PII, or non-PII. Define retention days and consent requirements.
- **Deploy Consent Ledger:** Implement the `ConsentOrchestrator` service. Store consent records in an append-only database or immutable storage. Integrate with the user preference UI to capture purpose-specific grants.
- **Enable Field Encryption:** Wrap PII write/read operations with `PIIEncryptor`. Configure KMS key rotation. Replace plaintext storage with ciphertext + IV + authTag tuples.
- **Spin Up DSAR Pipeline:** Create a message queue topic for `DSARPayload`. Implement `DSARProcessor` with lineage traversal, consent filtering, and secure deletion. Add SLA monitoring and audit logging.
- Validate & Test: Run a simulated DSAR request. Verify consent filtering, encryption boundaries, export formatting, and erasure completeness. Confirm audit trail immutability and backup purge alignment.