rnanceOutcome = 'allow' | 'deny' | 'defer';
/**
 * Structured result of a governance evaluation, as produced by
 * `PolicyEvaluator.evaluate` and consumed by the refusal and escalation
 * handlers.
 */
interface PolicyEvaluation {
/** Final decision for the request. */
outcome: GovernanceOutcome;
/** Id of the policy whose check reported the violation; `PolicyEvaluator` leaves this as `''` when nothing was violated. */
policyId: string;
/** Version of that policy; `''` when nothing was violated. */
policyVersion: string;
/** Explanation taken from the violation; `''` when nothing was violated. */
reasoning: string;
/** Rule ids reported by the violation; empty when nothing was violated. */
triggeredRules: string[];
/** Set by `PolicyEvaluator` on `deny` outcomes only. */
remediation?: RemediationPath;
/** Set by `PolicyEvaluator` on `defer` outcomes only. */
escalationContext?: EscalationPayload;
}
/**
 * Guidance attached to a denial describing whether and how the request
 * could be changed to comply.
 */
interface RemediationPath {
/** `false` when the violation is a categorical prohibition (see `PolicyEvaluator.computeRemediation`). */
isRemediable: boolean;
/** Conditions the request did not meet; empty for categorical prohibitions. */
missingConditions: string[];
/** Compliant alternatives suggested by the violation; empty for categorical prohibitions. */
suggestedModifications: string[];
}
// 2. Governance Decision Engine
/**
 * Evaluates an action request against a tenant's active policies and returns
 * a structured {@link PolicyEvaluation} instead of throwing on violations.
 *
 * Precedence rules:
 * - A `critical` violation always produces `deny`, even when an earlier
 *   policy reported a non-critical violation. Evaluation stops at the first
 *   critical violation.
 * - Otherwise the first non-critical violation produces `defer`.
 * - With no violations the outcome is `allow` with empty policy fields.
 *
 * NOTE(review): `loadActivePolicies` and `findPrecedents` are not defined in
 * the class as shown here; confirm where they are provided.
 */
class PolicyEvaluator {
  /**
   * Runs every active policy's `check` against the request.
   *
   * @param request - The action being attempted.
   * @param context - Governance context; `context.tenantId` selects the policies.
   * @returns The evaluation trace for the winning outcome.
   */
  async evaluate(
    request: ActionRequest,
    context: GovernanceContext
  ): Promise<PolicyEvaluation> {
    const policies = await this.loadActivePolicies(context.tenantId);

    // First non-critical violation seen. It is only returned if no later
    // policy reports a critical violation; a deferral must never mask a denial.
    let deferral: PolicyEvaluation | undefined;

    for (const policy of policies) {
      const violation = await policy.check(request, context);
      if (!violation) continue;

      if (violation.severity === 'critical') {
        return {
          outcome: 'deny',
          policyId: policy.id,
          policyVersion: policy.version,
          reasoning: violation.explanation,
          triggeredRules: violation.ruleIds,
          remediation: this.computeRemediation(violation),
        };
      }

      if (deferral === undefined) {
        deferral = {
          outcome: 'defer',
          policyId: policy.id,
          policyVersion: policy.version,
          reasoning: violation.explanation,
          triggeredRules: violation.ruleIds,
          escalationContext: {
            riskScore: violation.riskScore,
            precedents: await this.findPrecedents(request),
            requiredReviewerRole: policy.escalationRole,
          },
        };
      }
    }

    return (
      deferral ?? {
        outcome: 'allow',
        policyId: '',
        policyVersion: '',
        reasoning: '',
        triggeredRules: [],
      }
    );
  }

  /**
   * Derives remediation guidance from a violation.
   *
   * @param violation - The violation that caused the denial.
   * @returns A non-remediable path with empty lists for categorical
   *   prohibitions; otherwise the violation's unmet conditions and compliant
   *   alternatives.
   */
  private computeRemediation(violation: PolicyViolation): RemediationPath {
    if (violation.isCategoricalProhibition) {
      return { isRemediable: false, missingConditions: [], suggestedModifications: [] };
    }
    return {
      isRemediable: true,
      missingConditions: violation.unmetConditions,
      suggestedModifications: violation.compliantAlternatives,
    };
  }
}
// 3. Refusal Router & Audit Sink
/**
 * Handles `deny` decisions: writes an audit record to the compliance ledger,
 * emits a denial counter to telemetry, and builds the structured response
 * returned upstream.
 */
class RefusalHandler {
  constructor(
    private auditStore: ComplianceLedger,
    private telemetryBus: MetricsEmitter
  ) {}

  /**
   * Records and reports a denial.
   *
   * The audit record is appended (and awaited) before telemetry is emitted
   * and before the response is returned.
   *
   * @param request - The denied request; `traceId`, `payload` and
   *   `context.entityId` are copied into the audit record.
   * @param decision - The evaluation that produced the denial.
   * @returns The denial payload; `auditReference` is the request's trace id.
   */
  async handleDenial(request: ActionRequest, decision: PolicyEvaluation): Promise<DenialResponse> {
    const recordBody = {
      traceId: request.traceId,
      timestamp: new Date().toISOString(),
      actionIntent: request.payload,
      requestingEntity: request.context.entityId,
      policiesEvaluated: decision.triggeredRules,
      triggeringPolicy: decision.policyId,
      policyVersion: decision.policyVersion,
      decisionReasoning: decision.reasoning,
      remediationOffered: decision.remediation?.isRemediable ?? false,
    };

    // The integrity hash covers exactly the fields stored next to it, so it
    // can be recomputed from the ledger entry alone. Hashing only `decision`
    // would leave traceId, timestamp, entity and intent unprotected.
    const auditRecord = {
      ...recordBody,
      integrityHash: await this.computeRecordHash(recordBody),
    };

    // Write to immutable compliance ledger
    await this.auditStore.append(auditRecord);

    // Emit leading-indicator telemetry
    this.telemetryBus.increment('governance.denial.count', {
      category: decision.triggeredRules[0] ?? 'unknown',
      policy: decision.policyId,
      entity: request.context.entityId,
    });

    return {
      status: 'denied',
      reason: decision.reasoning,
      remediation: decision.remediation,
      auditReference: auditRecord.traceId,
    };
  }

  /**
   * Computes the lowercase hex SHA-256 digest of `JSON.stringify(record)`.
   *
   * The digest depends on property insertion order, so a verifier must
   * serialize the stored fields in the same order (all fields except
   * `integrityHash`).
   *
   * @param record - The audit record body to protect.
   */
  private async computeRecordHash(record: object): Promise<string> {
    const data = new TextEncoder().encode(JSON.stringify(record));
    const digest = await crypto.subtle.digest('SHA-256', data);
    return Array.from(new Uint8Array(digest))
      .map((byte) => byte.toString(16).padStart(2, '0'))
      .join('');
  }
}
// 4. Escalation Orchestrator
/**
 * Routes `defer` decisions to human reviewers and applies their verdicts.
 *
 * Tickets are held in an in-memory map keyed by ticket id.
 *
 * NOTE(review): `notifyReviewers` and `executeDeferredAction` are not defined
 * in the class as shown here; confirm where they are provided.
 */
class EscalationManager {
  private reviewQueue: Map<string, ReviewTicket> = new Map();

  /**
   * Creates a review ticket for a deferred decision and notifies reviewers.
   *
   * @param request - The deferred request; `context.tier` selects the timeout.
   * @param decision - The evaluation; must carry an `escalationContext`.
   * @returns Ticket id, timeout and polling endpoint for the caller.
   * @throws Error when `decision` has no escalation context, or whatever
   *   `notifyReviewers` rejects with (the ticket is then removed again).
   */
  async routeForReview(request: ActionRequest, decision: PolicyEvaluation): Promise<DeferralResponse> {
    const escalation = decision.escalationContext;
    if (!escalation) {
      // Fail with a descriptive error instead of a TypeError on property access.
      throw new Error('Cannot route for review: decision has no escalation context');
    }

    const ticketId = crypto.randomUUID();
    const ticket: ReviewTicket = {
      id: ticketId,
      request,
      decision,
      assignedRole: escalation.requiredReviewerRole,
      createdAt: Date.now(),
      timeoutMs: this.calculateTimeout(request.context.tier),
      status: 'pending',
    };
    this.reviewQueue.set(ticketId, ticket);

    // Notify reviewer service / queue
    try {
      await this.notifyReviewers(ticket);
    } catch (err) {
      // The caller never receives this id, so the ticket would otherwise
      // stay in the queue forever.
      this.reviewQueue.delete(ticketId);
      throw err;
    }

    return {
      status: 'pending_review',
      reviewId: ticketId,
      estimatedResolutionMs: ticket.timeoutMs,
      pollingEndpoint: `/governance/reviews/${ticketId}/status`,
    };
  }

  /**
   * Applies a reviewer's verdict to a pending ticket.
   *
   * - A ticket can be resolved once; a second call while the first is still
   *   executing is rejected, so the deferred action cannot run twice.
   * - A ticket older than its `timeoutMs` is denied regardless of the
   *   verdict; an `approve` on such a ticket is rejected with an error.
   * - If executing the approved action fails, the ticket returns to
   *   `pending` so the resolution can be retried.
   *
   * @param ticketId - Id returned by {@link routeForReview}.
   * @param reviewerDecision - The reviewer's verdict.
   * @throws Error when the ticket is unknown, already being resolved, or
   *   expired and the verdict was `approve`.
   */
  async resolveReview(ticketId: string, reviewerDecision: 'approve' | 'deny'): Promise<void> {
    const ticket = this.reviewQueue.get(ticketId);
    if (!ticket) throw new Error('Review ticket not found');
    if (ticket.status !== 'pending') {
      throw new Error('Review ticket is already being resolved');
    }

    const now = Date.now();
    const expired = now - ticket.createdAt > ticket.timeoutMs;
    // Default-deny: an expired ticket can no longer be approved.
    const finalDecision = expired ? 'deny' : reviewerDecision;

    // Claim the ticket synchronously, before the first await below.
    ticket.status = finalDecision;
    ticket.resolvedAt = now;

    // Execute downstream action or emit final denial
    if (finalDecision === 'approve') {
      try {
        await this.executeDeferredAction(ticket.request);
      } catch (err) {
        ticket.status = 'pending';
        delete ticket.resolvedAt;
        throw err;
      }
    }
    this.reviewQueue.delete(ticketId);

    if (expired && reviewerDecision === 'approve') {
      throw new Error('Review ticket expired; request was denied by default');
    }
  }

  /** Review window in milliseconds: 5 minutes for `enterprise`, 1 minute otherwise. */
  private calculateTimeout(tier: string): number {
    return tier === 'enterprise' ? 300_000 : 60_000; // 5m vs 1m
  }
}
### Architecture Rationale
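- **Discriminated Unions for Outcomes**: Using the explicit literal union `allow | deny | defer` as the discriminant of every decision prevents implicit fallback behavior. When callers branch on it with an exhaustive `switch` (for example, assigning the value to `never` in the `default` case), the compiler rejects any unhandled outcome, eliminating silent failures.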
- **Separation of Audit and Telemetry**: Compliance logs require immutability, cryptographic hashing, and long retention. Operational metrics require aggregation, low latency, and short retention. Mixing them creates performance bottlenecks and compliance risks.
- **Default-Deny Timeouts**: Escalation queues must enforce strict SLAs. If a human reviewer does not respond within the calculated window, the system automatically denies the request. This prevents indefinite state accumulation and aligns with zero-trust principles.
- **Remediation Computation at Evaluation Time**: Calculating remediation paths during policy evaluation ensures upstream systems receive actionable guidance immediately, reducing retry latency and improving user experience.
## Pitfall Guide
### 1. Catch-Block Denial
**Explanation**: Wrapping policy checks in `try/catch` and treating violations as exceptions. This strips context, mixes control flow with error handling, and makes audit trails impossible to reconstruct.
**Fix**: Use explicit decision enums or discriminated unions. Policy evaluation should return a structured result, never throw, unless the evaluation engine itself fails.
### 2. Context Stripping in Denial Responses
**Explanation**: Returning generic `403 Forbidden` or `{"error": "blocked"}` responses. Upstream systems cannot differentiate between rate limits, policy violations, or malformed inputs.
**Fix**: Enforce a strict `DenialResponse` contract that includes `reason`, `policyId`, `remediation`, and `auditReference`. Never expose internal rule logic, but always provide actionable metadata.
### 3. Silent Audit Gaps
**Explanation**: Only logging successful executions. Regulators and compliance teams require proof that governance evaluated sensitive requests, even when denied.
**Fix**: Implement write-ahead logging for all governance evaluations. Every decision must produce an immutable record before the response is sent upstream.
### 4. Escalation Timeouts Without Fallback
**Explanation**: Queuing requests for human review but never enforcing a deadline. This causes state leaks, resource exhaustion, and undefined behavior during peak load.
**Fix**: Calculate timeout windows based on request tier. Implement a background sweeper that automatically transitions `pending` tickets to `denied` and emits compliance records when SLAs expire.
### 5. Mixing Operational Metrics with Compliance Logs
**Explanation**: Sending denial counts to the same logging pipeline as audit trails. This violates data retention policies, complicates encryption requirements, and degrades query performance.
**Fix**: Route compliance records to append-only, encrypted storage with WORM (Write Once Read Many) semantics. Route telemetry to time-series databases with automatic TTL expiration.
### 6. Static Policy Versioning
**Explanation**: Referencing policies by name or ID without versioning. When policies are updated, historical audit records become ambiguous and compliance reports fail reproducibility checks.
**Fix**: Embed `policyVersion` and `policyHash` in every decision record. Store policy snapshots alongside audit logs to enable exact historical reconstruction.
### 7. Ignoring Remediation Paths
**Explanation**: Treating all denials as categorical blocks. This frustrates users, increases support tickets, and misses opportunities for automated self-correction.
**Fix**: Classify denials as `remediable` or `categorical`. For remediable cases, compute missing conditions and compliant alternatives. Pass these directly to the upstream orchestrator.
## Production Bundle
### Action Checklist
- [ ] Define explicit governance outcomes (`allow`, `deny`, `defer`) using TypeScript discriminated unions
- [ ] Implement a dedicated `RefusalHandler` that constructs structured denial payloads
- [ ] Route compliance audit records to an append-only, encrypted storage backend
- [ ] Emit refusal telemetry to a separate time-series metrics pipeline
- [ ] Build an escalation queue with role-based routing and strict timeout enforcement
- [ ] Default to automatic denial when escalation SLAs expire
- [ ] Embed policy version and cryptographic hash in every audit record
- [ ] Compute remediation paths during evaluation, not post-hoc
### Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|----------|---------------------|-----|-------------|
| High-throughput public API | Inline policy evaluation with cached rules | Minimizes latency; denial routing happens in-process | Low infrastructure overhead; higher compute per request |
| Regulated AI agent workflow | Sidecar governance proxy with async audit sink | Isolates compliance logic; enables policy hot-reloading without service restarts | Moderate infrastructure cost; simplifies audit compliance |
| Internal enterprise tool | Centralized escalation queue with SSO reviewer routing | Aligns with existing IAM; reduces custom auth overhead | Low dev cost; leverages existing identity providers |
| Multi-tenant SaaS platform | Tenant-scoped policy engine with isolated audit partitions | Prevents cross-tenant data leakage; enables per-tenant compliance reporting | Higher storage cost; requires partition-aware query design |
### Configuration Template
```yaml
governance:
  engine:
    evaluation_mode: inline # or sidecar
    cache_ttl_seconds: 300
    max_policy_chain_depth: 5
  denial:
    structured_response: true
    include_remediation: true
    mask_sensitive_payload: true
  audit:
    storage_backend: s3_worm # or azure_blob_immutable, gcs_retention
    encryption: aes-256-gcm
    retention_days: 2555 # 7 years for regulatory compliance
    hash_algorithm: sha256
  telemetry:
    backend: prometheus
    metrics_prefix: governance.refusal
    aggregation_window: 60s
    alert_thresholds:
      spike_window: 300s
      spike_multiplier: 3.0
      novel_pattern_detection: true
  escalation:
    queue_backend: sqs # or rabbitmq, kafka
    default_timeout_ms: 60000
    enterprise_timeout_ms: 300000
    timeout_fallback: deny
    reviewer_routing: iam_role_based
```
### Quick Start Guide
- **Initialize the Decision Contract**: Define `GovernanceOutcome` as a string-literal union in your TypeScript project. Create interfaces for `PolicyEvaluation`, `RemediationPath`, and `DenialResponse`.
- **Deploy the Policy Evaluator**: Implement the `PolicyEvaluator` class with async rule checking. Ensure it returns structured outcomes instead of throwing exceptions.
- **Wire the Refusal Handler**: Instantiate `RefusalHandler` with your audit storage client and metrics emitter. Attach it to your API gateway or agent orchestrator as a post-evaluation middleware.
- **Configure Escalation Routing**: Set up the `EscalationManager` with your message queue. Define timeout windows based on tenant tier. Implement the background sweeper for expired tickets.
- **Validate with Synthetic Traffic**: Send requests that trigger `allow`, `deny`, and `defer` outcomes. Verify audit records are immutable, telemetry metrics appear in your dashboard, and escalation tickets route correctly. Confirm default-deny behavior on timeout.