an; data?: unknown; error?: string };
/**
 * Tool definition for querying the structured database for user records.
 *
 * The schema accepts a query of 3–200 characters, an integer limit in 1–50
 * (default 10), and an optional record of string-valued filters.
 * `execute` never throws for a failed lookup: the failure is reported through
 * the `success: false` / `error` fields of the returned output.
 */
export const searchTool: ToolDefinition = {
  name: 'search_database',
  description: 'Query structured database for user records',
  schema: z.object({
    query: z.string().min(3).max(200),
    limit: z.number().int().min(1).max(50).default(10),
    filters: z.record(z.string()).optional()
  }),
  execute: async (input) => {
    // Simulated DB call; errors are converted into a structured failure result.
    try {
      const results = await db.find(input.query, { limit: input.limit, filters: input.filters });
      return { success: true, data: results };
    } catch (err) {
      // A thrown value is not guaranteed to be an Error, so narrow before reading `.message`.
      const reason = err instanceof Error ? err.message : String(err);
      return { success: false, error: `Search failed: ${reason}` };
    }
  }
};
### Step 2: Implement the Planner
The planner receives a high-level goal and decomposes it into a sequence of executable steps. It outputs a structured plan, not raw text.
```typescript
/**
 * One executable step of a plan produced by the planner.
 */
export interface PlanStep {
/** Identifier of this step; presumably what `dependsOn` entries of other steps refer to. */
id: string;
/** Name of the tool to run; the executor looks the tool up by this name. */
tool: string;
/** Raw tool input; it is validated against the tool's schema before the tool runs. */
input: unknown;
/**
 * Ids of the steps this step depends on.
 * NOTE(review): the executor in this file does not appear to consult this field — confirm intended use.
 */
dependsOn?: string[];
}
/**
 * Asks the LLM to decompose a goal into a structured list of plan steps.
 *
 * @param goal - High-level goal to decompose.
 * @param availableTools - Tools the plan may use; their names and descriptions are listed in the prompt.
 * @param llmClient - Client used to request the plan.
 * @returns The normalized steps. A step without a usable id receives a random UUID,
 *   and a missing `dependsOn` becomes an empty array.
 * @throws {SyntaxError} If the response content is not valid JSON.
 * @throws {Error} If the response holds no array of steps, or a step is not an object.
 */
export async function generatePlan(
  goal: string,
  availableTools: ToolDefinition[],
  llmClient: LLMClient
): Promise<PlanStep[]> {
  const toolDescriptions = availableTools.map(t => `${t.name}: ${t.description}`).join('\n');
  // The request below uses JSON-object mode, which produces a top-level object,
  // so the steps are requested under a "steps" key instead of as a bare array.
  const prompt = `
Decompose the following goal into executable steps using only the provided tools.
Output a JSON object of the form {"steps": [...]}. Each step must include: id, tool, input, dependsOn.
Do not invent tools. Validate inputs against tool schemas.
Tools:
${toolDescriptions}
Goal: ${goal}
`;
  const response = await llmClient.chatCompletion({
    messages: [{ role: 'user', content: prompt }],
    response_format: { type: 'json_object' }
  });

  const parsed: unknown = JSON.parse(response.content);
  // A bare array is still accepted for clients that do not enforce an object at the top level.
  const rawSteps: unknown = Array.isArray(parsed)
    ? parsed
    : (parsed as { steps?: unknown } | null)?.steps;
  if (!Array.isArray(rawSteps)) {
    throw new Error('Planner response did not contain an array of steps');
  }

  return rawSteps.map((raw: unknown, index: number): PlanStep => {
    if (typeof raw !== 'object' || raw === null) {
      throw new Error(`Plan step ${index} is not an object`);
    }
    const step = raw as Record<string, unknown>;

    // Models sometimes emit numeric ids; keep them (as strings) so dependsOn references still match.
    const hasUsableId =
      (typeof step.id === 'string' && step.id.length > 0) || typeof step.id === 'number';
    const dependsOn = Array.isArray(step.dependsOn)
      ? step.dependsOn
          .filter((dep: unknown) => typeof dep === 'string' || typeof dep === 'number')
          .map((dep: unknown) => String(dep))
      : [];

    return {
      id: hasUsableId ? String(step.id) : crypto.randomUUID(),
      // A missing or non-string tool name is passed on as text so the executor can report it as an unknown tool.
      tool: String(step.tool),
      input: step.input,
      dependsOn
    };
  });
}
### Step 3: Build the Executor with Retry & Fallback
The executor runs the plan, handles tool validation, retries transient failures, and falls back gracefully.
/**
 * Runs the steps of a plan one after another and collects one result per step.
 *
 * Each step is resolved to a tool by name and its input is validated against
 * the tool's schema; a step that fails either check is recorded as a failure
 * without being executed. An execution that throws or reports
 * `success: false` is retried up to `maxRetries` more times, waiting an
 * exponentially growing delay between attempts.
 *
 * @param plan - Steps to run, in order.
 * @param tools - Tools available to the plan, matched by `name`.
 * @param maxRetries - Additional attempts allowed after the first one (default 2).
 * @returns One output per plan step, in plan order.
 */
export async function executePlan(
  plan: PlanStep[],
  tools: ToolDefinition[],
  maxRetries = 2
): Promise<ToolOutput[]> {
  // Results are collected positionally: keying them by step id would let two
  // steps that share an id overwrite each other and shorten the returned list.
  const results: ToolOutput[] = [];
  const toolMap = new Map(tools.map(t => [t.name, t]));

  for (const step of plan) {
    const tool = toolMap.get(step.tool);
    if (!tool) {
      results.push({ success: false, error: `Unknown tool: ${step.tool}` });
      continue;
    }

    // Validate input against schema
    const validation = tool.schema.safeParse(step.input);
    if (!validation.success) {
      results.push({ success: false, error: `Invalid input: ${validation.error.message}` });
      continue;
    }

    let attempt = 0;
    // Only reported when the loop never runs, i.e. when maxRetries is negative.
    let output: ToolOutput = { success: false, error: 'Max retries exceeded' };
    while (attempt <= maxRetries) {
      try {
        output = await tool.execute(validation.data);
        if (output.success) break;
      } catch (err) {
        // A thrown value is not guaranteed to be an Error, so narrow before reading `.message`.
        const reason = err instanceof Error ? err.message : String(err);
        output = { success: false, error: `Execution error: ${reason}` };
      }
      attempt++;
      if (attempt <= maxRetries) {
        // Exponential backoff: 200 ms, 400 ms, 800 ms, ...
        const delayMs = 200 * 2 ** (attempt - 1);
        await new Promise<void>(resolve => setTimeout(resolve, delayMs));
      }
    }
    results.push(output);
  }

  return results;
}
### Step 4: Orchestrate with Bounded State Management
Context windows must be controlled. The orchestrator maintains a sliding window of relevant history, prunes completed steps, and injects only necessary context into subsequent planner calls.
/**
 * Plans and executes a goal while keeping a bounded history of past runs.
 *
 * The history is capped both by entry count and by an estimated token budget,
 * so it cannot grow without limit across repeated `run` calls.
 */
export class AgentOrchestrator {
  private contextWindow: string[] = [];
  /** Approximate token budget for the retained history. */
  private readonly maxContextTokens = 4000;
  /** Maximum number of history entries retained. */
  private readonly maxContextEntries = 10;

  constructor(private readonly llm: LLMClient, private readonly tools: ToolDefinition[]) {}

  /**
   * Generates a plan for the goal, executes it, and records the goal and the
   * step results in the bounded history.
   *
   * @param goal - High-level goal to accomplish.
   * @returns The outputs produced by executing the plan.
   */
  async run(goal: string): Promise<ToolOutput[]> {
    // 1. Generate initial plan
    const plan = await generatePlan(goal, this.tools, this.llm);
    // 2. Execute with validation & retry
    const results = await executePlan(plan, this.tools);
    // 3. Update bounded context
    this.contextWindow.push(`Goal: ${goal}`);
    for (const result of results) {
      this.contextWindow.push(JSON.stringify(result));
    }
    this.pruneContext();
    return results;
  }

  /** Drops the oldest history entries until both the entry cap and the token budget hold. */
  private pruneContext(): void {
    if (this.contextWindow.length > this.maxContextEntries) {
      this.contextWindow = this.contextWindow.slice(-this.maxContextEntries);
    }
    let estimatedTokens = this.contextWindow.reduce(
      (total, entry) => total + AgentOrchestrator.estimateTokens(entry),
      0
    );
    // The newest entry is always kept, even if it alone exceeds the budget.
    while (this.contextWindow.length > 1 && estimatedTokens > this.maxContextTokens) {
      const dropped = this.contextWindow.shift();
      if (dropped === undefined) break;
      estimatedTokens -= AgentOrchestrator.estimateTokens(dropped);
    }
  }

  /**
   * Rough token estimate using the common "about four characters per token"
   * rule of thumb. This is an approximation, not a real tokenizer.
   */
  private static estimateTokens(text: string): number {
    return Math.ceil(text.length / 4);
  }
}
Architecture Rationale
- Decoupling Planning & Execution: Separates reasoning from I/O. Enables parallel tool execution, deterministic validation, and independent scaling of planning vs. action layers.
- Schema-First Tool Contracts: Prevents LLM hallucination of parameters, reduces injection risk, and enables static validation before runtime.
- Bounded Context Window: Pruning and explicit state tracking prevent token blowout and context drift, which are the primary causes of agent degradation over long sessions.
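- Retry with Backoff & Fallback: Transient failures (rate limits, network timeouts, temporary service outages) should be isolated from logic failures. Note that the executor shown above retries every failed attempt; a production executor should distinguish between retryable and terminal errors and retry only the former.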
Pitfall Guide
- **Unbounded Context Accumulation:** Appending every tool output and LLM response to the prompt causes context window exhaustion, increased latency, and cost multiplication. Best practice: implement explicit state management with token-aware pruning. Keep only task-relevant history and summarize completed steps.
- **Vague or Missing Tool Schemas:** LLMs will guess parameter types, omit required fields, or pass malformed JSON when tools lack strict contracts. Best practice: use Zod/Pydantic schemas, validate inputs before execution, and return structured error messages that the planner can consume for self-correction.
- **Synchronous Blocking Loops:** Running tool calls sequentially when they are independent creates artificial latency. Best practice: identify dependency graphs in the plan, execute independent steps in parallel using `Promise.all`, and resolve dependencies before downstream steps.
- **No Circuit Breaker or Fallback Strategy:** External APIs and LLM endpoints fail. Without circuit breakers, agents cascade into repeated failures. Best practice: implement timeout thresholds, track failure rates per tool, open circuits after N consecutive failures, and route to fallback tools or degrade gracefully.
- **Over-Engineering with Multi-Agent Systems:** Introducing specialized agents (researcher, writer, critic) adds coordination overhead, token cost, and debugging complexity. Best practice: start with Planner-Executor. Only introduce multi-agent patterns when tasks require fundamentally different expertise, security boundaries, or parallel workstreams that cannot be abstracted into tools.
- **Ignoring Evaluation Metrics:** Shipping agents without measuring reliability, cost, and latency per task leads to silent degradation. Best practice: instrument every step with metrics. Track tool success rate, planner accuracy, retry frequency, and token consumption. Run regression evals before deployment.
- **Prompt Injection via Tool Outputs:** Unsanitized tool responses injected back into the LLM context can trigger prompt injection or logic hijacking. Best practice: sanitize tool outputs, wrap them in explicit delimiters, and use system-level instructions that forbid interpreting tool data as commands.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Simple CRUD or single-step query | Single-Agent (Direct) | Low complexity, deterministic path, minimal overhead | Lowest |
| Multi-step workflow with external APIs | Planner-Executor | Decoupled reasoning, bounded state, parallel tool execution | Moderate |
| Tasks requiring distinct expertise (legal + technical) | Multi-Agent (Specialized) | Isolated contexts, domain-specific prompts, security boundaries | High |
| Rapid prototyping / internal tooling | ReAct Loop | Fast iteration, built-in reasoning trace, lower boilerplate | Low-Moderate |
Configuration Template
// agent.config.ts
import { z } from 'zod';
/**
 * Configuration template for the agent: LLM settings, planning limits,
 * execution behavior, tool schemas, and observability options.
 * The trailing `as const` makes the literal readonly and narrowly typed.
 */
export const agentConfig = {
// LLM provider and sampling settings.
llm: {
provider: 'openai',
model: 'gpt-4o-mini',
temperature: 0.1,
maxTokens: 2048,
// presumably milliseconds — TODO confirm
timeout: 15000
},
// Limits applied while planning.
planning: {
maxSteps: 8,
maxRetries: 2,
// presumably a token count — TODO confirm
contextLimit: 4000,
pruningStrategy: 'fifo' // 'fifo' | 'semantic' | 'recent'
},
// Execution behavior: parallelism, circuit breaking, and fallback.
execution: {
parallelEnabled: true,
circuitBreaker: {
// presumably the number of consecutive failures before the circuit opens — TODO confirm
threshold: 3,
// presumably milliseconds — TODO confirm
resetTimeout: 30000
},
// NOTE(review): 'search_database_fallback' is not declared in `tools` below — confirm it is registered elsewhere.
fallbackTool: 'search_database_fallback'
},
// Tool contracts: a name, an input schema, and a per-tool timeout.
tools: [
{
name: 'search_database',
// NOTE(review): this schema omits the `.max(200)` bound on `query` and the optional `filters` field
// that the `searchTool` definition uses — confirm which one is authoritative.
schema: z.object({
query: z.string().min(3),
limit: z.number().int().min(1).max(50).default(10)
}),
timeout: 5000
},
{
name: 'generate_report',
schema: z.object({
title: z.string(),
sections: z.array(z.string()),
format: z.enum(['markdown', 'json']).default('markdown')
}),
timeout: 10000
}
],
// Metrics to collect and the log verbosity.
observability: {
metrics: ['latency', 'token_cost', 'tool_success_rate', 'retry_count'],
logLevel: 'info'
}
} as const;
Quick Start Guide
- Initialize project: `npm init -y && npm install zod openai @anthropic-ai/sdk`
- Create config file: Copy the Configuration Template into `agent.config.ts` and adjust provider/model settings.
- Implement tools: Define tool schemas and execution functions matching the template structure. Ensure all inputs are validated before runtime.
- Launch orchestrator: Instantiate `AgentOrchestrator` with your LLM client and tools, call `run()` with a goal string, and monitor metrics via your observability stack.
- Validate: Run against 10–20 known test cases. Check tool success rate, latency, and token cost. Adjust `maxRetries` and `contextLimit` based on results before production deployment.