me validation and type inference.
import { z } from 'zod';
/**
 * Contract for a callable tool: a name, a human-readable description, a Zod
 * schema for its arguments, and the async function that performs the work.
 *
 * @typeParam T - Zod schema type of the arguments; `execute` receives `z.infer<T>`.
 */
export interface ToolDefinition<T extends z.ZodType> {
/** Identifier of the tool. */
name: string;
/** Human-readable summary of what the tool does. */
description: string;
/** Zod schema describing the arguments the tool accepts. */
parameters: T;
/** Runs the tool with arguments typed by `parameters` and resolves to its result. */
execute: (args: z.infer<T>) => Promise<ToolResult>;
/**
 * Optional boolean flag.
 * NOTE(review): despite the name this is a flag, not a key value. Presumably it
 * marks the tool as safe to cache or retry; confirm against whatever consumes it.
 */
idempotencyKey?: boolean;
}
/**
 * Outcome of a single tool invocation.
 */
export interface ToolResult {
/** Whether the invocation succeeded. */
success: boolean;
/** Textual result of the tool. */
output: string;
/** Optional description of what went wrong. */
error?: string;
/** Optional free-form extra data attached to the result. */
metadata?: Record<string, unknown>;
}
// Example Tool: Weather Lookup

/**
 * Argument schema for `get_weather`.
 *
 * Declared once as a value so that both the `ToolDefinition` type argument and
 * the runtime `parameters` field refer to the same schema. (`typeof` in a type
 * position only accepts an identifier, not a call such as `z.object({...})`.)
 */
const weatherParameters = z.object({
  location: z.string().describe("City and country, e.g. 'London, UK'"),
  units: z.enum(['celsius', 'fahrenheit']).default('celsius'),
});

/**
 * Example tool that returns a canned weather report for the requested location.
 *
 * Exported so that other modules can register it with an executor.
 */
export const weatherTool: ToolDefinition<typeof weatherParameters> = {
  name: 'get_weather',
  description: 'Retrieves current weather data for a location.',
  parameters: weatherParameters,
  execute: async (args) => {
    // Simulate API call. The stubbed report is fixed and does not use `args.units`.
    return {
      success: true,
      output: `Weather in ${args.location}: 22°C, Sunny`,
      metadata: { cached: false },
    };
  },
  idempotencyKey: true,
};
The executor handles validation, execution, timeouts, and error normalization.
/**
 * Registry and runner for tools.
 *
 * Looks a tool up by name, validates the raw arguments against the tool's
 * schema, runs it under a time limit, and converts every failure (unknown
 * tool, validation error, thrown error, timeout) into a failed `ToolResult`
 * instead of throwing.
 */
export class ToolExecutor {
  private tools: Map<string, ToolDefinition<any>>;
  private timeoutMs: number;

  /**
   * @param tools - Tools to register, keyed by `name`; a later entry replaces
   *   an earlier one with the same name.
   * @param timeoutMs - Maximum time to wait for a single tool call, in milliseconds.
   */
  constructor(tools: ToolDefinition<any>[], timeoutMs = 5000) {
    this.tools = new Map(tools.map(t => [t.name, t]));
    this.timeoutMs = timeoutMs;
  }

  /**
   * Validates `args` and runs the named tool.
   *
   * @param toolName - Name the tool was registered under.
   * @param args - Raw, untrusted arguments; parsed with the tool's schema before use.
   * @returns The tool's own result, or `{ success: false, output: '', error }`
   *   when the tool is unknown, validation fails, the tool throws, or the
   *   time limit elapses. This method does not reject.
   */
  async execute(toolName: string, args: unknown): Promise<ToolResult> {
    const tool = this.tools.get(toolName);
    if (!tool) {
      return { success: false, output: '', error: `Unknown tool: ${toolName}` };
    }

    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      const validatedArgs = tool.parameters.parse(args);

      // Timeout wrapper: whichever promise settles first decides the outcome.
      // The tool call itself is not cancelled on timeout; only the wait ends.
      const executionPromise = tool.execute(validatedArgs);
      const timeoutPromise = new Promise<never>((_, reject) => {
        timer = setTimeout(
          () => reject(new Error('Tool execution timeout')),
          this.timeoutMs
        );
      });

      return await Promise.race([executionPromise, timeoutPromise]);
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Unknown error';
      return { success: false, output: '', error: message };
    } finally {
      // Without this, every completed call leaves a live timer behind that
      // keeps the event loop busy for up to `timeoutMs`.
      if (timer !== undefined) {
        clearTimeout(timer);
      }
    }
  }

  /** Returns the registered tool definitions in registration order. */
  getToolList(): ToolDefinition<any>[] {
    return Array.from(this.tools.values());
  }
}
3. Orchestration Patterns
Pattern A: Parallel Fan-Out
For independent tool calls, parallel execution reduces latency. The model outputs multiple tool calls, and the executor runs them concurrently.
/**
 * Starts every requested tool call at once and waits for all of them.
 *
 * @param executor - Executor used to run each call.
 * @param calls - Tool names with their raw arguments.
 * @returns One result per call, in the same order as `calls`.
 */
export async function executeParallel(
  executor: ToolExecutor,
  calls: Array<{ name: string; args: unknown }>
): Promise<ToolResult[]> {
  const inFlight: Promise<ToolResult>[] = [];
  // Kick off every call before awaiting any of them so they overlap.
  for (const { name, args } of calls) {
    inFlight.push(executor.execute(name, args));
  }
  return Promise.all(inFlight);
}
// Usage: `executor` must already be in scope; both calls are started together.
const parallelCalls = [
  { name: 'get_weather', args: { location: 'London' } },
  { name: 'get_stock', args: { symbol: 'AAPL' } },
];
const results = await executeParallel(executor, parallelCalls);
Pattern B: Structured Agentic Loop (ReAct)
For complex reasoning, implement a loop that feeds tool observations back to the model with structured thought traces.
/** Shape of a tool call as this loop reads it from the LLM response. */
interface AgentToolCall {
  id: string;
  function: { name: string; arguments: string };
}

/** A tool call after its JSON arguments have been parsed (or failed to parse). */
type ParsedToolCall =
  | { call: AgentToolCall; ok: true; args: unknown }
  | { call: AgentToolCall; ok: false; failure: ToolResult };

/**
 * Loop that alternates between asking the LLM what to do and running the
 * tools it requests, feeding each tool observation back into the conversation
 * until the model answers without requesting tools or the iteration budget
 * is spent.
 */
export class AgenticLoop {
  private executor: ToolExecutor;
  private maxIterations: number;
  private llmClient: any; // Abstracted LLM client

  /**
   * @param executor - Runs the tools the model asks for.
   * @param llmClient - Client exposing `chat({ messages, tools, tool_choice })`.
   * @param maxIterations - Upper bound on LLM round-trips per `run`.
   */
  constructor(executor: ToolExecutor, llmClient: any, maxIterations = 10) {
    this.executor = executor;
    this.llmClient = llmClient;
    this.maxIterations = maxIterations;
  }

  /**
   * Runs the loop for one user query.
   *
   * @param userQuery - The user's request; becomes the first history message.
   * @returns The model's final `content`, or a fixed error string when
   *   `maxIterations` round-trips pass without a final answer.
   */
  async run(userQuery: string): Promise<string> {
    const history: any[] = [{ role: 'user', content: userQuery }];

    // The tool catalogue does not change during a run, so build it once.
    // NOTE(review): `parameters` is the Zod schema object itself; the LLM API
    // presumably expects JSON Schema, so convert it before sending. Confirm
    // against the client in use.
    const tools = this.executor.getToolList().map(t => ({
      type: 'function',
      function: {
        name: t.name,
        description: t.description,
        parameters: t.parameters
      }
    }));

    for (let iteration = 0; iteration < this.maxIterations; iteration++) {
      // Request tool use from LLM
      const response = await this.llmClient.chat({
        messages: history,
        tools,
        tool_choice: 'auto'
      });

      if (!response.tool_calls || response.tool_calls.length === 0) {
        return response.content; // Final answer
      }
      const toolCalls: AgentToolCall[] = response.tool_calls;

      // Add assistant message with tool calls
      history.push({
        role: 'assistant',
        content: response.content,
        tool_calls: response.tool_calls
      });

      // Execute tools and append one observation per tool call, in order.
      const observations = await this.runToolCalls(toolCalls);
      for (const { id, result } of observations) {
        history.push({
          role: 'tool',
          tool_call_id: id,
          content: result.success ? result.output : `Error: ${result.error}`
        });
      }
    }

    return 'Error: Maximum iterations reached without resolution.';
  }

  /**
   * Parses each call's JSON arguments and runs the parseable ones in parallel.
   *
   * A call whose arguments are not valid JSON is not executed; it yields a
   * failed result instead, so the model sees the problem as an observation
   * rather than the whole run rejecting.
   */
  private async runToolCalls(
    toolCalls: AgentToolCall[]
  ): Promise<Array<{ id: string; result: ToolResult }>> {
    const parsed = toolCalls.map((call): ParsedToolCall => {
      try {
        return { call, ok: true, args: JSON.parse(call.function.arguments) };
      } catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        return {
          call,
          ok: false,
          failure: {
            success: false,
            output: '',
            error: `Invalid JSON arguments for tool "${call.function.name}": ${reason}`
          }
        };
      }
    });

    // Supports parallel execution if model requests multiple tools
    const executed = await executeParallel(
      this.executor,
      parsed.flatMap(p => (p.ok ? [{ name: p.call.function.name, args: p.args }] : []))
    );

    // Re-interleave executed results with parse failures in the original order.
    let nextExecuted = 0;
    return parsed.map(p => ({
      id: p.call.id,
      result: p.ok ? executed[nextExecuted++] : p.failure
    }));
  }
}
4. Architecture Decisions
- Schema-First Design: Tools are defined with Zod schemas. This enables automatic JSON schema generation for the LLM API and runtime validation, eliminating a class of errors related to malformed arguments.
- Idempotency Support: Tools expose an `idempotencyKey` flag. The orchestrator can use this to cache results or prevent duplicate side-effects during retries.
- Parallel-First Execution: The `executeParallel` function allows the system to batch independent tool calls. The model should be prompted to request multiple tools when possible to minimize loop latency.
- Timeout and Circuit Breaking: The executor enforces strict timeouts. Production systems should integrate circuit breakers to prevent cascading failures when external APIs are degraded.
Pitfall Guide
1. Context Window Explosion
Mistake: Appending full tool outputs to the conversation history without truncation or summarization.
Impact: Context window overflow leads to truncation of critical instructions or excessive token costs.
Best Practice: Implement output truncation based on token limits. Use summarization for large outputs or store full results in external memory, passing only relevant excerpts to the LLM.
2. Repeated Tool Call Loops
Mistake: The model repeatedly calls the same tool with identical arguments due to lack of state tracking or error feedback.
Impact: Wasted compute, latency spikes, and potential rate limit violations.
Best Practice: Implement a loop detection mechanism in the orchestrator. If the same tool is called twice with identical arguments, force a fallback or inject a system message instructing the model to change strategy.
3. Schema Drift
Mistake: Updating tool implementation without updating the LLM schema or vice versa.
Impact: Tool calls fail with validation errors, or the model hallucinates parameters that no longer exist.
Best Practice: Generate tool schemas programmatically from the tool definition code. Use CI/CD checks to ensure schema consistency. Version tools when breaking changes occur.
4. Unvalidated Tool Arguments
Mistake: Passing unvalidated LLM output directly to system commands or sensitive APIs.
Impact: Prompt injection attacks can manipulate tool arguments to execute unauthorized actions or leak data.
Best Practice: Validate all arguments against strict schemas. Sanitize inputs for tools that interact with databases or file systems. Implement least-privilege execution contexts for tools.
5. Latency Stacking in Serial Execution
Mistake: Forcing the model to call tools sequentially when they are independent.
Impact: Unnecessary latency degradation.
Best Practice: Configure the LLM to support parallel_tool_calls. Prompt the model to identify independent operations and request them in a single turn. Monitor execution graphs to identify serialization bottlenecks.
6. Lack of Error Recovery Strategies
Mistake: Treating tool errors as fatal. The model receives an error and halts or hallucinates a response.
Impact: Poor user experience and failure to complete tasks.
Best Practice: Structure error messages to be actionable. Include hints in the error output (e.g., "Invalid format, expected ISO date"). Implement retry logic with exponential backoff for transient errors.
7. Hallucinated Tool Names
Mistake: The model invents tool names or parameters not in the catalog.
Impact: Execution failures.
Best Practice: Use tool_choice constraints to force the model to select from available tools. Implement a fallback router that maps similar tool names to correct ones or returns a clear "tool not found" error with suggestions.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Simple Data Lookup | Naive Single-Step | Low latency, sufficient accuracy for direct queries. | Low |
| Complex Multi-Step Reasoning | Structured Agentic Loop | Handles dependencies and dynamic decision-making. | High (Multiple LLM calls) |
| Batch Data Fetching | Parallel Fan-Out | Reduces latency for independent operations. | Medium (Optimized latency) |
| Large Tool Catalog (>50 tools) | Hierarchical Routing | Reduces context window and improves selection accuracy. | Medium (Router overhead) |
| High-Reliability Requirement | Agentic with Validation | Schema validation and error recovery ensure robustness. | High |
Configuration Template
// tool-config.ts
import { z } from 'zod';
/** Argument schema for the `search_database` tool entry. */
const searchDatabaseSchema = z.object({
  query: z.string(),
  limit: z.number().int().min(1).max(100).default(10),
});

/** Argument schema for the `send_email` tool entry. */
const sendEmailSchema = z.object({
  to: z.string().email(),
  subject: z.string(),
  body: z.string(),
});

/**
 * Configuration template grouping executor settings, orchestrator settings
 * and the tool catalogue in one object.
 * NOTE(review): no code in view reads these values; wire them into the
 * executor/orchestrator as needed.
 */
export const ToolConfig = {
  executor: {
    timeoutMs: 5000,
    maxRetries: 2,
    retryBackoff: 'exponential',
  },
  orchestrator: {
    maxIterations: 10,
    enableParallelExecution: true,
    loopDetection: { enabled: true, maxSameCallCount: 2 },
    contextManagement: { maxOutputTokens: 1000, summarizeLargeOutputs: true },
  },
  tools: [
    { name: 'search_database', schema: searchDatabaseSchema, idempotent: true },
    {
      name: 'send_email',
      schema: sendEmailSchema,
      idempotent: false,
      requiresConfirmation: true,
    },
  ],
};
Quick Start Guide
- Install Dependencies:
npm install zod
- Define Tools: Create tool definitions using the `ToolDefinition` interface and Zod schemas.
- Initialize Executor: Instantiate `ToolExecutor` with your tool list and configuration.
- Run Agent: Use `AgenticLoop` to process user queries, passing the executor and LLM client.
- Monitor: Log tool execution metrics and adjust timeouts/limits based on production data.
// main.ts
import { ToolExecutor, AgenticLoop } from './orchestrator';
import { weatherTool } from './tools';
// NOTE(review): `llmClient` is not defined in this snippet; supply your own
// client instance before running it.
const toolTimeoutMs = 5000;
const maxAgentIterations = 10;

const executor = new ToolExecutor([weatherTool], toolTimeoutMs);
const agent = new AgenticLoop(executor, llmClient, maxAgentIterations);

// Run one query through the agent and print its final answer.
const result = await agent.run("What is the weather in Tokyo?");
console.log(result);