onst DEFAULT_CONFIG: Required<SearchConfig> = {
timeoutMs: 8000,
maxRetries: 3,
chunkSize: 1500,
};
/**
 * HTTP client for the semantic search endpoint.
 *
 * Sends `POST {baseUrl}/search` with a JSON body, aborts each attempt after
 * `config.timeoutMs`, and makes up to `config.maxRetries` attempts in total
 * with exponential backoff between them. Documents longer than
 * `config.chunkSize` UTF-16 code units are split into chunks before sending.
 */
export class SemanticClient {
  private readonly baseUrl: string;
  private readonly config: Required<SearchConfig>;

  /**
   * @param baseUrl Prefix the `/search` path is appended to.
   * @param config Optional overrides merged over `DEFAULT_CONFIG`.
   */
  constructor(baseUrl: string, config?: Partial<SearchConfig>) {
    this.baseUrl = baseUrl;
    this.config = { ...DEFAULT_CONFIG, ...config };
  }

  /**
   * Runs a search, retrying failed attempts.
   *
   * `config.maxRetries` is the total number of attempts; if it is 0 no request
   * is made and `Max retries exceeded` is thrown.
   *
   * @param request Search request; documents are chunked and `top_k` /
   *   `threshold` defaulted before sending.
   * @returns The `results` field of the JSON response.
   * @throws The error of the last failed attempt. A `DOMException` (which is
   *   what an aborted fetch rejects with) is rethrown immediately, without
   *   further retries.
   */
  async query(request: SemanticSearchRequest): Promise<SemanticSearchResult[]> {
    const payload = this.preparePayload(request);
    let attempt = 0;
    while (attempt < this.config.maxRetries) {
      try {
        return await this.sendOnce(payload);
      } catch (error) {
        attempt++;
        // `>=` rather than `===`: with a fractional maxRetries an equality
        // check never matches and the real error would be replaced by the
        // generic one thrown after the loop.
        if (attempt >= this.config.maxRetries || error instanceof DOMException) {
          throw error;
        }
        await this.backoff(attempt);
      }
    }
    throw new Error('Max retries exceeded');
  }

  /**
   * Performs a single request guarded by the configured timeout.
   *
   * The abort timer is cleared in `finally`, so it is released on every exit
   * path (including a rejected fetch) and stays armed while the response body
   * is being read.
   */
  private async sendOnce(payload: SemanticSearchRequest): Promise<SemanticSearchResult[]> {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.config.timeoutMs);
    try {
      const response = await fetch(`${this.baseUrl}/search`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
        signal: controller.signal,
      });
      if (!response.ok) {
        // The error body is optional; fall back to the status code when it is
        // missing, not JSON, or has no `message`.
        const errorBody = await response.json().catch(() => ({}));
        throw new Error(errorBody?.message || `HTTP ${response.status}`);
      }
      const data = await response.json();
      return data.results as SemanticSearchResult[];
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Builds the request body: splits oversized documents into chunks and fills
   * in `top_k` (5) and `threshold` (0.5) when the caller left them unset.
   *
   * A chunk size below 1 (or NaN) disables chunking instead of looping forever.
   */
  private preparePayload(request: SemanticSearchRequest): SemanticSearchRequest {
    const size = Math.floor(this.config.chunkSize);
    const chunkedDocs =
      size >= 1
        ? request.documents.flatMap(doc =>
            doc.length > size ? this.splitIntoChunks(doc, size) : [doc],
          )
        : [...request.documents];
    return {
      ...request,
      documents: chunkedDocs,
      top_k: request.top_k ?? 5,
      threshold: request.threshold ?? 0.5,
    };
  }

  /**
   * Splits `text` into consecutive pieces of at most `size` UTF-16 code units.
   *
   * A boundary that would fall between the two halves of a surrogate pair is
   * moved so the pair stays in one chunk (one unit earlier, or one unit later
   * when the chunk would otherwise be empty). Concatenating the chunks always
   * yields `text`. A `size` below 1 (or NaN) returns the text as a single chunk.
   */
  private splitIntoChunks(text: string, size: number): string[] {
    const step = Math.floor(size);
    if (!(step >= 1)) {
      return [text];
    }
    const chunks: string[] = [];
    let start = 0;
    while (start < text.length) {
      let end = Math.min(start + step, text.length);
      if (end < text.length) {
        const last = text.charCodeAt(end - 1);
        const next = text.charCodeAt(end);
        const splitsPair =
          last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
        if (splitsPair) {
          end = end - 1 > start ? end - 1 : end + 1;
        }
      }
      chunks.push(text.slice(start, end));
      start = end;
    }
    return chunks;
  }

  /** Waits `1000 * 2^attempt` ms, capped at 5000 ms. */
  private backoff(attempt: number): Promise<void> {
    const delay = Math.min(1000 * 2 ** attempt, 5000);
    return new Promise(resolve => setTimeout(resolve, delay));
  }
}
### 3. State Management with React Query
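Use `useMutation` for search execution and the shared `QueryClient` cache for storing results. Normalize the query key (trim whitespace, lowercase) so that trivially different inputs map to the same cache entry instead of fragmenting the cache.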
```ts
// hooks/useSemanticSearch.ts
import { useMutation, useQueryClient } from '@tanstack/react-query';
import { SemanticClient } from '../lib/semantic-client';
import { SemanticSearchRequest, SemanticSearchResult } from '../types/semantic-search';
const client = new SemanticClient('/api/search-proxy');
/** How long a cached result set is reused before the search is sent again (3 minutes). */
const SEMANTIC_CACHE_STALE_MS = 1000 * 60 * 3;

/**
 * Builds the cache key for a search request.
 *
 * The key covers every field that is sent with the request (query, documents,
 * `top_k`, `threshold`), so a cached entry is only reused for an equivalent
 * request. The query text is trimmed and lowercased so whitespace and case
 * differences share one entry.
 */
function semanticQueryKey(params: SemanticSearchRequest) {
  return [
    'semantic',
    params.query.trim().toLowerCase(),
    params.documents,
    params.top_k ?? null,
    params.threshold ?? null,
  ] as const;
}

/**
 * Mutation hook that executes a semantic search through the shared client.
 *
 * Results are stored in the query cache under `semanticQueryKey(params)`.
 * A later call with an equivalent request returns the cached results without
 * a network round trip, as long as the entry is younger than
 * `SEMANTIC_CACHE_STALE_MS`.
 *
 * @returns The `useMutation` result; call `mutate(params)` to run a search.
 */
export function useSemanticSearch() {
  const queryClient = useQueryClient();
  return useMutation({
    mutationFn: async (params: SemanticSearchRequest) => {
      const cachedState = queryClient.getQueryState<SemanticSearchResult[]>(
        semanticQueryKey(params),
      );
      // Freshness is checked here by hand: `staleTime` is a query option and
      // has no effect when passed to `useMutation`.
      if (
        cachedState?.data !== undefined &&
        Date.now() - cachedState.dataUpdatedAt < SEMANTIC_CACHE_STALE_MS
      ) {
        return cachedState.data;
      }
      return client.query(params);
    },
    onSuccess: (data, variables) => {
      queryClient.setQueryData(semanticQueryKey(variables), data);
    },
    retry: 1,
  });
}
```

### 4. UI Implementation with Debouncing
Decouple user input from network calls using a custom debounce hook. This prevents request flooding while maintaining responsive UI feedback.
// hooks/useDebounce.ts
import { useState, useEffect } from 'react';
/**
 * Returns `value` delayed by `delay` milliseconds.
 *
 * The returned value only changes once `value` (and `delay`) have stayed the
 * same for a full `delay`; each change before that cancels the pending timer
 * and starts a new one. The timer is also cancelled on unmount.
 *
 * @param value The rapidly changing input value.
 * @param delay Quiet period in milliseconds before the value is published.
 * @returns The most recently published value (initially `value`).
 */
export function useDebounce<T>(value: T, delay: number): T {
  const [settled, setSettled] = useState(value);

  useEffect(() => {
    const handle = setTimeout(() => {
      setSettled(value);
    }, delay);

    return () => {
      clearTimeout(handle);
    };
  }, [value, delay]);

  return settled;
}
// components/SearchInterface.tsx
import { useState, useEffect } from 'react';
import { useSemanticSearch } from '../hooks/useSemanticSearch';
import { useDebounce } from '../hooks/useDebounce';
/**
 * Search form: a query input, an editable list of context documents, and the
 * ranked results.
 *
 * A search runs whenever the debounced query (more than two non-blank
 * characters) or the debounced document list changes. Blank documents are
 * dropped before the request is sent.
 */
export function SearchInterface() {
  const [rawInput, setRawInput] = useState('');
  const [docs, setDocs] = useState<string[]>(['']);
  const debouncedQuery = useDebounce(rawInput, 400);
  // The documents are debounced as well; otherwise every keystroke in a
  // textarea would trigger its own request.
  const debouncedDocs = useDebounce(docs, 400);
  const { mutate: executeSearch, data: results, isPending, error } = useSemanticSearch();

  useEffect(() => {
    if (debouncedQuery.trim().length > 2) {
      executeSearch({
        query: debouncedQuery,
        documents: debouncedDocs.filter(d => d.trim()),
      });
    }
  }, [debouncedQuery, debouncedDocs, executeSearch]);

  return (
    <div className="search-module">
      {/* The input stays enabled while a search is pending: disabling a
          focused input drops focus and interrupts typing. */}
      <input
        type="text"
        value={rawInput}
        onChange={e => setRawInput(e.target.value)}
        placeholder="Enter semantic query..."
        aria-busy={isPending}
      />
      {docs.map((doc, i) => (
        <textarea
          key={i}
          value={doc}
          onChange={e => {
            const text = e.target.value;
            // Functional update so the edit is applied to the latest list.
            setDocs(prev => prev.map((d, j) => (j === i ? text : d)));
          }}
          placeholder="Document content..."
        />
      ))}
      {/* type="button" keeps this from submitting an enclosing form. */}
      <button type="button" onClick={() => setDocs(prev => [...prev, ''])}>
        Add Context
      </button>
      {isPending && <div className="loader">Processing vectors...</div>}
      {error && <div className="error">Retrieval failed: {error.message}</div>}
      {results && (
        <ul className="results-list">
          {results.map((r, idx) => (
            <li key={idx}>
              <span className="score-badge">{(r.score * 100).toFixed(1)}%</span>
              {/* Show at most the first 180 characters of each document. */}
              <p>{r.document.slice(0, 180)}{r.document.length > 180 ? '...' : ''}</p>
            </li>
          ))}
        </ul>
      )}
    </div>
  );
}
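### 5. Server-Side Proxy Route
Never expose the DeepSeek API key to the browser; in particular, do not store it in a NEXT_PUBLIC_-prefixed variable such as NEXT_PUBLIC_DEEPSEEK_API_KEY. Route requests through a secure backend endpoint that injects the credential server-side; the same endpoint is the place to enforce rate limits.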
// app/api/search-proxy/route.ts
import { NextResponse } from 'next/server';
/**
 * Forwards a search request to the upstream API with the server-side key.
 *
 * The upstream base URL is read from `DEEPSEEK_BASE_URL` and falls back to
 * `https://api.deepseek.com/v1`.
 *
 * Responses:
 * - upstream success: the upstream JSON body, status 200
 * - upstream error status: `{ message }` with the upstream status code
 * - 400: the incoming request body is not valid JSON
 * - 500: `DEEPSEEK_API_KEY` is not set
 * - 502: the upstream could not be reached or returned a non-JSON body
 */
export async function POST(request: Request) {
  const apiKey = process.env.DEEPSEEK_API_KEY;
  if (!apiKey) {
    // Fail early instead of sending "Bearer undefined" upstream.
    return NextResponse.json({ message: 'Search proxy is not configured' }, { status: 500 });
  }

  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return NextResponse.json({ message: 'Invalid JSON body' }, { status: 400 });
  }

  const baseUrl = (process.env.DEEPSEEK_BASE_URL || 'https://api.deepseek.com/v1').replace(/\/+$/, '');

  let response: Response;
  try {
    response = await fetch(`${baseUrl}/search`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(body),
    });
  } catch {
    return NextResponse.json({ message: 'Upstream request failed' }, { status: 502 });
  }

  if (!response.ok) {
    // The upstream error body is optional; fall back to a generic message.
    const err = await response.json().catch(() => ({}));
    return NextResponse.json({ message: err?.message || 'Upstream error' }, { status: response.status });
  }

  try {
    const data = await response.json();
    return NextResponse.json(data);
  } catch {
    return NextResponse.json({ message: 'Upstream returned an invalid response' }, { status: 502 });
  }
}
Pitfall Guide
1. Client-Side Credential Exposure
Explanation: Storing API keys in .env.local with NEXT_PUBLIC_ prefixes bakes them into client bundles. Attackers can extract them and consume your quota.
Fix: Always proxy through a server route. Use process.env.DEEPSEEK_API_KEY without the public prefix. Implement IP allowlisting or JWT validation on the proxy route.
2. Unbounded Document Payloads
Explanation: Sending raw, multi-megabyte documents directly to the API triggers timeout errors and exceeds token limits. DeepSeek R1 processes semantic vectors efficiently, but payload size directly impacts serialization overhead.
Fix: Implement client-side chunking with semantic boundaries (paragraphs or sentences). Merge results server-side or use a sliding window approach to preserve context continuity.
3. Race Conditions in Async Search
Explanation: Rapid input changes cause multiple in-flight requests. Responses can arrive out of order: an earlier request may resolve after a later one and overwrite fresh results with stale data.
Fix: Use AbortController to cancel pending requests when new input arrives. React Query v5 passes an AbortSignal to query functions and aborts it when the query becomes out-of-date or inactive, but mutations and manual fetch implementations require explicit cancellation logic.
4. Static Threshold Configuration
Explanation: Hardcoding threshold: 0.5 ignores domain-specific relevance requirements. Technical documentation may need higher precision, while conversational search benefits from lower thresholds.
Fix: Expose threshold as a configurable parameter tied to user roles or search context. Implement adaptive thresholds that adjust based on result count and historical click-through rates.
5. WebSocket Connection Leaks
Explanation: Real-time search implementations often open persistent connections without cleanup. Unmounted components leave dangling listeners, causing memory leaks and duplicate event handling.
Fix: Implement a connection manager with reference counting. Use useEffect cleanup functions to close sockets and remove listeners. Add exponential backoff reconnection logic for network interruptions.
6. Missing Retry & Backoff Strategies
Explanation: Network blips, temporary rate limits, or upstream throttling cause transient failures. Without retry logic, each of them surfaces to the user as an error or an empty state even though an immediate retry would have succeeded.
Fix: Implement exponential backoff with jitter. Respect Retry-After headers when present. Cap retries at 3–5 attempts to avoid cascading load on the API.
7. Over-Caching Dynamic Queries
Explanation: Caching every unique query string fragments the cache and wastes memory. Semantic search queries are highly variable, making aggressive caching counterproductive.
Fix: Normalize query keys (trim, lowercase, remove punctuation). Set staleTime to 2–5 minutes for identical queries. Implement cache eviction policies based on LRU principles or explicit invalidation triggers.
Production Bundle
Action Checklist
Decision Matrix
| Scenario | Recommended Approach | Why | Cost Impact |
|---|---|---|---|
| Low-volume internal tool | Direct client-side fetch with React Query | Simplicity reduces development time | Low (acceptable risk for internal use) |
| Customer-facing SaaS | Server proxy + chunking + debounce | Prevents credential leakage and rate limit abuse | Medium (proxy infrastructure) |
| Real-time collaborative search | WebSocket manager + connection pooling | Maintains low-latency updates without polling overhead | High (persistent connections, monitoring) |
| Enterprise compliance | On-premise proxy + audit logging + RBAC | Meets data residency and access control requirements | High (infrastructure + engineering) |
Configuration Template
# .env.local (Server-side only)
DEEPSEEK_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxx
DEEPSEEK_BASE_URL=https://api.deepseek.com/v1
SEARCH_TIMEOUT_MS=8000
SEARCH_MAX_RETRIES=3
SEARCH_CHUNK_SIZE=1500
SEARCH_DEBOUNCE_MS=400
// lib/search-config.ts
/**
 * Parses a numeric environment value.
 *
 * @param raw The raw environment string, or `undefined` when unset.
 * @param fallback Value used when `raw` is unset, blank, not a finite number,
 *   or below `min`.
 * @param min Smallest accepted value (inclusive).
 * @returns The parsed number, or `fallback`.
 */
function parseEnvNumber(raw: string | undefined, fallback: number, min: number): number {
  if (raw === undefined || raw.trim() === '') {
    return fallback;
  }
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= min ? parsed : fallback;
}

/**
 * Search settings read from the environment, with defaults.
 *
 * `timeout`, `retries` and `chunkSize` must be at least 1; `debounce` may be 0.
 * Values that are missing, non-numeric, or out of range fall back to the
 * default. Cache durations are fixed (3 and 10 minutes, in milliseconds).
 */
export const SEARCH_CONFIG = {
  timeout: parseEnvNumber(process.env.SEARCH_TIMEOUT_MS, 8000, 1),
  retries: parseEnvNumber(process.env.SEARCH_MAX_RETRIES, 3, 1),
  chunkSize: parseEnvNumber(process.env.SEARCH_CHUNK_SIZE, 1500, 1),
  debounce: parseEnvNumber(process.env.SEARCH_DEBOUNCE_MS, 400, 0),
  cacheStaleTime: 1000 * 60 * 3,
  cacheGcTime: 1000 * 60 * 10,
} as const;
Quick Start Guide
- Initialize Project: Create a Next.js 14 app with TypeScript (`npx create-next-app@latest semantic-search --typescript`).
- Install Dependencies: Add `@tanstack/react-query` for state management and `clsx` for styling (`npm i @tanstack/react-query clsx`).
- Configure Environment: Add `DEEPSEEK_API_KEY` to `.env.local` without the `NEXT_PUBLIC_` prefix.
- Create Proxy Route: Implement `/app/api/search-proxy/route.ts` to forward requests with server-side authentication.
- Wire Components: Import `useSemanticSearch`, wrap your UI in `<QueryClientProvider>`, and connect the debounced input to the mutation hook. Test with the configured 400 ms debounce and verify cache behavior across rapid keystrokes.