04 — Tool use¶
Schema generation, tool dispatch loops, and ML model wrappers as agent tools. The core insight: existing service clients are already the right shape for LLM tools. The fraud scoring model, the PyICE allocator, the rev-sci partner history endpoint — each is a typed function with a request struct, a response struct, and an HTTP call. Wrapping them as tools means generating a JSON Schema from the existing type and writing a dispatch case.
Tool-call schema policy¶
- Go (rev-sci-vanguard): Temporal activity function signatures +
  Go struct tags → JSON Schema. Colocated with the activity in
  internal/temporal/bespoke/.
- Python (BFF): Pydantic model .model_json_schema() → JSON Schema.
  Colocated with the endpoint handler.
Schemas live next to the code they describe, not in a separate tools manifest. Use Claude's native tool-use API for all layers. MCP is over-engineering for v1 — each layer is a single tool-use loop.
Schema generation — Go¶
// internal/temporal/bespoke/tools.go
package bespoke

import (
	"encoding/json"

	"rev-sci-vanguard/internal/clients/anthropic"
)

// FraudScoringToolSchema describes the score_fraud_risk tool for Claude's
// native tool-use API. The input shape mirrors FraudScoringRequest, which
// ToolDispatcher.dispatch unmarshals it into: one feature row per quote
// line, with monetary fields in integer pence.
func FraudScoringToolSchema() anthropic.Tool {
	return anthropic.Tool{
		Name:        "score_fraud_risk",
		Description: "Score fraud risk for a set of quote lines using the ML fraud model. Returns per-line risk bands and an aggregate fraud probability.",
		InputSchema: json.RawMessage(`{
			"type": "object",
			"properties": {
				"quote_id": {
					"type": "string",
					"description": "The quote identifier to score"
				},
				"features": {
					"type": "array",
					"items": {
						"type": "object",
						"properties": {
							"line_id": {"type": "string"},
							"partner_id": {"type": "string"},
							"network": {"type": "string"},
							"contract_months": {"type": "integer"},
							"service_count": {"type": "integer"},
							"monthly_cost_pence": {"type": "integer"}
						},
						"required": ["line_id", "partner_id", "network",
							"contract_months", "service_count",
							"monthly_cost_pence"]
					},
					"description": "Feature rows for the fraud model, one per quote line"
				}
			},
			"required": ["quote_id", "features"]
		}`),
	}
}
// SimulateAllocationToolSchema describes the simulate_allocation tool:
// validate a proposed giveaway allocation against floor and budget
// constraints. The input is unmarshaled into SimulateRequest by
// ToolDispatcher.dispatch and forwarded to the PyICE client; monetary
// fields are integer pence (per the field names).
func SimulateAllocationToolSchema() anthropic.Tool {
	return anthropic.Tool{
		Name:        "simulate_allocation",
		Description: "Validate a proposed giveaway allocation against floor and budget constraints. Returns constraint violations and delta from the engine recommendation.",
		InputSchema: json.RawMessage(`{
			"type": "object",
			"properties": {
				"quote_id": {"type": "string"},
				"proposed_allocation": {
					"type": "array",
					"items": {
						"type": "object",
						"properties": {
							"package_id": {"type": "string"},
							"override_gross_monthly_pence": {"type": "integer"}
						},
						"required": ["package_id", "override_gross_monthly_pence"]
					}
				},
				"target_total_giveaway_pence": {"type": "integer"}
			},
			"required": ["quote_id", "proposed_allocation",
				"target_total_giveaway_pence"]
		}`),
	}
}
// PartnerHistoryToolSchema describes the get_partner_history tool:
// bespoke review history for a partner over a 90/180/365-day window.
// window_days is optional in the schema; the BFF dispatcher substitutes
// 180 when it is omitted — presumably the Go path does the same, but
// that default is not visible here (TODO confirm in AnalyticsClient).
func PartnerHistoryToolSchema() anthropic.Tool {
	return anthropic.Tool{
		Name:        "get_partner_history",
		Description: "Retrieve bespoke review history for a partner: connection rates by decision branch, average giveaway, sample sizes. Returns 90/180/365-day windows.",
		InputSchema: json.RawMessage(`{
			"type": "object",
			"properties": {
				"partner_id": {"type": "string"},
				"window_days": {
					"type": "integer",
					"enum": [90, 180, 365]
				}
			},
			"required": ["partner_id"]
		}`),
	}
}
// QueryAnalyticsToolSchema describes the query_analytics tool: free-form
// SQL against the ClickHouse analytics layer.
//
// NOTE(review): the SELECT-only restriction below is only *described* to
// the model, not enforced by the schema. Enforcement must happen at
// dispatch time or via a read-only database user — confirm one of those
// is actually in place before shipping.
func QueryAnalyticsToolSchema() anthropic.Tool {
	return anthropic.Tool{
		Name:        "query_analytics",
		Description: "Execute a read-only SQL query against the ClickHouse analytics layer. Tables: partner_metrics, quote_outcomes, commission_history. Returns up to 100 rows as JSON.",
		InputSchema: json.RawMessage(`{
			"type": "object",
			"properties": {
				"sql": {
					"type": "string",
					"description": "Read-only SQL. SELECT only; no DML/DDL."
				}
			},
			"required": ["sql"]
		}`),
	}
}
Schema generation — Python¶
# bff/app/tools/schemas.py
from pydantic import BaseModel, Field
# One feature row for the fraud model, one per quote line (see
# FraudScoringInput.features). Comments only — no class docstring, because
# Pydantic folds docstrings into model_json_schema() output and that schema
# is sent to the LLM verbatim.
class FraudFeatureRow(BaseModel):
    line_id: str
    partner_id: str
    network: str
    contract_months: int
    service_count: int
    monthly_cost_pence: int  # integer pence, per the field name
# Input model for the score_fraud_risk tool. model_json_schema() on this
# class produces the tool's input_schema, so the Field descriptions below
# are exactly what the model sees. Kept as comments (not a docstring) so
# the generated schema is unchanged.
class FraudScoringInput(BaseModel):
    quote_id: str = Field(description="The quote identifier to score")
    features: list[FraudFeatureRow] = Field(
        description="Feature rows for the fraud model, one per quote line",
    )
def fraud_scoring_tool() -> dict:
    """Anthropic tool definition for score_fraud_risk.

    The input_schema is generated from the Pydantic model so it stays in
    lockstep with validation; the description is kept word-for-word in sync
    with the Go (rev-sci-vanguard) definition of the same tool so both
    layers present identical semantics to the model.
    """
    return {
        "name": "score_fraud_risk",
        "description": (
            "Score fraud risk for a set of quote lines using the ML fraud "
            "model. Returns per-line risk bands and an aggregate fraud "
            "probability."
        ),
        "input_schema": FraudScoringInput.model_json_schema(),
    }
Tool dispatch loop — Anthropic native (Go)¶
Send a request with tools, check if the response contains tool_use
blocks, dispatch each to the real client, send results back, repeat
until the model returns text.
// internal/temporal/bespoke/tool_loop.go
package bespoke
import (
	"context"
	"encoding/json"
	"fmt"
	"log/slog"
	"strings"

	"rev-sci-vanguard/internal/clients/anthropic"
)
// ToolDispatcher drives Claude's native tool-use loop and routes tool
// calls to the real service clients. Fields are set directly — no
// constructor is defined in this file.
type ToolDispatcher struct {
	llm        anthropic.LLMClient // Messages API client
	pyice      PyICEClient         // simulate_allocation
	fraudModel FraudModelClient    // score_fraud_risk
	analytics  AnalyticsClient     // get_partner_history + query_analytics
	logger     *slog.Logger
}
// RunToolLoop runs a multi-round tool-use conversation with Claude.
// Each round sends the accumulated transcript; whenever the model stops
// to call tools, every tool_use block is dispatched to its backing client
// and the results are appended as a user turn. The loop returns the first
// non-tool response, or an error after maxRounds rounds.
func (d *ToolDispatcher) RunToolLoop(
	ctx context.Context,
	system string,
	userMessage string,
	tools []anthropic.Tool,
	maxRounds int,
) (*anthropic.MessagesResponse, error) {
	transcript := []anthropic.Message{{
		Role:    "user",
		Content: []anthropic.ContentBlock{{Type: "text", Text: userMessage}},
	}}
	for round := 0; round < maxRounds; round++ {
		resp, err := d.llm.Messages(ctx, anthropic.MessagesRequest{
			Model:     "claude-sonnet-4-6",
			MaxTokens: 4096,
			System:    system,
			Messages:  transcript,
			Tools:     tools,
		})
		if err != nil {
			return nil, fmt.Errorf("tool loop round %d: %w", round, err)
		}
		// A non-tool stop means the model is done; hand the response back.
		if resp.StopReason != "tool_use" {
			return resp, nil
		}
		transcript = append(transcript, anthropic.Message{
			Role: "assistant", Content: resp.Content,
		})
		var toolResults []anthropic.ContentBlock
		for _, b := range resp.Content {
			if b.Type != "tool_use" {
				continue
			}
			out, dispatchErr := d.dispatch(ctx, b.Name, b.Input)
			if dispatchErr != nil {
				// Dispatch failures go back to the model as an error
				// payload instead of aborting the whole conversation.
				out = []byte(fmt.Sprintf(`{"error": %q}`, dispatchErr.Error()))
			}
			toolResults = append(toolResults, anthropic.ContentBlock{
				Type:      "tool_result",
				ToolUseID: b.ID,
				Content:   string(out),
			})
			d.logger.Info("tool_dispatch",
				"tool", b.Name, "round", round, "error", dispatchErr,
			)
		}
		transcript = append(transcript, anthropic.Message{
			Role: "user", Content: toolResults,
		})
	}
	return nil, fmt.Errorf("tool loop: exceeded %d rounds", maxRounds)
}
// dispatch routes a single tool_use block to the backing service client.
// input is the raw JSON arguments produced by the model; the return value
// is the JSON-encoded tool result. Errors are returned to RunToolLoop,
// which reports them to the model as error payloads rather than aborting.
func (d *ToolDispatcher) dispatch(
	ctx context.Context, name string, input json.RawMessage,
) ([]byte, error) {
	switch name {
	case "score_fraud_risk":
		var req FraudScoringRequest
		if err := json.Unmarshal(input, &req); err != nil {
			return nil, err
		}
		result, err := d.fraudModel.Score(ctx, req)
		if err != nil {
			return nil, err
		}
		return json.Marshal(result)
	case "simulate_allocation":
		var req SimulateRequest
		if err := json.Unmarshal(input, &req); err != nil {
			return nil, err
		}
		result, err := d.pyice.Simulate(ctx, req)
		if err != nil {
			return nil, err
		}
		return json.Marshal(result)
	case "get_partner_history":
		var req PartnerHistoryRequest
		if err := json.Unmarshal(input, &req); err != nil {
			return nil, err
		}
		result, err := d.analytics.GetPartnerHistory(ctx, req)
		if err != nil {
			return nil, err
		}
		return json.Marshal(result)
	case "query_analytics":
		var req AnalyticsQuery
		if err := json.Unmarshal(input, &req); err != nil {
			return nil, err
		}
		// The model-supplied SQL is untrusted input. The tool description
		// promises SELECT-only, but nothing upstream enforced it, so reject
		// anything that does not start with SELECT. This is a coarse guard
		// (it also rejects WITH ... SELECT); real enforcement should be a
		// read-only ClickHouse user on the analytics connection.
		sql := strings.TrimSpace(req.SQL)
		if !strings.HasPrefix(strings.ToUpper(sql), "SELECT") {
			return nil, fmt.Errorf("query_analytics: only SELECT statements are allowed")
		}
		result, err := d.analytics.Query(ctx, sql)
		if err != nil {
			return nil, err
		}
		return json.Marshal(result)
	default:
		return nil, fmt.Errorf("unknown tool: %s", name)
	}
}
Tool dispatch loop — LiteLLM / OpenAI-compatible (Go)¶
Same dispatch logic, different wire format. Tool calls arrive in
choices[0].message.tool_calls; results go back as role: "tool".
// internal/temporal/bespoke/tool_loop_openai.go
package bespoke

import (
	"context"
	"encoding/json"
	"fmt"
	"log/slog"

	"rev-sci-vanguard/internal/clients/llm"
)

// OpenAIToolDispatcher runs the same tool loop as ToolDispatcher, but over
// the LiteLLM / OpenAI-compatible chat wire format (tool calls arrive in
// choices[0].message.tool_calls; results go back as role: "tool").
type OpenAIToolDispatcher struct {
	client     *llm.OpenAIClient
	pyice      PyICEClient
	fraudModel FraudModelClient
	analytics  AnalyticsClient
	logger     *slog.Logger
}
// RunToolLoop drives the tool-use loop over the OpenAI-compatible chat
// API: send messages with tools, dispatch any tool_calls, append the
// results as role "tool" messages, and repeat. Returns the first response
// that does not finish with tool_calls, or an error after maxRounds.
func (d *OpenAIToolDispatcher) RunToolLoop(
	ctx context.Context,
	system string,
	userMessage string,
	tools []llm.ToolDef,
	maxRounds int,
) (*llm.ChatResponse, error) {
	messages := []llm.ChatMessage{
		{Role: "system", Content: &system},
		{Role: "user", Content: &userMessage},
	}
	for round := range maxRounds {
		resp, err := d.client.Chat(ctx, llm.ChatRequest{
			Model:    "claude-sonnet", // LiteLLM model alias
			Messages: messages,
			Tools:    tools,
		})
		if err != nil {
			return nil, fmt.Errorf("tool loop round %d: %w", round, err)
		}
		// Guard the index: a gateway can return a well-formed response
		// with an empty choices slice, and resp.Choices[0] would panic.
		if len(resp.Choices) == 0 {
			return nil, fmt.Errorf("tool loop round %d: response contained no choices", round)
		}
		choice := resp.Choices[0]
		if choice.FinishReason != "tool_calls" {
			return resp, nil
		}
		messages = append(messages, choice.Message)
		for _, tc := range choice.Message.ToolCalls {
			output, err := d.dispatch(ctx, tc.Function.Name,
				[]byte(tc.Function.Arguments))
			if err != nil {
				errMsg := fmt.Sprintf(`{"error": %q}`, err.Error())
				output = []byte(errMsg)
			}
			messages = append(messages, llm.ChatMessage{
				Role:       "tool",
				Content:    ptr(string(output)),
				ToolCallID: &tc.ID,
			})
			// Log each dispatch, matching the Anthropic-native loop (the
			// logger field was previously unused in this dispatcher).
			d.logger.Info("tool_dispatch",
				"tool", tc.Function.Name, "round", round, "error", err,
			)
		}
	}
	return nil, fmt.Errorf("tool loop: exceeded %d rounds", maxRounds)
}
// dispatch is identical to the Anthropic-native version — same switch,
// same clients. Only the wire format differs, not the business logic.
Tool dispatch — Python (BFF)¶
# bff/app/tools/dispatcher.py
from __future__ import annotations
import json
import logging
from typing import Any
from app.clients.anthropic_client import (
AnthropicClient, MessagesRequest, Message, ContentBlock,
)
from app.clients.uw_engine import UWEngineClient
from app.clients.pyice import PyICEClient
logger = logging.getLogger(__name__)
class ToolDispatcher:
def __init__(
self,
llm: AnthropicClient,
pyice: PyICEClient,
uw_engine: UWEngineClient,
):
self._llm = llm
self._pyice = pyice
self._uw_engine = uw_engine
async def run_tool_loop(
self,
system: str,
user_message: str,
tools: list[dict],
model: str = "claude-sonnet-4-6",
max_rounds: int = 5,
) -> list[ContentBlock]:
messages = [Message(
role="user",
content=[ContentBlock(type="text", text=user_message)],
)]
for round_idx in range(max_rounds):
resp = await self._llm.messages(MessagesRequest(
model=model, max_tokens=4096, system=system,
messages=messages, tools=tools,
))
if resp.stop_reason != "tool_use":
return resp.content
messages.append(Message(role="assistant", content=resp.content))
results: list[ContentBlock] = []
for block in resp.content:
if block.type != "tool_use":
continue
output = await self._dispatch(block.name, block.input or {})
results.append(ContentBlock(
type="tool_result",
tool_use_id=block.id,
content=json.dumps(output),
))
logger.info("tool_dispatch", extra={
"tool": block.name, "round": round_idx,
})
messages.append(Message(role="user", content=results))
raise RuntimeError(f"Tool loop exceeded {max_rounds} rounds")
async def _dispatch(self, name: str, input: dict) -> Any:
match name:
case "score_fraud_risk":
return await self._uw_engine.score_fraud(
input["quote_id"], input["features"],
)
case "simulate_allocation":
return await self._pyice.simulate(
input["quote_id"],
input["proposed_allocation"],
input["target_total_giveaway_pence"],
)
case "get_partner_history":
return await self._pyice.get_partner_history(
input["partner_id"],
input.get("window_days", 180),
)
case "query_analytics":
return await self._query_clickhouse(input["sql"])
case _:
return {"error": f"Unknown tool: {name}"}
Open questions¶
- Tool granularity. Separate approve_review / decline_review /
  escalate_to_human tools, or a single submit_decision(action, ...)?
- Missing tools? get_quote_snapshot, get_similar_quotes, get_partner_tier?
- ClickHouse SQL guardrails. Schema-in-prompt, vs a
  list_tables / describe_table tool pair?