Skip to content

04 — Tool use

Schema generation, tool dispatch loops, and ML model wrappers as agent tools. The core insight: existing service clients are already the right shape for LLM tools. The fraud scoring model, the PyICE allocator, the rev-sci partner history endpoint — each is a typed function with a request struct, a response struct, and an HTTP call. Wrapping them as tools means generating a JSON Schema from the existing type and writing a dispatch case.

Tool-call schema policy

  • Go (rev-sci-vanguard): Temporal activity function signatures + Go struct tags → JSON Schema. Colocated with the activity in internal/temporal/bespoke/.
  • Python (BFF): Pydantic model .model_json_schema() → JSON Schema. Colocated with the endpoint handler.

Schemas live next to the code they describe, not in a separate tools manifest. Use Claude's native tool-use API for all layers. MCP is over-engineering for v1 — each layer is a single tool-use loop.

Schema generation — Go

// internal/temporal/bespoke/tools.go
package bespoke

import "encoding/json"

// FraudScoringToolSchema describes the score_fraud_risk tool for the
// Claude tool-use API. The input mirrors the fraud model's request
// shape: a quote id plus one feature row per quote line.
func FraudScoringToolSchema() anthropic.Tool {
    // Hand-written to match FraudScoringRequest; keep the two in sync.
    schema := json.RawMessage(`{
            "type": "object",
            "properties": {
                "quote_id": {
                    "type": "string",
                    "description": "The quote identifier to score"
                },
                "features": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "line_id":          {"type": "string"},
                            "partner_id":       {"type": "string"},
                            "network":          {"type": "string"},
                            "contract_months":  {"type": "integer"},
                            "service_count":    {"type": "integer"},
                            "monthly_cost_pence": {"type": "integer"}
                        },
                        "required": ["line_id", "partner_id", "network",
                                     "contract_months", "service_count",
                                     "monthly_cost_pence"]
                    },
                    "description": "Feature rows for the fraud model, one per quote line"
                }
            },
            "required": ["quote_id", "features"]
        }`)

    return anthropic.Tool{
        Name:        "score_fraud_risk",
        Description: "Score fraud risk for a set of quote lines using the ML fraud model. Returns per-line risk bands and an aggregate fraud probability.",
        InputSchema: schema,
    }
}

// SimulateAllocationToolSchema describes the simulate_allocation tool:
// a dry-run of a proposed giveaway allocation through the PyICE
// constraint checker before the model commits to it.
//
// Property descriptions added for parity with FraudScoringToolSchema —
// the model only sees what the schema says, so every field should
// explain itself.
func SimulateAllocationToolSchema() anthropic.Tool {
    return anthropic.Tool{
        Name:        "simulate_allocation",
        Description: "Validate a proposed giveaway allocation against floor and budget constraints. Returns constraint violations and delta from the engine recommendation.",
        InputSchema: json.RawMessage(`{
            "type": "object",
            "properties": {
                "quote_id": {
                    "type": "string",
                    "description": "The quote identifier the allocation applies to"
                },
                "proposed_allocation": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "package_id": {"type": "string"},
                            "override_gross_monthly_pence": {
                                "type": "integer",
                                "description": "Proposed gross monthly price for this package, in pence"
                            }
                        },
                        "required": ["package_id", "override_gross_monthly_pence"]
                    },
                    "description": "Per-package price overrides to validate, one entry per package"
                },
                "target_total_giveaway_pence": {
                    "type": "integer",
                    "description": "Intended total giveaway across the quote, in pence"
                }
            },
            "required": ["quote_id", "proposed_allocation",
                         "target_total_giveaway_pence"]
        }`),
    }
}

// PartnerHistoryToolSchema describes the get_partner_history tool.
// The declared default of 180 days matches the BFF dispatcher's
// fallback (input.get("window_days", 180)) — confirm the Go analytics
// client applies the same default when window_days is omitted.
func PartnerHistoryToolSchema() anthropic.Tool {
    return anthropic.Tool{
        Name:        "get_partner_history",
        Description: "Retrieve bespoke review history for a partner: connection rates by decision branch, average giveaway, sample sizes. Returns 90/180/365-day windows.",
        InputSchema: json.RawMessage(`{
            "type": "object",
            "properties": {
                "partner_id": {
                    "type": "string",
                    "description": "The partner identifier to look up"
                },
                "window_days": {
                    "type": "integer",
                    "enum": [90, 180, 365],
                    "default": 180,
                    "description": "History window in days; defaults to 180 when omitted"
                }
            },
            "required": ["partner_id"]
        }`),
    }
}

// QueryAnalyticsToolSchema describes the query_analytics escape hatch:
// free-form read-only SQL against the ClickHouse analytics layer.
// Enforcement of the SELECT-only contract lives in the analytics
// client, not here — the schema only tells the model the rules.
func QueryAnalyticsToolSchema() anthropic.Tool {
    schema := json.RawMessage(`{
            "type": "object",
            "properties": {
                "sql": {
                    "type": "string",
                    "description": "Read-only SQL. SELECT only; no DML/DDL."
                }
            },
            "required": ["sql"]
        }`)

    return anthropic.Tool{
        Name:        "query_analytics",
        Description: "Execute a read-only SQL query against the ClickHouse analytics layer. Tables: partner_metrics, quote_outcomes, commission_history. Returns up to 100 rows as JSON.",
        InputSchema: schema,
    }
}

Schema generation — Python

# bff/app/tools/schemas.py
from pydantic import BaseModel, Field

# One feature row for the fraud model, covering a single quote line
# (see FraudScoringInput.features). Deliberately no class docstring:
# Pydantic lifts a docstring into the generated JSON Schema
# "description", which would change the emitted tool schema.
class FraudFeatureRow(BaseModel):
    line_id: str  # identifier of the quote line this row describes
    partner_id: str  # partner the line belongs to
    network: str  # network identifier for the line
    contract_months: int  # contract term length in months
    service_count: int  # number of services on the line
    monthly_cost_pence: int  # monthly cost, in pence

# Input payload for the score_fraud_risk tool. Field descriptions are
# carried via Field(...) so they appear in model_json_schema() output.
# Keep in sync with the hand-written Go schema in
# internal/temporal/bespoke/tools.go (FraudScoringToolSchema).
class FraudScoringInput(BaseModel):
    quote_id: str = Field(description="The quote identifier to score")
    features: list[FraudFeatureRow] = Field(
        description="Feature rows for the fraud model, one per quote line",
    )

def fraud_scoring_tool() -> dict:
    """Anthropic-native tool definition for score_fraud_risk.

    The input schema is generated from the Pydantic model rather than
    hand-written, so it cannot drift from the endpoint's request type.
    """
    input_schema = FraudScoringInput.model_json_schema()
    return {
        "name": "score_fraud_risk",
        "description": "Score fraud risk for quote lines using the ML fraud model.",
        "input_schema": input_schema,
    }

Tool dispatch loop — Anthropic native (Go)

Send a request with tools, check if the response contains tool_use blocks, dispatch each to the real client, send results back, repeat until the model returns text.

// internal/temporal/bespoke/tool_loop.go
package bespoke

import (
    "context"
    "encoding/json"
    "fmt"

    "rev-sci-vanguard/internal/clients/anthropic"
)

type ToolDispatcher struct {
    llm         anthropic.LLMClient
    pyice       PyICEClient
    fraudModel  FraudModelClient
    analytics   AnalyticsClient
    logger      *slog.Logger
}

// RunToolLoop drives a Claude tool-use conversation to completion.
// Each round sends the accumulated transcript; while the model stops
// to call tools, every tool_use block is dispatched to the matching
// client and the results are appended as a tool_result user turn.
// Returns the final non-tool response, or an error after maxRounds.
func (d *ToolDispatcher) RunToolLoop(
    ctx context.Context,
    system string,
    userMessage string,
    tools []anthropic.Tool,
    maxRounds int,
) (*anthropic.MessagesResponse, error) {

    transcript := []anthropic.Message{{
        Role:    "user",
        Content: []anthropic.ContentBlock{{Type: "text", Text: userMessage}},
    }}

    for round := 0; round < maxRounds; round++ {
        resp, err := d.llm.Messages(ctx, anthropic.MessagesRequest{
            Model:     "claude-sonnet-4-6",
            MaxTokens: 4096,
            System:    system,
            Messages:  transcript,
            Tools:     tools,
        })
        if err != nil {
            return nil, fmt.Errorf("tool loop round %d: %w", round, err)
        }

        // Done: the model produced a final answer instead of tool calls.
        if resp.StopReason != "tool_use" {
            return resp, nil
        }

        // Echo the assistant turn back so the model sees its own calls.
        transcript = append(transcript, anthropic.Message{
            Role: "assistant", Content: resp.Content,
        })

        var toolResults []anthropic.ContentBlock
        for _, blk := range resp.Content {
            if blk.Type != "tool_use" {
                continue
            }

            payload, dispatchErr := d.dispatch(ctx, blk.Name, blk.Input)
            if dispatchErr != nil {
                // Surface the failure to the model instead of aborting;
                // it can retry or route around a broken tool.
                payload = []byte(fmt.Sprintf(`{"error": %q}`, dispatchErr.Error()))
            }

            toolResults = append(toolResults, anthropic.ContentBlock{
                Type:      "tool_result",
                ToolUseID: blk.ID,
                Content:   string(payload),
            })

            d.logger.Info("tool_dispatch",
                "tool", blk.Name, "round", round, "error", dispatchErr,
            )
        }

        // Tool results always travel back in a user-role message.
        transcript = append(transcript, anthropic.Message{
            Role: "user", Content: toolResults,
        })
    }

    return nil, fmt.Errorf("tool loop: exceeded %d rounds", maxRounds)
}

// dispatch routes one tool call to its concrete service client.
// input is the raw JSON argument object the model produced; the
// client's response is re-marshalled to JSON for the tool_result block.
func (d *ToolDispatcher) dispatch(
    ctx context.Context, name string, input json.RawMessage,
) ([]byte, error) {
    switch name {
    case "score_fraud_risk":
        var args FraudScoringRequest
        if err := json.Unmarshal(input, &args); err != nil {
            return nil, err
        }
        scored, err := d.fraudModel.Score(ctx, args)
        if err != nil {
            return nil, err
        }
        return json.Marshal(scored)

    case "simulate_allocation":
        var args SimulateRequest
        if err := json.Unmarshal(input, &args); err != nil {
            return nil, err
        }
        sim, err := d.pyice.Simulate(ctx, args)
        if err != nil {
            return nil, err
        }
        return json.Marshal(sim)

    case "get_partner_history":
        var args PartnerHistoryRequest
        if err := json.Unmarshal(input, &args); err != nil {
            return nil, err
        }
        history, err := d.analytics.GetPartnerHistory(ctx, args)
        if err != nil {
            return nil, err
        }
        return json.Marshal(history)

    case "query_analytics":
        var args AnalyticsQuery
        if err := json.Unmarshal(input, &args); err != nil {
            return nil, err
        }
        rows, err := d.analytics.Query(ctx, args.SQL)
        if err != nil {
            return nil, err
        }
        return json.Marshal(rows)

    default:
        // Unknown name: report it back so the caller can feed the
        // error to the model as a tool_result.
        return nil, fmt.Errorf("unknown tool: %s", name)
    }
}

Tool dispatch loop — LiteLLM / OpenAI-compatible (Go)

Same dispatch logic, different wire format. Tool calls arrive in choices[0].message.tool_calls; results go back as role: "tool".

// internal/temporal/bespoke/tool_loop_openai.go
package bespoke

import (
    "context"
    "encoding/json"
    "fmt"

    "rev-sci-vanguard/internal/clients/llm"
)

type OpenAIToolDispatcher struct {
    client     *llm.OpenAIClient
    pyice      PyICEClient
    fraudModel FraudModelClient
    analytics  AnalyticsClient
    logger     *slog.Logger
}

// RunToolLoop drives an OpenAI-format tool-calling conversation to
// completion. Tool calls arrive in choices[0].message.tool_calls and
// results go back as role "tool" messages. Returns the final response,
// or an error after maxRounds round trips.
func (d *OpenAIToolDispatcher) RunToolLoop(
    ctx context.Context,
    system string,
    userMessage string,
    tools []llm.ToolDef,
    maxRounds int,
) (*llm.ChatResponse, error) {

    messages := []llm.ChatMessage{
        {Role: "system", Content: &system},
        {Role: "user", Content: &userMessage},
    }

    for round := range maxRounds {
        resp, err := d.client.Chat(ctx, llm.ChatRequest{
            Model:    "claude-sonnet", // LiteLLM model alias
            Messages: messages,
            Tools:    tools,
        })
        if err != nil {
            return nil, fmt.Errorf("tool loop round %d: %w", round, err)
        }

        // Guard against a malformed proxy response: indexing an empty
        // Choices slice would panic.
        if len(resp.Choices) == 0 {
            return nil, fmt.Errorf("tool loop round %d: response has no choices", round)
        }

        choice := resp.Choices[0]
        if choice.FinishReason != "tool_calls" {
            return resp, nil
        }

        messages = append(messages, choice.Message)

        for _, tc := range choice.Message.ToolCalls {
            output, err := d.dispatch(ctx, tc.Function.Name,
                []byte(tc.Function.Arguments))
            if err != nil {
                // Feed the failure back to the model as the tool result
                // rather than aborting the loop.
                errMsg := fmt.Sprintf(`{"error": %q}`, err.Error())
                output = []byte(errMsg)
            }

            messages = append(messages, llm.ChatMessage{
                Role:       "tool",
                Content:    ptr(string(output)),
                ToolCallID: &tc.ID,
            })

            // Log each dispatch, matching the Anthropic-native loop.
            d.logger.Info("tool_dispatch",
                "tool", tc.Function.Name, "round", round, "error", err,
            )
        }
    }

    return nil, fmt.Errorf("tool loop: exceeded %d rounds", maxRounds)
}

// dispatch is identical to the Anthropic-native version — same switch,
// same clients. Only the wire format differs, not the business logic.

Tool dispatch — Python (BFF)

# bff/app/tools/dispatcher.py
from __future__ import annotations

import json
import logging
from typing import Any

from app.clients.anthropic_client import (
    AnthropicClient, MessagesRequest, Message, ContentBlock,
)
from app.clients.uw_engine import UWEngineClient
from app.clients.pyice import PyICEClient

logger = logging.getLogger(__name__)

class ToolDispatcher:
    def __init__(
        self,
        llm: AnthropicClient,
        pyice: PyICEClient,
        uw_engine: UWEngineClient,
    ):
        self._llm = llm
        self._pyice = pyice
        self._uw_engine = uw_engine

    async def run_tool_loop(
        self,
        system: str,
        user_message: str,
        tools: list[dict],
        model: str = "claude-sonnet-4-6",
        max_rounds: int = 5,
    ) -> list[ContentBlock]:

        messages = [Message(
            role="user",
            content=[ContentBlock(type="text", text=user_message)],
        )]

        for round_idx in range(max_rounds):
            resp = await self._llm.messages(MessagesRequest(
                model=model, max_tokens=4096, system=system,
                messages=messages, tools=tools,
            ))

            if resp.stop_reason != "tool_use":
                return resp.content

            messages.append(Message(role="assistant", content=resp.content))

            results: list[ContentBlock] = []
            for block in resp.content:
                if block.type != "tool_use":
                    continue

                output = await self._dispatch(block.name, block.input or {})
                results.append(ContentBlock(
                    type="tool_result",
                    tool_use_id=block.id,
                    content=json.dumps(output),
                ))

                logger.info("tool_dispatch", extra={
                    "tool": block.name, "round": round_idx,
                })

            messages.append(Message(role="user", content=results))

        raise RuntimeError(f"Tool loop exceeded {max_rounds} rounds")

    async def _dispatch(self, name: str, input: dict) -> Any:
        match name:
            case "score_fraud_risk":
                return await self._uw_engine.score_fraud(
                    input["quote_id"], input["features"],
                )
            case "simulate_allocation":
                return await self._pyice.simulate(
                    input["quote_id"],
                    input["proposed_allocation"],
                    input["target_total_giveaway_pence"],
                )
            case "get_partner_history":
                return await self._pyice.get_partner_history(
                    input["partner_id"],
                    input.get("window_days", 180),
                )
            case "query_analytics":
                return await self._query_clickhouse(input["sql"])
            case _:
                return {"error": f"Unknown tool: {name}"}

Open questions

  1. Tool granularity. Separate approve_review / decline_review / escalate_to_human, or a single submit_decision(action, ...)?
  2. Missing tools? get_quote_snapshot, get_similar_quotes, get_partner_tier?
  3. ClickHouse SQL guardrails. Schema-in-prompt vs list_tables / describe_table tool pair?