fix(forecasting): persist LLM overlay under Tier-1 ITPM via two-call architecture

The daily forecast:llm-overlay command was being skipped because the previous
single-conversation flow consumed more than Tier-1's 50,000 input-tokens-per-
minute Anthropic bucket. The web_search tool auto-caches its results (~55k
tokens) and requires `encrypted_content` intact when those blocks are resent,
so the prior retry-on-missing-citations path either 429'd or 400'd on the
second call.

LlmOverlayService now runs two independent API calls. Phase 1 invokes the
web_search tool and we discard the transcript after harvesting the URLs +
titles from the returned web_search_tool_result blocks. Phase 2 is a fresh
conversation containing the forecast context and the harvested headlines as
plain text, with a forced submit_overlay tool call. events_cited is now
optional in the tool schema — Haiku's flaky compliance no longer matters
because citations come from the search results, not the model's transcription.
Model-tagged events (with directional impact) merge with harvested-only
entries (impact: 'neutral'), deduped by URL.

Between phases the service reads anthropic-ratelimit-input-tokens-remaining /
…-reset from Phase 1's headers and sleeps proportionally — only long enough
for the SUBMIT_TOKEN_BUDGET worth of refill, not for the full bucket reset,
capped at 65 seconds.

ApiLogger now captures usage.input_tokens, usage.output_tokens,
cache_read_input_tokens, cache_creation_input_tokens, plus the rate-limit
remaining/reset headers on every Anthropic response. New nullable columns on
api_logs make rate-limit diagnostics directly queryable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ovidiu U
2026-05-14 14:22:42 +01:00
parent 97e27fc057
commit 07e0789044
6 changed files with 668 additions and 325 deletions

View File

@@ -34,10 +34,12 @@ class ApiLogger
$statusCode = null;
$error = null;
$responseBody = null;
$usage = [];
try {
$response = $request();
$statusCode = $response->status();
$usage = $this->extractUsage($response);
if ($response->failed()) {
$body = $response->body();
@@ -53,6 +55,7 @@ class ApiLogger
// doesn't. Pull the body when it's available.
if ($e instanceof RequestException) {
$responseBody = $this->truncate($e->response->body());
$usage = $this->extractUsage($e->response);
}
throw $e;
@@ -65,6 +68,7 @@ class ApiLogger
'duration_ms' => (int) round((microtime(true) - $start) * 1000),
'error' => $error,
'response_body' => $responseBody,
...$usage,
]);
}
}
@@ -75,4 +79,39 @@ class ApiLogger
? substr($body, 0, self::RESPONSE_BODY_CAP)
: $body;
}
/**
 * Extract token-usage counters and rate-limit telemetry from a
 * provider response.
 *
 * Today only Anthropic exposes both; other providers return mostly
 * NULLs — callers don't need to know which is which. A null response
 * (the request never completed) yields an empty array so the caller's
 * spread merges nothing into the log record.
 *
 * @return array<string, int|string|null>
 */
private function extractUsage(?Response $response): array
{
    if (! $response instanceof Response) {
        return [];
    }

    $raw = $response->json('usage');
    if (! is_array($raw)) {
        $raw = [];
    }

    // Laravel's header() returns '' (not null) when a header is absent.
    $remainingHeader = $response->header('anthropic-ratelimit-input-tokens-remaining');
    $resetHeader = $response->header('anthropic-ratelimit-input-tokens-reset');

    return [
        'input_tokens' => $this->intOrNull($raw['input_tokens'] ?? null),
        'output_tokens' => $this->intOrNull($raw['output_tokens'] ?? null),
        'cache_read_tokens' => $this->intOrNull($raw['cache_read_input_tokens'] ?? null),
        'cache_write_tokens' => $this->intOrNull($raw['cache_creation_input_tokens'] ?? null),
        'ratelimit_remaining' => $this->intOrNull($remainingHeader === '' ? null : $remainingHeader),
        'ratelimit_reset_at' => $resetHeader === '' ? null : $resetHeader,
    ];
}
/**
 * Cast a numeric-looking value to int; anything non-numeric
 * (including '' and null) becomes null.
 */
private function intOrNull(mixed $value): ?int
{
    if (! is_numeric($value)) {
        return null;
    }

    return (int) $value;
}
}

View File

@@ -6,7 +6,9 @@ use App\Models\BrentPrice;
use App\Models\LlmOverlay;
use App\Models\VolatilityRegime;
use App\Services\ApiLogger;
use Carbon\CarbonImmutable;
use Carbon\CarbonInterface;
use Illuminate\Http\Client\Response;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;
@@ -15,9 +17,21 @@ use Throwable;
/**
* Layer 4 daily news-aware overlay on the calibrated ridge forecast.
*
* Calls Anthropic Haiku with the web_search tool, then forces a
* submit_overlay tool call to get structured output. Cites events with
* URLs; URLs are verified before storing. Empty citations rejection.
* Runs as two independent Anthropic API calls:
* Phase 1 — web_search tool only; we capture the URLs/titles from
* the returned web_search_tool_result blocks.
* Phase 2 — fresh conversation containing those URLs+titles as plain
* text plus a forced submit_overlay tool call.
*
* Phase 1's transcript is never sent back to Phase 2. Anthropic's
* web_search auto-caches the encrypted page text (~55k tokens per
* search) and requires it intact when web_search_tool_result blocks
* are resent. Threading it through to Phase 2 either blows the Tier-1
* 50k ITPM bucket or 400s if we try to strip it. Two clean calls keep
* Phase 2 around 3k input tokens.
*
* Citations are harvested directly from Phase 1's web_search_tool_result
* blocks — Haiku is unreliable about populating `events_cited` itself.
*
* Read-only with respect to the volatility flag — Layer 4 writes its
* `llm_overlays` row; Layer 5's hourly cron picks it up and decides
@@ -31,6 +45,15 @@ final class LlmOverlayService
private const int COOLDOWN_HOURS = 4;
private const int MAX_SEARCH_TURNS = 2;
/**
* Approximate input-token cost of Phase 2 (system + tool schema +
* forecast context + harvested URL list). If Phase 1 leaves
* remaining ITPM below this, wait for the bucket to refill.
*/
private const int SUBMIT_TOKEN_BUDGET = 4_000;
public function __construct(
private readonly ApiLogger $apiLogger,
private readonly WeeklyForecastService $weeklyForecast,
@@ -55,19 +78,24 @@ final class LlmOverlayService
$forecast = $this->weeklyForecast->currentForecast();
$context = $this->buildContext($forecast);
$rawResult = $this->callAnthropic($context);
if ($rawResult === null) {
$callResult = $this->callAnthropic($context);
if ($callResult === null) {
return null;
}
$verifiedEvents = $this->verifyCitedUrls($rawResult['events_cited'] ?? []);
$rawResult = $callResult['raw'];
$harvested = $callResult['harvested'];
$mergedEvents = $this->mergeEvents($rawResult['events_cited'] ?? [], $harvested);
$verifiedEvents = $this->verifyCitedUrls($mergedEvents);
if ($verifiedEvents === []) {
Log::warning('LlmOverlayService: no verified citations, rejecting overlay', [
'events_cited_count' => count($rawResult['events_cited'] ?? []),
'model_events' => $rawResult['events_cited'] ?? null,
'harvested_urls' => array_column($harvested, 'url'),
'direction' => $rawResult['direction'] ?? null,
'confidence' => $rawResult['confidence'] ?? null,
'reasoning_short' => $rawResult['reasoning_short'] ?? null,
'raw_result' => $rawResult,
]);
return null;
@@ -131,70 +159,44 @@ final class LlmOverlayService
];
}
/** @return array<string, mixed>|null */
/**
* Two independent API calls:
*
* Phase 1 — runs the web_search tool, captures the assistant's
* returned `web_search_tool_result` blocks, then
* discards the transcript.
*
* Phase 2 — issues a brand-new conversation with the harvested
* URLs/titles flattened into a plain-text user message
* and forces a `submit_overlay` tool call.
*
* Why not one stitched conversation: Anthropic auto-caches web_search
* results into ITPM (≈55k tokens for a 1-search call) and requires
* `encrypted_content` intact when those blocks are sent back.
* Resending the Phase 1 transcript to Phase 2 either rate-limits us
* (29k+ tokens twice exceeds the Tier-1 50k ITPM bucket) or 400s
* if we strip the encrypted blob. A fresh Phase 2 sends ~3k tokens
* total — small enough to fit in the recovered bucket after a
* short adaptive sleep.
*
* @return array{raw: array<string, mixed>, harvested: array<int, array{url: string, title: string}>}|null
*/
private function callAnthropic(array $context): ?array
{
$messages = [['role' => 'user', 'content' => $this->prompt($context)]];
try {
// Phase 1: web search loop. Append the assistant turn after every
// successful response, then decide whether to keep looping —
// this guarantees the messages array stays well-formed regardless
// of whether we exit via `break` or by exhausting iterations.
for ($i = 0, $response = null; $i < 5; $i++) {
$response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(45)
->withHeaders($this->headers())
->post(self::URL, [
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
'max_tokens' => 1024,
'tools' => [['type' => 'web_search_20250305', 'name' => 'web_search']],
'messages' => $messages,
]));
if (! $response->successful()) {
Log::error('LlmOverlayService: search request failed', ['status' => $response->status()]);
return null;
}
$messages[] = ['role' => 'assistant', 'content' => $response->json('content')];
if ($response->json('stop_reason') !== 'pause_turn') {
break;
}
}
$messages[] = ['role' => 'user', 'content' => 'Now submit your overlay using the submit_overlay tool. Cite at least one event with a URL.'];
// Phase 2: forced structured output
$submitResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
->withHeaders($this->headers())
->post(self::URL, [
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
'max_tokens' => 512,
'tools' => [$this->submitOverlayTool()],
'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
'messages' => $messages,
]));
if (! $submitResponse->successful()) {
Log::error('LlmOverlayService: submit request failed', ['status' => $submitResponse->status()]);
$phase1 = $this->runWebSearch($context);
if ($phase1 === null) {
return null;
}
$submitContent = $submitResponse->json('content') ?? [];
$rawResult = $this->extractToolInput($submitContent);
$this->waitForRateLimitIfNeeded($phase1['response']);
// Haiku sometimes calls submit_overlay without `events_cited` even
// though the schema marks it required. Confirmed in laravel.log on
// 2026-05-12: tool_use input had only direction/confidence/reasoning.
// Retry once with an explicit tool_result error.
if ($this->citationsMissing($rawResult)) {
$rawResult = $this->retrySubmitWithCitationError($messages, $submitContent) ?? $rawResult;
$rawResult = $this->runSubmit($context, $phase1['harvested']);
if ($rawResult === null) {
return null;
}
return $rawResult;
return ['raw' => $rawResult, 'harvested' => $phase1['harvested']];
} catch (Throwable $e) {
Log::error('LlmOverlayService: callAnthropic failed', ['error' => $e->getMessage()]);
@@ -202,6 +204,239 @@ final class LlmOverlayService
}
}
/**
 * Phase 1: run the web_search tool and harvest citations.
 *
 * Loops while Anthropic reports stop_reason `pause_turn` (at most
 * MAX_SEARCH_TURNS requests), appending each assistant turn to the
 * transcript. Returns the harvested {url, title} pairs plus the final
 * response — its rate-limit headers tell us when the ITPM bucket will
 * be replenished for Phase 2. Returns null on any HTTP failure.
 *
 * @return array{harvested: array<int, array{url: string, title: string}>, response: Response}|null
 */
private function runWebSearch(array $context): ?array
{
    $transcript = [['role' => 'user', 'content' => $this->searchUserMessage($context)]];
    $lastResponse = null;

    $turn = 0;
    while ($turn < self::MAX_SEARCH_TURNS) {
        $turn++;

        $payload = [
            'model' => $this->model(),
            'max_tokens' => 1024,
            'system' => $this->searchSystem(),
            'tools' => [['type' => 'web_search_20250305', 'name' => 'web_search']],
            'messages' => $transcript,
        ];

        $lastResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(45)
            ->withHeaders($this->headers())
            ->post(self::URL, $payload));

        if (! $lastResponse->successful()) {
            Log::error('LlmOverlayService: search request failed', [
                'status' => $lastResponse->status(),
                'body' => substr($lastResponse->body(), 0, 500),
            ]);

            return null;
        }

        $transcript[] = ['role' => 'assistant', 'content' => $lastResponse->json('content')];

        // Anything other than pause_turn means the search loop is finished.
        if ($lastResponse->json('stop_reason') !== 'pause_turn') {
            break;
        }
    }

    // Defensive: only reachable if MAX_SEARCH_TURNS were ever zero.
    if ($lastResponse === null) {
        return null;
    }

    return [
        'harvested' => $this->harvestSearchResults($transcript),
        'response' => $lastResponse,
    ];
}
/**
 * Phase 2: a fresh API call — no Phase 1 transcript — carrying the
 * harvested citations as plain text and forcing a submit_overlay
 * tool call.
 *
 * @param array<int, array{url: string, title: string}> $harvested
 * @return array<string, mixed>|null decoded tool input, or null on HTTP
 *                                   failure / missing tool_use block
 */
private function runSubmit(array $context, array $harvested): ?array
{
    $payload = [
        'model' => $this->model(),
        'max_tokens' => 512,
        'system' => $this->submitSystem(),
        'tools' => [$this->submitOverlayTool()],
        'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
        'messages' => [['role' => 'user', 'content' => $this->submitUserMessage($context, $harvested)]],
    ];

    $response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
        ->withHeaders($this->headers())
        ->post(self::URL, $payload));

    if (! $response->successful()) {
        Log::error('LlmOverlayService: submit request failed', [
            'status' => $response->status(),
            'body' => substr($response->body(), 0, 500),
        ]);

        return null;
    }

    $toolInput = $this->extractToolInput($response->json('content') ?? []);
    if ($toolInput === null) {
        Log::warning('LlmOverlayService: submit response missing tool_use block');

        return null;
    }

    return $toolInput;
}
/**
 * Adaptive sleep between Phase 1 and Phase 2.
 *
 * Anthropic's web_search burns ≈55k input tokens (mostly auto-cached
 * search results), so at Tier 1's 50k ITPM the bucket can sit at zero
 * immediately after Phase 1. Read the rate-limit headers off Phase 1's
 * response and sleep just long enough for SUBMIT_TOKEN_BUDGET tokens
 * to refill — not for the full bucket reset — hard-capped at 65s so
 * the daily cron never hangs much longer than a minute. Missing or
 * unparseable headers skip the wait entirely.
 */
private function waitForRateLimitIfNeeded(Response $response): void
{
    $remainingHeader = $response->header('anthropic-ratelimit-input-tokens-remaining');
    $remaining = (int) $remainingHeader;

    // Header absent (non-Anthropic response, proxy stripped it) or
    // plenty of budget left: nothing to do.
    if ($remainingHeader === '' || $remaining >= self::SUBMIT_TOKEN_BUDGET) {
        return;
    }

    $resetAt = $response->header('anthropic-ratelimit-input-tokens-reset');
    $bucketSize = (int) $response->header('anthropic-ratelimit-input-tokens-limit');
    if ($resetAt === '' || $bucketSize <= 0) {
        return;
    }

    try {
        $fullResetSeconds = max(0, CarbonImmutable::parse($resetAt)->getTimestamp() - now()->getTimestamp());
    } catch (Throwable) {
        // Unparseable reset timestamp — skip the wait rather than crash.
        return;
    }

    // The bucket refills linearly, so wait only for the missing
    // SUBMIT_TOKEN_BUDGET slice plus a 2s safety margin, capped at 65s.
    $deficit = self::SUBMIT_TOKEN_BUDGET - $remaining;
    $sliceSeconds = (int) ceil(($deficit / $bucketSize) * $fullResetSeconds);
    $waitSeconds = max(1, min(65, $sliceSeconds + 2));

    Log::info('LlmOverlayService: waiting for ITPM bucket refill before submit', [
        'remaining' => $remaining,
        'wait_seconds' => $waitSeconds,
        'full_reset_in' => $fullResetSeconds,
    ]);

    sleep($waitSeconds);
}
/**
 * Walk every assistant turn and pull `{url, title}` out of each
 * `web_search_tool_result` block. These blocks come straight from
 * Anthropic's web_search tool — they are the authoritative citation
 * source, not anything the model transcribes back to us. The first
 * occurrence of a URL wins and first-appearance order is preserved.
 *
 * @param array<int, array<string, mixed>> $messages
 * @return array<int, array{url: string, title: string}>
 */
private function harvestSearchResults(array $messages): array
{
    $seen = [];

    foreach ($messages as $turn) {
        $blocks = $turn['content'] ?? [];
        if (($turn['role'] ?? null) !== 'assistant' || ! is_array($blocks)) {
            continue;
        }

        foreach ($blocks as $block) {
            $isSearchResultBlock = is_array($block)
                && ($block['type'] ?? null) === 'web_search_tool_result'
                && is_array($block['content'] ?? null);
            if (! $isSearchResultBlock) {
                continue;
            }

            foreach ($block['content'] as $entry) {
                if (! is_array($entry) || ($entry['type'] ?? null) !== 'web_search_result') {
                    continue;
                }

                $url = (string) ($entry['url'] ?? '');
                if ($url !== '' && ! array_key_exists($url, $seen)) {
                    $seen[$url] = ['url' => $url, 'title' => (string) ($entry['title'] ?? '')];
                }
            }
        }
    }

    return array_values($seen);
}
/**
 * Merge the model's `events_cited` with citations harvested from
 * `web_search_tool_result` blocks, deduped by URL.
 *
 * Model entries carry directional `impact` tagging and win on URL
 * collision; harvested-only entries are appended afterwards with
 * `impact: 'neutral'` and their source derived from the URL's domain.
 *
 * @param array<int, mixed> $modelEvents
 * @param array<int, array{url: string, title: string}> $harvested
 * @return array<int, array<string, mixed>>
 */
private function mergeEvents(array $modelEvents, array $harvested): array
{
    $merged = [];

    foreach ($modelEvents as $event) {
        $url = is_array($event) ? (string) ($event['url'] ?? '') : '';
        if ($url === '') {
            // Un-citable entry — nothing to verify or dedupe on.
            continue;
        }

        $impact = $event['impact'] ?? null;

        $merged[$url] = [
            'headline' => (string) ($event['headline'] ?? ''),
            'source' => (string) ($event['source'] ?? ''),
            'url' => $url,
            'impact' => in_array($impact, ['rising', 'falling', 'neutral'], true) ? $impact : 'neutral',
        ];
    }

    foreach ($harvested as $hit) {
        if (array_key_exists($hit['url'], $merged)) {
            continue;
        }

        $merged[$hit['url']] = [
            'headline' => $hit['title'],
            'source' => $this->domainOf($hit['url']),
            'url' => $hit['url'],
            'impact' => 'neutral',
        ];
    }

    return array_values($merged);
}
/**
 * The URL's host with any leading `www.` stripped, or '' when the
 * URL has no parseable host.
 */
private function domainOf(string $url): string
{
    $host = parse_url($url, PHP_URL_HOST);
    if (! is_string($host)) {
        return '';
    }

    return str_starts_with($host, 'www.') ? substr($host, 4) : $host;
}
private function verificationUserAgent(): string
{
$appUrl = rtrim((string) config('app.url'), '/');
@@ -320,37 +555,61 @@ final class LlmOverlayService
return config('services.anthropic.api_key');
}
private function prompt(array $context): string
private function model(): string
{
$json = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
return <<<PROMPT
You are providing a daily news-aware overlay for a UK weekly pump-price forecast.
The calibrated ridge model has already produced a directional call from price history.
Your job is to search recent oil/fuel news and decide whether to AGREE or DISAGREE
and most importantly, surface any major-impact event that the ridge model can't see
from price history alone.
return config('services.anthropic.model', 'claude-haiku-4-5-20251001');
}
/**
 * System prompt for Phase 1 (the web_search call).
 *
 * NOTE(review): this is a nowdoc (<<<'PROMPT'), which does NOT
 * interpolate — the literal text "$json" and "$this->confidenceCap"
 * below is sent to the model verbatim. The weekly context actually
 * arrives via searchUserMessage(). Also, "An empty citation array will
 * be rejected" contradicts the now-optional events_cited schema in
 * submitOverlayTool(). Confirm both are intentional before shipping.
 */
private function searchSystem(): string
{
    return <<<'PROMPT'
You are researching news that may affect this week's UK pump-price forecast.
Search recent news (last 48 hours) for:
- OPEC+ production decisions or unexpected announcements
- Geopolitical events affecting oil supply (sanctions, conflict, shipping disruption)
- Major refinery outages or pipeline incidents
- US/EU inventory reports that materially moved Brent
Context for this week:
$json
After searching, you will be asked to submit_overlay with direction, confidence
(capped at $this->confidenceCap), short reasoning, cited events with URLs,
agrees_with_ridge, and major_impact_event.
Citing events with REAL URLs is mandatory. An empty citation array will be
rejected and the overlay discarded.
Return only the search results you will be asked to summarise separately.
PROMPT;
}
private string $confidenceCap = '75';
/**
 * Phase 1 user message: the search instruction plus this week's
 * forecast context serialised as pretty-printed JSON.
 */
private function searchUserMessage(array $context): string
{
    $contextJson = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);

    return sprintf(
        "Use web_search to find oil/fuel news from the last 48 hours that could move UK pump prices this week.\n\nContext for this week:\n\n%s",
        $contextJson,
    );
}
/**
 * System prompt for Phase 2 (the forced submit_overlay call).
 * Interpolates the CONFIDENCE_CAP class constant into the text.
 */
private function submitSystem(): string
{
    $confidenceCap = self::CONFIDENCE_CAP;

    return <<<PROMPT
You are providing a news-aware directional overlay for a UK weekly pump-price forecast.
Decide whether to AGREE or DISAGREE with the ridge model based on the news headlines
provided in the user message. Cap confidence at $confidenceCap.
Include events_cited (with impact tags) for any specific headline that drove your
reasoning; you may leave events_cited empty if the news is unremarkable.
PROMPT;
}
/**
 * Phase 2 user message: the forecast context JSON plus the harvested
 * headlines flattened into a plain-text bullet list (or an explicit
 * "(none)" marker), ending with the submit instruction.
 *
 * @param array<int, array{url: string, title: string}> $harvested
 */
private function submitUserMessage(array $context, array $harvested): string
{
    $contextJson = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);

    $headlines = $harvested === []
        ? '(none — no relevant news found)'
        : implode("\n", array_map(
            static fn (array $hit): string => '- '.$hit['title'].' — '.$hit['url'],
            $harvested,
        ));

    return "Context for this week:\n\n".$contextJson."\n\nNews headlines found:\n".$headlines."\n\nNow call submit_overlay with your decision.";
}
/** @return array<string, mixed> */
private function submitOverlayTool(): array
@@ -366,7 +625,7 @@ final class LlmOverlayService
'reasoning_short' => ['type' => 'string', 'description' => '12 sentences.'],
'events_cited' => [
'type' => 'array',
'minItems' => 1,
'description' => 'Optional. Events that drove your reasoning, with directional impact. Citations are otherwise harvested from web_search_tool_result.',
'items' => [
'type' => 'object',
'properties' => [
@@ -381,7 +640,7 @@ final class LlmOverlayService
'agrees_with_ridge' => ['type' => 'boolean'],
'major_impact_event' => ['type' => 'boolean'],
],
'required' => ['direction', 'confidence', 'reasoning_short', 'events_cited', 'agrees_with_ridge', 'major_impact_event'],
'required' => ['direction', 'confidence', 'reasoning_short', 'agrees_with_ridge', 'major_impact_event'],
],
];
}
@@ -396,57 +655,4 @@ final class LlmOverlayService
return $block['input'] ?? null;
}
/**
 * True when the tool result is unusable for citation purposes:
 * no result at all, or `events_cited` absent, non-array, or empty.
 *
 * @param array<string, mixed>|null $rawResult
 */
private function citationsMissing(?array $rawResult): bool
{
    if ($rawResult === null) {
        return true;
    }

    $events = $rawResult['events_cited'] ?? null;

    return ! is_array($events) || $events === [];
}
/**
 * One-shot retry for when the model called submit_overlay without
 * populating `events_cited`.
 *
 * Replays the failed assistant turn and answers it with an explicit
 * error `tool_result` (matched by tool_use_id) explaining what was
 * missing, then forces submit_overlay again.
 *
 * @param array<int, mixed> $messages            conversation so far (search turns + submit instruction)
 * @param array<int, mixed> $failedSubmitContent assistant content blocks from the failed submit call
 * @return array<string, mixed>|null             tool input from the retry, or null when the retry
 *                                               failed or no tool_use id could be found
 */
private function retrySubmitWithCitationError(array $messages, array $failedSubmitContent): ?array
{
    // The error tool_result must reference the failed call's tool_use id;
    // without one there is nothing to attach the error to.
    $toolUseId = collect($failedSubmitContent)->firstWhere('type', 'tool_use')['id'] ?? null;
    if ($toolUseId === null) {
        Log::warning('LlmOverlayService: cannot retry — no tool_use id in failed submit');
        return null;
    }
    Log::info('LlmOverlayService: retrying submit with citation error', ['tool_use_id' => $toolUseId]);
    // Thread the failed turn back, then the error response to it. Order
    // matters: Anthropic requires the tool_result to directly follow the
    // assistant turn containing the matching tool_use block.
    $messages[] = ['role' => 'assistant', 'content' => $failedSubmitContent];
    $messages[] = ['role' => 'user', 'content' => [[
        'type' => 'tool_result',
        'tool_use_id' => $toolUseId,
        'content' => 'events_cited was missing or empty. Resubmit submit_overlay with at least one event from your earlier web search results, including its real URL, headline, source, and impact.',
        'is_error' => true,
    ]]];
    // Same forced-tool request shape as the original submit call.
    $retryResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
        ->withHeaders($this->headers())
        ->post(self::URL, [
            'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
            'max_tokens' => 512,
            'tools' => [$this->submitOverlayTool()],
            'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
            'messages' => $messages,
        ]));
    if (! $retryResponse->successful()) {
        Log::error('LlmOverlayService: retry submit failed', ['status' => $retryResponse->status()]);
        return null;
    }
    return $this->extractToolInput($retryResponse->json('content') ?? []);
}
}