fix(forecasting): persist LLM overlay under Tier-1 ITPM via two-call architecture
The daily forecast:llm-overlay command was being skipped because the previous single-conversation flow consumed more than Tier-1's 50,000 input-tokens-per-minute Anthropic bucket. The web_search tool auto-caches its results (~55k tokens) and requires `encrypted_content` intact when those blocks are resent, so the prior retry-on-missing-citations path either 429'd or 400'd on the second call. LlmOverlayService now runs two independent API calls. Phase 1 invokes the web_search tool and we discard the transcript after harvesting the URLs + titles from the returned web_search_tool_result blocks. Phase 2 is a fresh conversation containing the forecast context and the harvested headlines as plain text, with a forced submit_overlay tool call. events_cited is now optional in the tool schema — Haiku's flaky compliance no longer matters because citations come from the search results, not the model's transcription. Model-tagged events (with directional impact) merge with harvested-only entries (impact: 'neutral'), deduped by URL. Between phases the service reads anthropic-ratelimit-input-tokens-remaining / …-reset from Phase 1's headers and sleeps proportionally — only long enough for the SUBMIT_TOKEN_BUDGET worth of refill, not for the full bucket reset, capped at 65 seconds. ApiLogger now captures usage.input_tokens, usage.output_tokens, cache_read_input_tokens, cache_creation_input_tokens, plus the rate-limit remaining/reset headers on every Anthropic response. New nullable columns on api_logs make rate-limit diagnostics directly queryable. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -6,7 +6,9 @@ use App\Models\BrentPrice;
|
||||
use App\Models\LlmOverlay;
|
||||
use App\Models\VolatilityRegime;
|
||||
use App\Services\ApiLogger;
|
||||
use Carbon\CarbonImmutable;
|
||||
use Carbon\CarbonInterface;
|
||||
use Illuminate\Http\Client\Response;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
@@ -15,9 +17,21 @@ use Throwable;
|
||||
/**
|
||||
* Layer 4 — daily news-aware overlay on the calibrated ridge forecast.
|
||||
*
|
||||
* Calls Anthropic Haiku with the web_search tool, then forces a
|
||||
* submit_overlay tool call to get structured output. Cites events with
|
||||
* URLs; URLs are verified before storing. Empty citations → rejection.
|
||||
* Runs as two independent Anthropic API calls:
|
||||
* Phase 1 — web_search tool only; we capture the URLs/titles from
|
||||
* the returned web_search_tool_result blocks.
|
||||
* Phase 2 — fresh conversation containing those URLs+titles as plain
|
||||
* text plus a forced submit_overlay tool call.
|
||||
*
|
||||
* Phase 1's transcript is never sent back to Phase 2. Anthropic's
|
||||
* web_search auto-caches the encrypted page text (~55k tokens per
|
||||
* search) and requires it intact when web_search_tool_result blocks
|
||||
* are resent. Threading it through to Phase 2 either blows the Tier-1
|
||||
* 50k ITPM bucket or 400s if we try to strip it. Two clean calls keep
|
||||
* Phase 2 around 3k input tokens.
|
||||
*
|
||||
* Citations are harvested directly from Phase 1's web_search_tool_result
|
||||
* blocks — Haiku is unreliable about populating `events_cited` itself.
|
||||
*
|
||||
* Read-only with respect to the volatility flag — Layer 4 writes its
|
||||
* `llm_overlays` row; Layer 5's hourly cron picks it up and decides
|
||||
@@ -31,6 +45,15 @@ final class LlmOverlayService
|
||||
|
||||
private const int COOLDOWN_HOURS = 4;
|
||||
|
||||
private const int MAX_SEARCH_TURNS = 2;
|
||||
|
||||
/**
|
||||
* Approximate input-token cost of Phase 2 (system + tool schema +
|
||||
* forecast context + harvested URL list). If Phase 1 leaves
|
||||
* remaining ITPM below this, wait for the bucket to refill.
|
||||
*/
|
||||
private const int SUBMIT_TOKEN_BUDGET = 4_000;
|
||||
|
||||
public function __construct(
|
||||
private readonly ApiLogger $apiLogger,
|
||||
private readonly WeeklyForecastService $weeklyForecast,
|
||||
@@ -55,19 +78,24 @@ final class LlmOverlayService
|
||||
$forecast = $this->weeklyForecast->currentForecast();
|
||||
$context = $this->buildContext($forecast);
|
||||
|
||||
$rawResult = $this->callAnthropic($context);
|
||||
if ($rawResult === null) {
|
||||
$callResult = $this->callAnthropic($context);
|
||||
if ($callResult === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$verifiedEvents = $this->verifyCitedUrls($rawResult['events_cited'] ?? []);
|
||||
$rawResult = $callResult['raw'];
|
||||
$harvested = $callResult['harvested'];
|
||||
|
||||
$mergedEvents = $this->mergeEvents($rawResult['events_cited'] ?? [], $harvested);
|
||||
$verifiedEvents = $this->verifyCitedUrls($mergedEvents);
|
||||
|
||||
if ($verifiedEvents === []) {
|
||||
Log::warning('LlmOverlayService: no verified citations, rejecting overlay', [
|
||||
'events_cited_count' => count($rawResult['events_cited'] ?? []),
|
||||
'model_events' => $rawResult['events_cited'] ?? null,
|
||||
'harvested_urls' => array_column($harvested, 'url'),
|
||||
'direction' => $rawResult['direction'] ?? null,
|
||||
'confidence' => $rawResult['confidence'] ?? null,
|
||||
'reasoning_short' => $rawResult['reasoning_short'] ?? null,
|
||||
'raw_result' => $rawResult,
|
||||
]);
|
||||
|
||||
return null;
|
||||
@@ -131,70 +159,44 @@ final class LlmOverlayService
|
||||
];
|
||||
}
|
||||
|
||||
/** @return array<string, mixed>|null */
|
||||
/**
|
||||
* Two independent API calls:
|
||||
*
|
||||
* Phase 1 — runs the web_search tool, captures the assistant's
|
||||
* returned `web_search_tool_result` blocks, then
|
||||
* discards the transcript.
|
||||
*
|
||||
* Phase 2 — issues a brand-new conversation with the harvested
|
||||
* URLs/titles flattened into a plain-text user message
|
||||
* and forces a `submit_overlay` tool call.
|
||||
*
|
||||
* Why not one stitched conversation: Anthropic auto-caches web_search
|
||||
* results into ITPM (≈55k tokens for a 1-search call) and requires
|
||||
* `encrypted_content` intact when those blocks are sent back.
|
||||
* Resending the Phase 1 transcript to Phase 2 either rate-limits us
|
||||
* (29k+ tokens twice → exceeds the Tier-1 50k ITPM bucket) or 400s
|
||||
* if we strip the encrypted blob. A fresh Phase 2 sends ~3k tokens
|
||||
* total — small enough to fit in the recovered bucket after a
|
||||
* short adaptive sleep.
|
||||
*
|
||||
* @return array{raw: array<string, mixed>, harvested: array<int, array{url: string, title: string}>}|null
|
||||
*/
|
||||
private function callAnthropic(array $context): ?array
|
||||
{
|
||||
$messages = [['role' => 'user', 'content' => $this->prompt($context)]];
|
||||
|
||||
try {
|
||||
// Phase 1: web search loop. Append the assistant turn after every
|
||||
// successful response, then decide whether to keep looping —
|
||||
// this guarantees the messages array stays well-formed regardless
|
||||
// of whether we exit via `break` or by exhausting iterations.
|
||||
for ($i = 0, $response = null; $i < 5; $i++) {
|
||||
$response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(45)
|
||||
->withHeaders($this->headers())
|
||||
->post(self::URL, [
|
||||
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
|
||||
'max_tokens' => 1024,
|
||||
'tools' => [['type' => 'web_search_20250305', 'name' => 'web_search']],
|
||||
'messages' => $messages,
|
||||
]));
|
||||
|
||||
if (! $response->successful()) {
|
||||
Log::error('LlmOverlayService: search request failed', ['status' => $response->status()]);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
$messages[] = ['role' => 'assistant', 'content' => $response->json('content')];
|
||||
|
||||
if ($response->json('stop_reason') !== 'pause_turn') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
$messages[] = ['role' => 'user', 'content' => 'Now submit your overlay using the submit_overlay tool. Cite at least one event with a URL.'];
|
||||
|
||||
// Phase 2: forced structured output
|
||||
$submitResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
|
||||
->withHeaders($this->headers())
|
||||
->post(self::URL, [
|
||||
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
|
||||
'max_tokens' => 512,
|
||||
'tools' => [$this->submitOverlayTool()],
|
||||
'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
|
||||
'messages' => $messages,
|
||||
]));
|
||||
|
||||
if (! $submitResponse->successful()) {
|
||||
Log::error('LlmOverlayService: submit request failed', ['status' => $submitResponse->status()]);
|
||||
|
||||
$phase1 = $this->runWebSearch($context);
|
||||
if ($phase1 === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$submitContent = $submitResponse->json('content') ?? [];
|
||||
$rawResult = $this->extractToolInput($submitContent);
|
||||
$this->waitForRateLimitIfNeeded($phase1['response']);
|
||||
|
||||
// Haiku sometimes calls submit_overlay without `events_cited` even
|
||||
// though the schema marks it required. Confirmed in laravel.log on
|
||||
// 2026-05-12: tool_use input had only direction/confidence/reasoning.
|
||||
// Retry once with an explicit tool_result error.
|
||||
if ($this->citationsMissing($rawResult)) {
|
||||
$rawResult = $this->retrySubmitWithCitationError($messages, $submitContent) ?? $rawResult;
|
||||
$rawResult = $this->runSubmit($context, $phase1['harvested']);
|
||||
if ($rawResult === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return $rawResult;
|
||||
return ['raw' => $rawResult, 'harvested' => $phase1['harvested']];
|
||||
} catch (Throwable $e) {
|
||||
Log::error('LlmOverlayService: callAnthropic failed', ['error' => $e->getMessage()]);
|
||||
|
||||
@@ -202,6 +204,239 @@ final class LlmOverlayService
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Phase 1: ask the model to search for news and capture the
|
||||
* web_search_tool_result blocks. Returns the harvested citations
|
||||
* and the final response (whose rate-limit headers tell us when
|
||||
* the ITPM bucket will be replenished for Phase 2).
|
||||
*
|
||||
* @return array{harvested: array<int, array{url: string, title: string}>, response: Response}|null
|
||||
*/
|
||||
private function runWebSearch(array $context): ?array
|
||||
{
|
||||
$messages = [['role' => 'user', 'content' => $this->searchUserMessage($context)]];
|
||||
$response = null;
|
||||
|
||||
for ($i = 0; $i < self::MAX_SEARCH_TURNS; $i++) {
|
||||
$response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(45)
|
||||
->withHeaders($this->headers())
|
||||
->post(self::URL, [
|
||||
'model' => $this->model(),
|
||||
'max_tokens' => 1024,
|
||||
'system' => $this->searchSystem(),
|
||||
'tools' => [['type' => 'web_search_20250305', 'name' => 'web_search']],
|
||||
'messages' => $messages,
|
||||
]));
|
||||
|
||||
if (! $response->successful()) {
|
||||
Log::error('LlmOverlayService: search request failed', [
|
||||
'status' => $response->status(),
|
||||
'body' => substr($response->body(), 0, 500),
|
||||
]);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
$messages[] = ['role' => 'assistant', 'content' => $response->json('content')];
|
||||
|
||||
if ($response->json('stop_reason') !== 'pause_turn') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ($response === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return [
|
||||
'harvested' => $this->harvestSearchResults($messages),
|
||||
'response' => $response,
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Phase 2: fresh API call — no Phase 1 transcript — with the
|
||||
* harvested citations as plain text and a forced submit_overlay
|
||||
* tool call.
|
||||
*
|
||||
* @param array<int, array{url: string, title: string}> $harvested
|
||||
* @return array<string, mixed>|null
|
||||
*/
|
||||
private function runSubmit(array $context, array $harvested): ?array
|
||||
{
|
||||
$response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
|
||||
->withHeaders($this->headers())
|
||||
->post(self::URL, [
|
||||
'model' => $this->model(),
|
||||
'max_tokens' => 512,
|
||||
'system' => $this->submitSystem(),
|
||||
'tools' => [$this->submitOverlayTool()],
|
||||
'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
|
||||
'messages' => [['role' => 'user', 'content' => $this->submitUserMessage($context, $harvested)]],
|
||||
]));
|
||||
|
||||
if (! $response->successful()) {
|
||||
Log::error('LlmOverlayService: submit request failed', [
|
||||
'status' => $response->status(),
|
||||
'body' => substr($response->body(), 0, 500),
|
||||
]);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
$rawResult = $this->extractToolInput($response->json('content') ?? []);
|
||||
if ($rawResult === null) {
|
||||
Log::warning('LlmOverlayService: submit response missing tool_use block');
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return $rawResult;
|
||||
}
|
||||
|
||||
/**
|
||||
* Anthropic's web_search burns ≈55k input tokens (mostly auto-cached
|
||||
* search results) on Phase 1. At Tier 1's 50k ITPM the bucket can
|
||||
* be at zero immediately afterwards. Read the rate-limit headers
|
||||
* and sleep until the bucket has refilled enough for Phase 2.
|
||||
* Capped at 65s so the daily cron never hangs longer than a minute.
|
||||
*/
|
||||
private function waitForRateLimitIfNeeded(Response $response): void
|
||||
{
|
||||
$remaining = (int) $response->header('anthropic-ratelimit-input-tokens-remaining');
|
||||
if ($response->header('anthropic-ratelimit-input-tokens-remaining') === ''
|
||||
|| $remaining >= self::SUBMIT_TOKEN_BUDGET) {
|
||||
return;
|
||||
}
|
||||
|
||||
$resetAt = $response->header('anthropic-ratelimit-input-tokens-reset');
|
||||
$bucketSize = (int) $response->header('anthropic-ratelimit-input-tokens-limit');
|
||||
if ($resetAt === '' || $bucketSize <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$secondsUntilFullReset = max(0, CarbonImmutable::parse($resetAt)->getTimestamp() - now()->getTimestamp());
|
||||
} catch (Throwable) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Anthropic's bucket refills linearly. We don't need to wait for
|
||||
// the full reset — only enough for SUBMIT_TOKEN_BUDGET tokens to
|
||||
// become available. Sleep proportionally + a small safety margin,
|
||||
// hard-capped at 65s.
|
||||
$tokensNeeded = self::SUBMIT_TOKEN_BUDGET - $remaining;
|
||||
$proportional = (int) ceil(($tokensNeeded / $bucketSize) * $secondsUntilFullReset);
|
||||
$waitSeconds = max(1, min(65, $proportional + 2));
|
||||
|
||||
Log::info('LlmOverlayService: waiting for ITPM bucket refill before submit', [
|
||||
'remaining' => $remaining,
|
||||
'wait_seconds' => $waitSeconds,
|
||||
'full_reset_in' => $secondsUntilFullReset,
|
||||
]);
|
||||
|
||||
sleep($waitSeconds);
|
||||
}
|
||||
|
||||
/**
|
||||
* Walk every assistant turn and extract `{url, title}` from each
|
||||
* `web_search_tool_result` block. Anthropic's web_search returns
|
||||
* these blocks directly — they are the authoritative citation
|
||||
* source, not anything the model transcribes back to us.
|
||||
*
|
||||
* @param array<int, array<string, mixed>> $messages
|
||||
* @return array<int, array{url: string, title: string}>
|
||||
*/
|
||||
private function harvestSearchResults(array $messages): array
|
||||
{
|
||||
$byUrl = [];
|
||||
foreach ($messages as $message) {
|
||||
if (($message['role'] ?? null) !== 'assistant') {
|
||||
continue;
|
||||
}
|
||||
$content = $message['content'] ?? [];
|
||||
if (! is_array($content)) {
|
||||
continue;
|
||||
}
|
||||
foreach ($content as $block) {
|
||||
if (! is_array($block) || ($block['type'] ?? null) !== 'web_search_tool_result') {
|
||||
continue;
|
||||
}
|
||||
$results = $block['content'] ?? [];
|
||||
if (! is_array($results)) {
|
||||
continue;
|
||||
}
|
||||
foreach ($results as $result) {
|
||||
if (! is_array($result) || ($result['type'] ?? null) !== 'web_search_result') {
|
||||
continue;
|
||||
}
|
||||
$url = (string) ($result['url'] ?? '');
|
||||
if ($url === '' || isset($byUrl[$url])) {
|
||||
continue;
|
||||
}
|
||||
$byUrl[$url] = ['url' => $url, 'title' => (string) ($result['title'] ?? '')];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return array_values($byUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge model-provided events_cited with citations harvested from
|
||||
* `web_search_tool_result`. Model entries (which include `impact`
|
||||
* tagging) take precedence on URL collision; harvested-only entries
|
||||
* default to `impact: 'neutral'`.
|
||||
*
|
||||
* @param array<int, mixed> $modelEvents
|
||||
* @param array<int, array{url: string, title: string}> $harvested
|
||||
* @return array<int, array<string, mixed>>
|
||||
*/
|
||||
private function mergeEvents(array $modelEvents, array $harvested): array
|
||||
{
|
||||
$byUrl = [];
|
||||
|
||||
foreach ($modelEvents as $event) {
|
||||
if (! is_array($event)) {
|
||||
continue;
|
||||
}
|
||||
$url = (string) ($event['url'] ?? '');
|
||||
if ($url === '') {
|
||||
continue;
|
||||
}
|
||||
$byUrl[$url] = [
|
||||
'headline' => (string) ($event['headline'] ?? ''),
|
||||
'source' => (string) ($event['source'] ?? ''),
|
||||
'url' => $url,
|
||||
'impact' => in_array($event['impact'] ?? null, ['rising', 'falling', 'neutral'], true)
|
||||
? $event['impact']
|
||||
: 'neutral',
|
||||
];
|
||||
}
|
||||
|
||||
foreach ($harvested as $result) {
|
||||
$url = $result['url'];
|
||||
if (isset($byUrl[$url])) {
|
||||
continue;
|
||||
}
|
||||
$byUrl[$url] = [
|
||||
'headline' => $result['title'],
|
||||
'source' => $this->domainOf($url),
|
||||
'url' => $url,
|
||||
'impact' => 'neutral',
|
||||
];
|
||||
}
|
||||
|
||||
return array_values($byUrl);
|
||||
}
|
||||
|
||||
private function domainOf(string $url): string
|
||||
{
|
||||
$host = parse_url($url, PHP_URL_HOST);
|
||||
|
||||
return is_string($host) ? preg_replace('/^www\./', '', $host) : '';
|
||||
}
|
||||
|
||||
private function verificationUserAgent(): string
|
||||
{
|
||||
$appUrl = rtrim((string) config('app.url'), '/');
|
||||
@@ -320,37 +555,61 @@ final class LlmOverlayService
|
||||
return config('services.anthropic.api_key');
|
||||
}
|
||||
|
||||
private function prompt(array $context): string
|
||||
private function model(): string
|
||||
{
|
||||
$json = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||
|
||||
return <<<PROMPT
|
||||
You are providing a daily news-aware overlay for a UK weekly pump-price forecast.
|
||||
|
||||
The calibrated ridge model has already produced a directional call from price history.
|
||||
Your job is to search recent oil/fuel news and decide whether to AGREE or DISAGREE
|
||||
— and most importantly, surface any major-impact event that the ridge model can't see
|
||||
from price history alone.
|
||||
return config('services.anthropic.model', 'claude-haiku-4-5-20251001');
|
||||
}
|
||||
|
||||
private function searchSystem(): string
|
||||
{
|
||||
return <<<'PROMPT'
|
||||
You are researching news that may affect this week's UK pump-price forecast.
|
||||
Search recent news (last 48 hours) for:
|
||||
- OPEC+ production decisions or unexpected announcements
|
||||
- Geopolitical events affecting oil supply (sanctions, conflict, shipping disruption)
|
||||
- Major refinery outages or pipeline incidents
|
||||
- US/EU inventory reports that materially moved Brent
|
||||
|
||||
Context for this week:
|
||||
$json
|
||||
|
||||
After searching, you will be asked to submit_overlay with direction, confidence
|
||||
(capped at $this->confidenceCap), short reasoning, cited events with URLs,
|
||||
agrees_with_ridge, and major_impact_event.
|
||||
|
||||
Citing events with REAL URLs is mandatory. An empty citation array will be
|
||||
rejected and the overlay discarded.
|
||||
Return only the search results — you will be asked to summarise separately.
|
||||
PROMPT;
|
||||
}
|
||||
|
||||
private string $confidenceCap = '75';
|
||||
private function searchUserMessage(array $context): string
|
||||
{
|
||||
$json = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||
|
||||
return "Use web_search to find oil/fuel news from the last 48 hours that could move UK pump prices this week.\n\nContext for this week:\n\n".$json;
|
||||
}
|
||||
|
||||
private function submitSystem(): string
|
||||
{
|
||||
$cap = self::CONFIDENCE_CAP;
|
||||
|
||||
return <<<PROMPT
|
||||
You are providing a news-aware directional overlay for a UK weekly pump-price forecast.
|
||||
Decide whether to AGREE or DISAGREE with the ridge model based on the news headlines
|
||||
provided in the user message. Cap confidence at $cap.
|
||||
Include events_cited (with impact tags) for any specific headline that drove your
|
||||
reasoning; you may leave events_cited empty if the news is unremarkable.
|
||||
PROMPT;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array<int, array{url: string, title: string}> $harvested
|
||||
*/
|
||||
private function submitUserMessage(array $context, array $harvested): string
|
||||
{
|
||||
$contextJson = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||
|
||||
if ($harvested === []) {
|
||||
$headlines = '(none — no relevant news found)';
|
||||
} else {
|
||||
$headlines = collect($harvested)
|
||||
->map(fn (array $r): string => '- '.$r['title'].' — '.$r['url'])
|
||||
->implode("\n");
|
||||
}
|
||||
|
||||
return "Context for this week:\n\n".$contextJson."\n\nNews headlines found:\n".$headlines."\n\nNow call submit_overlay with your decision.";
|
||||
}
|
||||
|
||||
/** @return array<string, mixed> */
|
||||
private function submitOverlayTool(): array
|
||||
@@ -366,7 +625,7 @@ final class LlmOverlayService
|
||||
'reasoning_short' => ['type' => 'string', 'description' => '1–2 sentences.'],
|
||||
'events_cited' => [
|
||||
'type' => 'array',
|
||||
'minItems' => 1,
|
||||
'description' => 'Optional. Events that drove your reasoning, with directional impact. Citations are otherwise harvested from web_search_tool_result.',
|
||||
'items' => [
|
||||
'type' => 'object',
|
||||
'properties' => [
|
||||
@@ -381,7 +640,7 @@ final class LlmOverlayService
|
||||
'agrees_with_ridge' => ['type' => 'boolean'],
|
||||
'major_impact_event' => ['type' => 'boolean'],
|
||||
],
|
||||
'required' => ['direction', 'confidence', 'reasoning_short', 'events_cited', 'agrees_with_ridge', 'major_impact_event'],
|
||||
'required' => ['direction', 'confidence', 'reasoning_short', 'agrees_with_ridge', 'major_impact_event'],
|
||||
],
|
||||
];
|
||||
}
|
||||
@@ -396,57 +655,4 @@ final class LlmOverlayService
|
||||
|
||||
return $block['input'] ?? null;
|
||||
}
|
||||
|
||||
/** @param array<string, mixed>|null $rawResult */
|
||||
private function citationsMissing(?array $rawResult): bool
|
||||
{
|
||||
return $rawResult === null
|
||||
|| ! isset($rawResult['events_cited'])
|
||||
|| ! is_array($rawResult['events_cited'])
|
||||
|| $rawResult['events_cited'] === [];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array<int, mixed> $messages
|
||||
* @param array<int, mixed> $failedSubmitContent
|
||||
* @return array<string, mixed>|null
|
||||
*/
|
||||
private function retrySubmitWithCitationError(array $messages, array $failedSubmitContent): ?array
|
||||
{
|
||||
$toolUseId = collect($failedSubmitContent)->firstWhere('type', 'tool_use')['id'] ?? null;
|
||||
|
||||
if ($toolUseId === null) {
|
||||
Log::warning('LlmOverlayService: cannot retry — no tool_use id in failed submit');
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
Log::info('LlmOverlayService: retrying submit with citation error', ['tool_use_id' => $toolUseId]);
|
||||
|
||||
$messages[] = ['role' => 'assistant', 'content' => $failedSubmitContent];
|
||||
$messages[] = ['role' => 'user', 'content' => [[
|
||||
'type' => 'tool_result',
|
||||
'tool_use_id' => $toolUseId,
|
||||
'content' => 'events_cited was missing or empty. Resubmit submit_overlay with at least one event from your earlier web search results, including its real URL, headline, source, and impact.',
|
||||
'is_error' => true,
|
||||
]]];
|
||||
|
||||
$retryResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
|
||||
->withHeaders($this->headers())
|
||||
->post(self::URL, [
|
||||
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
|
||||
'max_tokens' => 512,
|
||||
'tools' => [$this->submitOverlayTool()],
|
||||
'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
|
||||
'messages' => $messages,
|
||||
]));
|
||||
|
||||
if (! $retryResponse->successful()) {
|
||||
Log::error('LlmOverlayService: retry submit failed', ['status' => $retryResponse->status()]);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return $this->extractToolInput($retryResponse->json('content') ?? []);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user