From 07e078904424961fc30c90bbe7c524a0b4ccad77 Mon Sep 17 00:00:00 2001 From: Ovidiu U Date: Thu, 14 May 2026 14:22:42 +0100 Subject: [PATCH] fix(forecasting): persist LLM overlay under Tier-1 ITPM via two-call architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The daily forecast:llm-overlay command was being skipped because the previous single-conversation flow consumed more than Tier-1's 50,000 input-tokens-per-minute Anthropic bucket. The web_search tool auto-caches its results (~55k tokens) and requires `encrypted_content` intact when those blocks are resent, so the prior retry-on-missing-citations path either 429'd or 400'd on the second call. LlmOverlayService now runs two independent API calls. Phase 1 invokes the web_search tool and we discard the transcript after harvesting the URLs + titles from the returned web_search_tool_result blocks. Phase 2 is a fresh conversation containing the forecast context and the harvested headlines as plain text, with a forced submit_overlay tool call. events_cited is now optional in the tool schema — Haiku's flaky compliance no longer matters because citations come from the search results, not the model's transcription. Model-tagged events (with directional impact) merge with harvested-only entries (impact: 'neutral'), deduped by URL. Between phases the service reads anthropic-ratelimit-input-tokens-remaining / …-reset from Phase 1's headers and sleeps proportionally — only long enough for the SUBMIT_TOKEN_BUDGET worth of refill, not for the full bucket reset, capped at 65 seconds. ApiLogger now captures usage.input_tokens, usage.output_tokens, cache_read_input_tokens, cache_creation_input_tokens, plus the rate-limit remaining/reset headers on every Anthropic response. New nullable columns on api_logs make rate-limit diagnostics directly queryable. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- app/Models/ApiLog.php | 17 +- app/Services/ApiLogger.php | 39 ++ .../Forecasting/LlmOverlayService.php | 484 +++++++++++++----- ...0126_add_token_usage_to_api_logs_table.php | 46 ++ tests/Unit/ApiLoggerTest.php | 54 ++ .../Forecasting/LlmOverlayServiceTest.php | 353 ++++++------- 6 files changed, 668 insertions(+), 325 deletions(-) create mode 100644 database/migrations/2026_05_14_130126_add_token_usage_to_api_logs_table.php diff --git a/app/Models/ApiLog.php b/app/Models/ApiLog.php index e9f30f9..e45df84 100644 --- a/app/Models/ApiLog.php +++ b/app/Models/ApiLog.php @@ -7,7 +7,21 @@ use Illuminate\Database\Eloquent\Attributes\Fillable; use Illuminate\Database\Eloquent\Factories\HasFactory; use Illuminate\Database\Eloquent\Model; -#[Fillable(['service', 'method', 'url', 'status_code', 'duration_ms', 'error', 'response_body'])] +#[Fillable([ + 'service', + 'method', + 'url', + 'status_code', + 'duration_ms', + 'error', + 'response_body', + 'input_tokens', + 'output_tokens', + 'cache_read_tokens', + 'cache_write_tokens', + 'ratelimit_remaining', + 'ratelimit_reset_at', +])] class ApiLog extends Model { /** @use HasFactory */ @@ -19,6 +33,7 @@ class ApiLog extends Model { return [ 'created_at' => 'datetime', + 'ratelimit_reset_at' => 'datetime', ]; } } diff --git a/app/Services/ApiLogger.php b/app/Services/ApiLogger.php index 6fe5bbd..7ec028d 100644 --- a/app/Services/ApiLogger.php +++ b/app/Services/ApiLogger.php @@ -34,10 +34,12 @@ class ApiLogger $statusCode = null; $error = null; $responseBody = null; + $usage = []; try { $response = $request(); $statusCode = $response->status(); + $usage = $this->extractUsage($response); if ($response->failed()) { $body = $response->body(); @@ -53,6 +55,7 @@ class ApiLogger // doesn't. Pull the body when it's available. 
if ($e instanceof RequestException) { $responseBody = $this->truncate($e->response->body()); + $usage = $this->extractUsage($e->response); } throw $e; @@ -65,6 +68,7 @@ class ApiLogger 'duration_ms' => (int) round((microtime(true) - $start) * 1000), 'error' => $error, 'response_body' => $responseBody, + ...$usage, ]); } } @@ -75,4 +79,39 @@ class ApiLogger ? substr($body, 0, self::RESPONSE_BODY_CAP) : $body; } + + /** + * Pull token-usage and rate-limit telemetry from a provider response. + * + * Today only Anthropic exposes both. Other providers return mostly + * NULLs — callers don't need to know which is which. + * + * @return array + */ + private function extractUsage(?Response $response): array + { + if ($response === null) { + return []; + } + + $usage = $response->json('usage'); + $tokens = is_array($usage) ? $usage : []; + + $reset = $response->header('anthropic-ratelimit-input-tokens-reset'); + $remaining = $response->header('anthropic-ratelimit-input-tokens-remaining'); + + return [ + 'input_tokens' => $this->intOrNull($tokens['input_tokens'] ?? null), + 'output_tokens' => $this->intOrNull($tokens['output_tokens'] ?? null), + 'cache_read_tokens' => $this->intOrNull($tokens['cache_read_input_tokens'] ?? null), + 'cache_write_tokens' => $this->intOrNull($tokens['cache_creation_input_tokens'] ?? null), + 'ratelimit_remaining' => $this->intOrNull($remaining !== '' ? $remaining : null), + 'ratelimit_reset_at' => $reset !== '' ? $reset : null, + ]; + } + + private function intOrNull(mixed $value): ?int + { + return is_numeric($value) ? 
(int) $value : null; + } } diff --git a/app/Services/Forecasting/LlmOverlayService.php b/app/Services/Forecasting/LlmOverlayService.php index 99a21a7..b796b50 100644 --- a/app/Services/Forecasting/LlmOverlayService.php +++ b/app/Services/Forecasting/LlmOverlayService.php @@ -6,7 +6,9 @@ use App\Models\BrentPrice; use App\Models\LlmOverlay; use App\Models\VolatilityRegime; use App\Services\ApiLogger; +use Carbon\CarbonImmutable; use Carbon\CarbonInterface; +use Illuminate\Http\Client\Response; use Illuminate\Support\Facades\DB; use Illuminate\Support\Facades\Http; use Illuminate\Support\Facades\Log; @@ -15,9 +17,21 @@ use Throwable; /** * Layer 4 — daily news-aware overlay on the calibrated ridge forecast. * - * Calls Anthropic Haiku with the web_search tool, then forces a - * submit_overlay tool call to get structured output. Cites events with - * URLs; URLs are verified before storing. Empty citations → rejection. + * Runs as two independent Anthropic API calls: + * Phase 1 — web_search tool only; we capture the URLs/titles from + * the returned web_search_tool_result blocks. + * Phase 2 — fresh conversation containing those URLs+titles as plain + * text plus a forced submit_overlay tool call. + * + * Phase 1's transcript is never sent back to Phase 2. Anthropic's + * web_search auto-caches the encrypted page text (~55k tokens per + * search) and requires it intact when web_search_tool_result blocks + * are resent. Threading it through to Phase 2 either blows the Tier-1 + * 50k ITPM bucket or 400s if we try to strip it. Two clean calls keep + * Phase 2 around 3k input tokens. + * + * Citations are harvested directly from Phase 1's web_search_tool_result + * blocks — Haiku is unreliable about populating `events_cited` itself. 
* * Read-only with respect to the volatility flag — Layer 4 writes its * `llm_overlays` row; Layer 5's hourly cron picks it up and decides @@ -31,6 +45,15 @@ final class LlmOverlayService private const int COOLDOWN_HOURS = 4; + private const int MAX_SEARCH_TURNS = 2; + + /** + * Approximate input-token cost of Phase 2 (system + tool schema + + * forecast context + harvested URL list). If Phase 1 leaves + * remaining ITPM below this, wait for the bucket to refill. + */ + private const int SUBMIT_TOKEN_BUDGET = 4_000; + public function __construct( private readonly ApiLogger $apiLogger, private readonly WeeklyForecastService $weeklyForecast, @@ -55,19 +78,24 @@ final class LlmOverlayService $forecast = $this->weeklyForecast->currentForecast(); $context = $this->buildContext($forecast); - $rawResult = $this->callAnthropic($context); - if ($rawResult === null) { + $callResult = $this->callAnthropic($context); + if ($callResult === null) { return null; } - $verifiedEvents = $this->verifyCitedUrls($rawResult['events_cited'] ?? []); + $rawResult = $callResult['raw']; + $harvested = $callResult['harvested']; + + $mergedEvents = $this->mergeEvents($rawResult['events_cited'] ?? [], $harvested); + $verifiedEvents = $this->verifyCitedUrls($mergedEvents); + if ($verifiedEvents === []) { Log::warning('LlmOverlayService: no verified citations, rejecting overlay', [ - 'events_cited_count' => count($rawResult['events_cited'] ?? []), + 'model_events' => $rawResult['events_cited'] ?? null, + 'harvested_urls' => array_column($harvested, 'url'), 'direction' => $rawResult['direction'] ?? null, 'confidence' => $rawResult['confidence'] ?? null, 'reasoning_short' => $rawResult['reasoning_short'] ?? 
null, - 'raw_result' => $rawResult, ]); return null; @@ -131,70 +159,44 @@ final class LlmOverlayService ]; } - /** @return array|null */ + /** + * Two independent API calls: + * + * Phase 1 — runs the web_search tool, captures the assistant's + * returned `web_search_tool_result` blocks, then + * discards the transcript. + * + * Phase 2 — issues a brand-new conversation with the harvested + * URLs/titles flattened into a plain-text user message + * and forces a `submit_overlay` tool call. + * + * Why not one stitched conversation: Anthropic auto-caches web_search + * results into ITPM (≈55k tokens for a 1-search call) and requires + * `encrypted_content` intact when those blocks are sent back. + * Resending the Phase 1 transcript to Phase 2 either rate-limits us + * (29k+ tokens twice → exceeds the Tier-1 50k ITPM bucket) or 400s + * if we strip the encrypted blob. A fresh Phase 2 sends ~3k tokens + * total — small enough to fit in the recovered bucket after a + * short adaptive sleep. + * + * @return array{raw: array, harvested: array}|null + */ private function callAnthropic(array $context): ?array { - $messages = [['role' => 'user', 'content' => $this->prompt($context)]]; - try { - // Phase 1: web search loop. Append the assistant turn after every - // successful response, then decide whether to keep looping — - // this guarantees the messages array stays well-formed regardless - // of whether we exit via `break` or by exhausting iterations. - for ($i = 0, $response = null; $i < 5; $i++) { - $response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(45) - ->withHeaders($this->headers()) - ->post(self::URL, [ - 'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'), - 'max_tokens' => 1024, - 'tools' => [['type' => 'web_search_20250305', 'name' => 'web_search']], - 'messages' => $messages, - ])); - - if (! 
$response->successful()) { - Log::error('LlmOverlayService: search request failed', ['status' => $response->status()]); - - return null; - } - - $messages[] = ['role' => 'assistant', 'content' => $response->json('content')]; - - if ($response->json('stop_reason') !== 'pause_turn') { - break; - } - } - - $messages[] = ['role' => 'user', 'content' => 'Now submit your overlay using the submit_overlay tool. Cite at least one event with a URL.']; - - // Phase 2: forced structured output - $submitResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20) - ->withHeaders($this->headers()) - ->post(self::URL, [ - 'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'), - 'max_tokens' => 512, - 'tools' => [$this->submitOverlayTool()], - 'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'], - 'messages' => $messages, - ])); - - if (! $submitResponse->successful()) { - Log::error('LlmOverlayService: submit request failed', ['status' => $submitResponse->status()]); - + $phase1 = $this->runWebSearch($context); + if ($phase1 === null) { return null; } - $submitContent = $submitResponse->json('content') ?? []; - $rawResult = $this->extractToolInput($submitContent); + $this->waitForRateLimitIfNeeded($phase1['response']); - // Haiku sometimes calls submit_overlay without `events_cited` even - // though the schema marks it required. Confirmed in laravel.log on - // 2026-05-12: tool_use input had only direction/confidence/reasoning. - // Retry once with an explicit tool_result error. - if ($this->citationsMissing($rawResult)) { - $rawResult = $this->retrySubmitWithCitationError($messages, $submitContent) ?? 
$rawResult; + $rawResult = $this->runSubmit($context, $phase1['harvested']); + if ($rawResult === null) { + return null; } - return $rawResult; + return ['raw' => $rawResult, 'harvested' => $phase1['harvested']]; } catch (Throwable $e) { Log::error('LlmOverlayService: callAnthropic failed', ['error' => $e->getMessage()]); @@ -202,6 +204,239 @@ final class LlmOverlayService } } + /** + * Phase 1: ask the model to search for news and capture the + * web_search_tool_result blocks. Returns the harvested citations + * and the final response (whose rate-limit headers tell us when + * the ITPM bucket will be replenished for Phase 2). + * + * @return array{harvested: array, response: Response}|null + */ + private function runWebSearch(array $context): ?array + { + $messages = [['role' => 'user', 'content' => $this->searchUserMessage($context)]]; + $response = null; + + for ($i = 0; $i < self::MAX_SEARCH_TURNS; $i++) { + $response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(45) + ->withHeaders($this->headers()) + ->post(self::URL, [ + 'model' => $this->model(), + 'max_tokens' => 1024, + 'system' => $this->searchSystem(), + 'tools' => [['type' => 'web_search_20250305', 'name' => 'web_search']], + 'messages' => $messages, + ])); + + if (! $response->successful()) { + Log::error('LlmOverlayService: search request failed', [ + 'status' => $response->status(), + 'body' => substr($response->body(), 0, 500), + ]); + + return null; + } + + $messages[] = ['role' => 'assistant', 'content' => $response->json('content')]; + + if ($response->json('stop_reason') !== 'pause_turn') { + break; + } + } + + if ($response === null) { + return null; + } + + return [ + 'harvested' => $this->harvestSearchResults($messages), + 'response' => $response, + ]; + } + + /** + * Phase 2: fresh API call — no Phase 1 transcript — with the + * harvested citations as plain text and a forced submit_overlay + * tool call. 
+ * + * @param array $harvested + * @return array|null + */ + private function runSubmit(array $context, array $harvested): ?array + { + $response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20) + ->withHeaders($this->headers()) + ->post(self::URL, [ + 'model' => $this->model(), + 'max_tokens' => 512, + 'system' => $this->submitSystem(), + 'tools' => [$this->submitOverlayTool()], + 'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'], + 'messages' => [['role' => 'user', 'content' => $this->submitUserMessage($context, $harvested)]], + ])); + + if (! $response->successful()) { + Log::error('LlmOverlayService: submit request failed', [ + 'status' => $response->status(), + 'body' => substr($response->body(), 0, 500), + ]); + + return null; + } + + $rawResult = $this->extractToolInput($response->json('content') ?? []); + if ($rawResult === null) { + Log::warning('LlmOverlayService: submit response missing tool_use block'); + + return null; + } + + return $rawResult; + } + + /** + * Anthropic's web_search burns ≈55k input tokens (mostly auto-cached + * search results) on Phase 1. At Tier 1's 50k ITPM the bucket can + * be at zero immediately afterwards. Read the rate-limit headers + * and sleep until the bucket has refilled enough for Phase 2. + * Capped at 65s so the daily cron never hangs longer than a minute. 
+ */ + private function waitForRateLimitIfNeeded(Response $response): void + { + $remaining = (int) $response->header('anthropic-ratelimit-input-tokens-remaining'); + if ($response->header('anthropic-ratelimit-input-tokens-remaining') === '' + || $remaining >= self::SUBMIT_TOKEN_BUDGET) { + return; + } + + $resetAt = $response->header('anthropic-ratelimit-input-tokens-reset'); + $bucketSize = (int) $response->header('anthropic-ratelimit-input-tokens-limit'); + if ($resetAt === '' || $bucketSize <= 0) { + return; + } + + try { + $secondsUntilFullReset = max(0, CarbonImmutable::parse($resetAt)->getTimestamp() - now()->getTimestamp()); + } catch (Throwable) { + return; + } + + // Anthropic's bucket refills linearly. We don't need to wait for + // the full reset — only enough for SUBMIT_TOKEN_BUDGET tokens to + // become available. Sleep proportionally + a small safety margin, + // hard-capped at 65s. + $tokensNeeded = self::SUBMIT_TOKEN_BUDGET - $remaining; + $proportional = (int) ceil(($tokensNeeded / $bucketSize) * $secondsUntilFullReset); + $waitSeconds = max(1, min(65, $proportional + 2)); + + Log::info('LlmOverlayService: waiting for ITPM bucket refill before submit', [ + 'remaining' => $remaining, + 'wait_seconds' => $waitSeconds, + 'full_reset_in' => $secondsUntilFullReset, + ]); + + sleep($waitSeconds); + } + + /** + * Walk every assistant turn and extract `{url, title}` from each + * `web_search_tool_result` block. Anthropic's web_search returns + * these blocks directly — they are the authoritative citation + * source, not anything the model transcribes back to us. + * + * @param array> $messages + * @return array + */ + private function harvestSearchResults(array $messages): array + { + $byUrl = []; + foreach ($messages as $message) { + if (($message['role'] ?? null) !== 'assistant') { + continue; + } + $content = $message['content'] ?? []; + if (! is_array($content)) { + continue; + } + foreach ($content as $block) { + if (! 
is_array($block) || ($block['type'] ?? null) !== 'web_search_tool_result') { + continue; + } + $results = $block['content'] ?? []; + if (! is_array($results)) { + continue; + } + foreach ($results as $result) { + if (! is_array($result) || ($result['type'] ?? null) !== 'web_search_result') { + continue; + } + $url = (string) ($result['url'] ?? ''); + if ($url === '' || isset($byUrl[$url])) { + continue; + } + $byUrl[$url] = ['url' => $url, 'title' => (string) ($result['title'] ?? '')]; + } + } + } + + return array_values($byUrl); + } + + /** + * Merge model-provided events_cited with citations harvested from + * `web_search_tool_result`. Model entries (which include `impact` + * tagging) take precedence on URL collision; harvested-only entries + * default to `impact: 'neutral'`. + * + * @param array $modelEvents + * @param array $harvested + * @return array> + */ + private function mergeEvents(array $modelEvents, array $harvested): array + { + $byUrl = []; + + foreach ($modelEvents as $event) { + if (! is_array($event)) { + continue; + } + $url = (string) ($event['url'] ?? ''); + if ($url === '') { + continue; + } + $byUrl[$url] = [ + 'headline' => (string) ($event['headline'] ?? ''), + 'source' => (string) ($event['source'] ?? ''), + 'url' => $url, + 'impact' => in_array($event['impact'] ?? null, ['rising', 'falling', 'neutral'], true) + ? $event['impact'] + : 'neutral', + ]; + } + + foreach ($harvested as $result) { + $url = $result['url']; + if (isset($byUrl[$url])) { + continue; + } + $byUrl[$url] = [ + 'headline' => $result['title'], + 'source' => $this->domainOf($url), + 'url' => $url, + 'impact' => 'neutral', + ]; + } + + return array_values($byUrl); + } + + private function domainOf(string $url): string + { + $host = parse_url($url, PHP_URL_HOST); + + return is_string($host) ? 
preg_replace('/^www\./', '', $host) : ''; + } + private function verificationUserAgent(): string { $appUrl = rtrim((string) config('app.url'), '/'); @@ -320,37 +555,61 @@ final class LlmOverlayService return config('services.anthropic.api_key'); } - private function prompt(array $context): string + private function model(): string { - $json = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES); - - return <<confidenceCap), short reasoning, cited events with URLs, - agrees_with_ridge, and major_impact_event. - - Citing events with REAL URLs is mandatory. An empty citation array will be - rejected and the overlay discarded. + Return only the search results — you will be asked to summarise separately. PROMPT; } - private string $confidenceCap = '75'; + private function searchUserMessage(array $context): string + { + $json = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES); + + return "Use web_search to find oil/fuel news from the last 48 hours that could move UK pump prices this week.\n\nContext for this week:\n\n".$json; + } + + private function submitSystem(): string + { + $cap = self::CONFIDENCE_CAP; + + return << $harvested + */ + private function submitUserMessage(array $context, array $harvested): string + { + $contextJson = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES); + + if ($harvested === []) { + $headlines = '(none — no relevant news found)'; + } else { + $headlines = collect($harvested) + ->map(fn (array $r): string => '- '.$r['title'].' — '.$r['url']) + ->implode("\n"); + } + + return "Context for this week:\n\n".$contextJson."\n\nNews headlines found:\n".$headlines."\n\nNow call submit_overlay with your decision."; + } /** @return array */ private function submitOverlayTool(): array @@ -366,7 +625,7 @@ final class LlmOverlayService 'reasoning_short' => ['type' => 'string', 'description' => '1–2 sentences.'], 'events_cited' => [ 'type' => 'array', - 'minItems' => 1, + 'description' => 'Optional. 
Events that drove your reasoning, with directional impact. Citations are otherwise harvested from web_search_tool_result.', 'items' => [ 'type' => 'object', 'properties' => [ @@ -381,7 +640,7 @@ final class LlmOverlayService 'agrees_with_ridge' => ['type' => 'boolean'], 'major_impact_event' => ['type' => 'boolean'], ], - 'required' => ['direction', 'confidence', 'reasoning_short', 'events_cited', 'agrees_with_ridge', 'major_impact_event'], + 'required' => ['direction', 'confidence', 'reasoning_short', 'agrees_with_ridge', 'major_impact_event'], ], ]; } @@ -396,57 +655,4 @@ final class LlmOverlayService return $block['input'] ?? null; } - - /** @param array|null $rawResult */ - private function citationsMissing(?array $rawResult): bool - { - return $rawResult === null - || ! isset($rawResult['events_cited']) - || ! is_array($rawResult['events_cited']) - || $rawResult['events_cited'] === []; - } - - /** - * @param array $messages - * @param array $failedSubmitContent - * @return array|null - */ - private function retrySubmitWithCitationError(array $messages, array $failedSubmitContent): ?array - { - $toolUseId = collect($failedSubmitContent)->firstWhere('type', 'tool_use')['id'] ?? null; - - if ($toolUseId === null) { - Log::warning('LlmOverlayService: cannot retry — no tool_use id in failed submit'); - - return null; - } - - Log::info('LlmOverlayService: retrying submit with citation error', ['tool_use_id' => $toolUseId]); - - $messages[] = ['role' => 'assistant', 'content' => $failedSubmitContent]; - $messages[] = ['role' => 'user', 'content' => [[ - 'type' => 'tool_result', - 'tool_use_id' => $toolUseId, - 'content' => 'events_cited was missing or empty. 
Resubmit submit_overlay with at least one event from your earlier web search results, including its real URL, headline, source, and impact.', - 'is_error' => true, - ]]]; - - $retryResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20) - ->withHeaders($this->headers()) - ->post(self::URL, [ - 'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'), - 'max_tokens' => 512, - 'tools' => [$this->submitOverlayTool()], - 'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'], - 'messages' => $messages, - ])); - - if (! $retryResponse->successful()) { - Log::error('LlmOverlayService: retry submit failed', ['status' => $retryResponse->status()]); - - return null; - } - - return $this->extractToolInput($retryResponse->json('content') ?? []); - } } diff --git a/database/migrations/2026_05_14_130126_add_token_usage_to_api_logs_table.php b/database/migrations/2026_05_14_130126_add_token_usage_to_api_logs_table.php new file mode 100644 index 0000000..ab9c3c4 --- /dev/null +++ b/database/migrations/2026_05_14_130126_add_token_usage_to_api_logs_table.php @@ -0,0 +1,46 @@ +unsignedInteger('input_tokens')->nullable()->after('response_body') + ->comment('Input tokens billed (Anthropic usage.input_tokens). NULL for providers that do not report usage.'); + $table->unsignedInteger('output_tokens')->nullable()->after('input_tokens') + ->comment('Output tokens billed (Anthropic usage.output_tokens).'); + $table->unsignedInteger('cache_read_tokens')->nullable()->after('output_tokens') + ->comment('Cache-hit tokens (Anthropic usage.cache_read_input_tokens). Do not count toward ITPM on most models.'); + $table->unsignedInteger('cache_write_tokens')->nullable()->after('cache_read_tokens') + ->comment('Cache-write tokens (Anthropic usage.cache_creation_input_tokens). 
Count toward ITPM.'); + $table->unsignedInteger('ratelimit_remaining')->nullable()->after('cache_write_tokens') + ->comment('Provider-reported input-tokens remaining in the rolling window (anthropic-ratelimit-input-tokens-remaining).'); + $table->dateTime('ratelimit_reset_at')->nullable()->after('ratelimit_remaining') + ->comment('When the input-tokens bucket will be fully replenished (anthropic-ratelimit-input-tokens-reset, RFC 3339).'); + }); + } + + public function down(): void + { + Schema::table('api_logs', function (Blueprint $table) { + $table->dropColumn([ + 'input_tokens', + 'output_tokens', + 'cache_read_tokens', + 'cache_write_tokens', + 'ratelimit_remaining', + 'ratelimit_reset_at', + ]); + }); + } +}; diff --git a/tests/Unit/ApiLoggerTest.php b/tests/Unit/ApiLoggerTest.php index 561d834..dafbc45 100644 --- a/tests/Unit/ApiLoggerTest.php +++ b/tests/Unit/ApiLoggerTest.php @@ -119,3 +119,57 @@ it('captures response_body when an HTTP RequestException is thrown', function () expect(ApiLog::first()->response_body)->toBe('upstream details'); }); + +it('captures Anthropic usage tokens from a successful response', function (): void { + Http::fake(['https://api.anthropic.com/v1/messages' => Http::response([ + 'content' => [], + 'usage' => [ + 'input_tokens' => 1234, + 'output_tokens' => 56, + 'cache_creation_input_tokens' => 8000, + 'cache_read_input_tokens' => 12000, + ], + ])]); + + $this->apiLogger->send('anthropic', 'POST', 'https://api.anthropic.com/v1/messages', + fn () => Http::post('https://api.anthropic.com/v1/messages')); + + $log = ApiLog::first(); + expect($log->input_tokens)->toBe(1234) + ->and($log->output_tokens)->toBe(56) + ->and($log->cache_write_tokens)->toBe(8000) + ->and($log->cache_read_tokens)->toBe(12000); +}); + +it('captures rate-limit headers from any provider response', function (): void { + Http::fake(['https://api.anthropic.com/v1/messages' => Http::response( + ['content' => [], 'usage' => ['input_tokens' => 100, 'output_tokens' => 
10]], + 200, + [ + 'anthropic-ratelimit-input-tokens-remaining' => '38000', + 'anthropic-ratelimit-input-tokens-reset' => '2026-05-14T12:41:00Z', + ], + )]); + + $this->apiLogger->send('anthropic', 'POST', 'https://api.anthropic.com/v1/messages', + fn () => Http::post('https://api.anthropic.com/v1/messages')); + + $log = ApiLog::first(); + expect($log->ratelimit_remaining)->toBe(38000) + ->and($log->ratelimit_reset_at?->toIso8601String())->toBe('2026-05-14T12:41:00+00:00'); +}); + +it('leaves token columns null for services without usage data', function (): void { + Http::fake(['https://example.com/x' => Http::response(['ok' => true])]); + + $this->apiLogger->send('test_service', 'GET', 'https://example.com/x', + fn () => Http::get('https://example.com/x')); + + $log = ApiLog::first(); + expect($log->input_tokens)->toBeNull() + ->and($log->output_tokens)->toBeNull() + ->and($log->cache_read_tokens)->toBeNull() + ->and($log->cache_write_tokens)->toBeNull() + ->and($log->ratelimit_remaining)->toBeNull() + ->and($log->ratelimit_reset_at)->toBeNull(); +}); diff --git a/tests/Unit/Services/Forecasting/LlmOverlayServiceTest.php b/tests/Unit/Services/Forecasting/LlmOverlayServiceTest.php index 127f30d..b914374 100644 --- a/tests/Unit/Services/Forecasting/LlmOverlayServiceTest.php +++ b/tests/Unit/Services/Forecasting/LlmOverlayServiceTest.php @@ -18,32 +18,63 @@ beforeEach(function (): void { Config::set('services.anthropic.api_key', 'test-key'); }); -function fakeAnthropicWithOverlay(string $direction, int $confidence, array $events, bool $major = false): void +/** + * Anthropic-shaped Phase 1 assistant turn that includes a real + * web_search_tool_result block (the source of truth for harvested + * citations). 
+ * + * @param array $results + * @return array + */ +function fakeSearchResultsTurn(array $results): array { - Http::fake([ - '*api.anthropic.com/*' => Http::sequence() - ->push([ - 'stop_reason' => 'end_turn', - 'content' => [['type' => 'text', 'text' => 'Search summary.']], - ]) - ->push([ - 'stop_reason' => 'tool_use', - 'content' => [[ - 'type' => 'tool_use', - 'name' => 'submit_overlay', - 'input' => [ - 'direction' => $direction, - 'confidence' => $confidence, - 'reasoning_short' => 'Test reasoning.', - 'events_cited' => $events, - 'agrees_with_ridge' => true, - 'major_impact_event' => $major, - ], - ]], - ]), - // URL HEAD verification probes — accept everything by default - '*' => Http::response('', 200), - ]); + $content = [['type' => 'text', 'text' => 'Searching...']]; + foreach ($results as $idx => $r) { + $content[] = [ + 'type' => 'server_tool_use', + 'id' => 'srvtoolu_'.$idx, + 'name' => 'web_search', + 'input' => ['query' => 'oil news'], + ]; + $content[] = [ + 'type' => 'web_search_tool_result', + 'tool_use_id' => 'srvtoolu_'.$idx, + 'content' => [[ + 'type' => 'web_search_result', + 'url' => $r['url'], + 'title' => $r['title'], + 'encrypted_content' => str_repeat('LONG_PAGE_TEXT_', 200), + 'page_age' => '1 day ago', + ]], + ]; + } + + return ['stop_reason' => 'end_turn', 'content' => $content]; +} + +/** @param array> $events */ +function fakeSubmitTurn(string $direction, int $confidence, array $events, bool $major = false): array +{ + $input = [ + 'direction' => $direction, + 'confidence' => $confidence, + 'reasoning_short' => 'Test reasoning.', + 'agrees_with_ridge' => true, + 'major_impact_event' => $major, + ]; + if ($events !== []) { + $input['events_cited'] = $events; + } + + return [ + 'stop_reason' => 'tool_use', + 'content' => [[ + 'type' => 'tool_use', + 'id' => 'toolu_submit', + 'name' => 'submit_overlay', + 'input' => $input, + ]], + ]; } it('skips when ANTHROPIC_API_KEY is not set', function (): void { @@ -54,8 +85,13 @@ it('skips 
when ANTHROPIC_API_KEY is not set', function (): void { expect($service->run())->toBeNull(); }); -it('rejects the overlay when no events are cited', function (): void { - fakeAnthropicWithOverlay('rising', 60, []); +it('rejects only when neither web search nor model cited anything', function (): void { + Http::fake([ + '*api.anthropic.com/*' => Http::sequence() + ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'no results']]]) + ->push(fakeSubmitTurn('rising', 60, [])), + '*' => Http::response('', 200), + ]); $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)); @@ -66,30 +102,13 @@ it('rejects the overlay when no events are cited', function (): void { it('verifies a URL via GET fallback when HEAD returns 405', function (): void { Http::fake([ '*api.anthropic.com/*' => Http::sequence() - ->push([ - 'stop_reason' => 'end_turn', - 'content' => [['type' => 'text', 'text' => 'ok']], - ]) - ->push([ - 'stop_reason' => 'tool_use', - 'content' => [[ - 'type' => 'tool_use', - 'name' => 'submit_overlay', - 'input' => [ - 'direction' => 'rising', - 'confidence' => 60, - 'reasoning_short' => 'Hostile-to-HEAD source.', - 'events_cited' => [ - ['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/x', 'impact' => 'rising'], - ], - 'agrees_with_ridge' => true, - 'major_impact_event' => false, - ], - ]], - ]), + ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]]) + ->push(fakeSubmitTurn('rising', 60, [ + ['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/x', 'impact' => 'rising'], + ])), 'reuters.com/*' => Http::sequence() - ->push('', 405) // HEAD → 405 Method Not Allowed - ->push('partial-body', 200), // GET fallback succeeds + ->push('', 405) + ->push('partial-body', 200), ]); $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)); @@ -99,65 +118,13 @@ it('verifies a URL via GET fallback when HEAD returns 
405', function (): void { ->and($row->events_json)->toHaveCount(1); }); -it('rejects the overlay when both HEAD and GET fail', function (): void { - Http::fake([ - '*api.anthropic.com/*' => Http::sequence() - ->push([ - 'stop_reason' => 'end_turn', - 'content' => [['type' => 'text', 'text' => 'ok']], - ]) - ->push([ - 'stop_reason' => 'tool_use', - 'content' => [[ - 'type' => 'tool_use', - 'name' => 'submit_overlay', - 'input' => [ - 'direction' => 'rising', - 'confidence' => 60, - 'reasoning_short' => 'Truly dead URL.', - 'events_cited' => [ - ['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'], - ], - 'agrees_with_ridge' => true, - 'major_impact_event' => false, - ], - ]], - ]), - 'example.com/*' => Http::sequence() - ->push('', 404) // HEAD → 404 - ->push('', 404), // GET → still 404 - ]); - - $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)); - - expect($service->run())->toBeNull() - ->and(LlmOverlay::query()->count())->toBe(0); -}); - it('rejects the overlay when every cited URL is unreachable', function (): void { Http::fake([ '*api.anthropic.com/*' => Http::sequence() - ->push([ - 'stop_reason' => 'end_turn', - 'content' => [['type' => 'text', 'text' => 'ok']], - ]) - ->push([ - 'stop_reason' => 'tool_use', - 'content' => [[ - 'type' => 'tool_use', - 'name' => 'submit_overlay', - 'input' => [ - 'direction' => 'rising', - 'confidence' => 60, - 'reasoning_short' => 'Test.', - 'events_cited' => [ - ['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'], - ], - 'agrees_with_ridge' => true, - 'major_impact_event' => false, - ], - ]], - ]), + ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]]) + ->push(fakeSubmitTurn('rising', 60, [ + ['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'], + ])), 'example.com/*' => Http::response('', 404), ]); @@ -168,14 
// Two calls total: Phase 1 (search) then Phase 2 (forced submit_overlay).
// Confidence above the service cap must be clamped to 75.
it('persists an overlay row with verified citations and capped confidence', function (): void {
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
            ->push(fakeSubmitTurn('rising', 95, [
                ['headline' => 'OPEC cuts output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
            ], major: true)),
        // URL HEAD/GET verification probes — accept everything by default.
        '*' => Http::response('', 200),
    ]);

    $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

    $row = $service->run(); // NOTE(review): reconstructed boilerplate line — verbatim in sibling tests; confirm against VCS

    expect($row)->not->toBeNull()
        ->and($row->direction)->toBe('rising')
        ->and($row->confidence)->toBe(75) // capped from 95
        ->and($row->major_impact_event)->toBeTrue()
        ->and($row->search_used)->toBeTrue()
        ->and($row->events_json)->toHaveCount(1);
});

// When the model omits events_cited, citations come from the harvested
// web_search_tool_result URLs, all tagged with a 'neutral' impact.
it('harvests citations from web_search_tool_result when the model omits events_cited', function (): void {
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(fakeSearchResultsTurn([
                ['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
                ['url' => 'https://bloomberg.com/iran', 'title' => 'Iran tensions'],
            ]))
            ->push(fakeSubmitTurn('rising', 70, [])),
        '*' => Http::response('', 200),
    ]);

    $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)); // NOTE(review): reconstructed boilerplate line — verbatim in sibling tests; confirm against VCS

    $row = $service->run();

    expect($row)->not->toBeNull()
        ->and($row->events_json)->toHaveCount(2)
        ->and(collect($row->events_json)->pluck('url')->all())
        ->toEqualCanonicalizing(['https://reuters.com/opec', 'https://bloomberg.com/iran'])
        ->and(collect($row->events_json)->pluck('impact')->unique()->all())
        ->toBe(['neutral']);
});

// Model-tagged events win over harvested-only entries for the same URL;
// URLs seen only by one side are still kept.
it('merges model events_cited with harvested URLs deduped by URL', function (): void {
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(fakeSearchResultsTurn([
                ['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
                ['url' => 'https://bloomberg.com/iran', 'title' => 'Iran tensions'],
            ]))
            ->push(fakeSubmitTurn('rising', 70, [
                ['headline' => 'OPEC slashes output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
                ['headline' => 'Refinery fire', 'source' => 'CNBC', 'url' => 'https://cnbc.com/refinery', 'impact' => 'rising'],
            ])),
        '*' => Http::response('', 200),
    ]);

    $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

    $row = $service->run();

    expect($row)->not->toBeNull()
        ->and(collect($row->events_json)->pluck('url')->all())
        ->toEqualCanonicalizing([
            'https://reuters.com/opec',
            'https://bloomberg.com/iran',
            'https://cnbc.com/refinery',
        ]);

    // The model's tagged entry overrides the harvested one for the same URL.
    $opec = collect($row->events_json)->firstWhere('url', 'https://reuters.com/opec');
    expect($opec['impact'])->toBe('rising')
        ->and($opec['headline'])->toBe('OPEC slashes output');

    // Harvested-only entries default to neutral impact.
    $bloomberg = collect($row->events_json)->firstWhere('url', 'https://bloomberg.com/iran');
    expect($bloomberg['impact'])->toBe('neutral');
});

// Guards the two-call architecture: the Phase 2 request must be a fresh
// single-message conversation with no Phase 1 search transcript in it.
it('does not resend Phase 1 web_search_tool_result blocks on the submit call', function (): void {
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(fakeSearchResultsTurn([
                ['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
            ]))
            ->push(fakeSubmitTurn('rising', 70, [
                ['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
            ])),
        '*' => Http::response('', 200),
    ]);

    $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

    $service->run();

    $anthropicRequests = collect(Http::recorded())
        ->filter(fn (array $pair): bool => str_contains($pair[0]->url(), 'api.anthropic.com'))
        ->values();

    expect($anthropicRequests)->toHaveCount(2);

    $submitBody = $anthropicRequests[1][0]->data();
    $messagesJson = json_encode($submitBody['messages'], JSON_UNESCAPED_SLASHES);

    expect($submitBody['messages'])->toHaveCount(1)
        ->and($submitBody['messages'][0]['role'])->toBe('user');

    expect($messagesJson)->not->toContain('web_search_tool_result')
        ->and($messagesJson)->not->toContain('LONG_PAGE_TEXT_')
        ->and($messagesJson)->not->toContain('server_tool_use')
        ->and($messagesJson)->toContain('https://reuters.com/opec');
});
'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]]) + ->push(fakeSubmitTurn('falling', 40, [ + ['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'], + ])), + '*' => Http::response('', 200), ]); $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));