fix(forecasting): persist LLM overlay under Tier-1 ITPM via two-call architecture

The daily forecast:llm-overlay command was being skipped because the previous
single-conversation flow consumed more than Tier-1's 50,000 input-tokens-per-
minute Anthropic bucket. The web_search tool auto-caches its results (~55k
tokens) and requires `encrypted_content` intact when those blocks are resent,
so the prior retry-on-missing-citations path either 429'd or 400'd on the
second call.

LlmOverlayService now runs two independent API calls. Phase 1 invokes the
web_search tool and we discard the transcript after harvesting the URLs +
titles from the returned web_search_tool_result blocks. Phase 2 is a fresh
conversation containing the forecast context and the harvested headlines as
plain text, with a forced submit_overlay tool call. events_cited is now
optional in the tool schema — Haiku's flaky compliance no longer matters
because citations come from the search results, not the model's transcription.
Model-tagged events (with directional impact) merge with harvested-only
entries (impact: 'neutral'), deduped by URL.

Between phases the service reads anthropic-ratelimit-input-tokens-remaining /
…-reset from Phase 1's headers and sleeps proportionally — only long enough
for the SUBMIT_TOKEN_BUDGET worth of refill, not for the full bucket reset,
capped at 65 seconds.

ApiLogger now captures usage.input_tokens, usage.output_tokens,
cache_read_input_tokens, cache_creation_input_tokens, plus the rate-limit
remaining/reset headers on every Anthropic response. New nullable columns on
api_logs make rate-limit diagnostics directly queryable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ovidiu U
2026-05-14 14:22:42 +01:00
parent 97e27fc057
commit 07e0789044
6 changed files with 668 additions and 325 deletions

View File

@@ -7,7 +7,21 @@ use Illuminate\Database\Eloquent\Attributes\Fillable;
use Illuminate\Database\Eloquent\Factories\HasFactory;
use Illuminate\Database\Eloquent\Model;
#[Fillable(['service', 'method', 'url', 'status_code', 'duration_ms', 'error', 'response_body'])]
#[Fillable([
'service',
'method',
'url',
'status_code',
'duration_ms',
'error',
'response_body',
'input_tokens',
'output_tokens',
'cache_read_tokens',
'cache_write_tokens',
'ratelimit_remaining',
'ratelimit_reset_at',
])]
class ApiLog extends Model
{
/** @use HasFactory<ApiLogFactory> */
@@ -19,6 +33,7 @@ class ApiLog extends Model
{
return [
'created_at' => 'datetime',
'ratelimit_reset_at' => 'datetime',
];
}
}

View File

@@ -34,10 +34,12 @@ class ApiLogger
$statusCode = null;
$error = null;
$responseBody = null;
$usage = [];
try {
$response = $request();
$statusCode = $response->status();
$usage = $this->extractUsage($response);
if ($response->failed()) {
$body = $response->body();
@@ -53,6 +55,7 @@ class ApiLogger
// doesn't. Pull the body when it's available.
if ($e instanceof RequestException) {
$responseBody = $this->truncate($e->response->body());
$usage = $this->extractUsage($e->response);
}
throw $e;
@@ -65,6 +68,7 @@ class ApiLogger
'duration_ms' => (int) round((microtime(true) - $start) * 1000),
'error' => $error,
'response_body' => $responseBody,
...$usage,
]);
}
}
@@ -75,4 +79,39 @@ class ApiLogger
? substr($body, 0, self::RESPONSE_BODY_CAP)
: $body;
}
/**
* Pull token-usage and rate-limit telemetry from a provider response.
*
* Today only Anthropic exposes both. Other providers return mostly
* NULLs callers don't need to know which is which.
*
* @return array<string, int|string|null>
*/
private function extractUsage(?Response $response): array
{
if ($response === null) {
return [];
}
$usage = $response->json('usage');
$tokens = is_array($usage) ? $usage : [];
$reset = $response->header('anthropic-ratelimit-input-tokens-reset');
$remaining = $response->header('anthropic-ratelimit-input-tokens-remaining');
return [
'input_tokens' => $this->intOrNull($tokens['input_tokens'] ?? null),
'output_tokens' => $this->intOrNull($tokens['output_tokens'] ?? null),
'cache_read_tokens' => $this->intOrNull($tokens['cache_read_input_tokens'] ?? null),
'cache_write_tokens' => $this->intOrNull($tokens['cache_creation_input_tokens'] ?? null),
'ratelimit_remaining' => $this->intOrNull($remaining !== '' ? $remaining : null),
'ratelimit_reset_at' => $reset !== '' ? $reset : null,
];
}
private function intOrNull(mixed $value): ?int
{
return is_numeric($value) ? (int) $value : null;
}
}

View File

@@ -6,7 +6,9 @@ use App\Models\BrentPrice;
use App\Models\LlmOverlay;
use App\Models\VolatilityRegime;
use App\Services\ApiLogger;
use Carbon\CarbonImmutable;
use Carbon\CarbonInterface;
use Illuminate\Http\Client\Response;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;
@@ -15,9 +17,21 @@ use Throwable;
/**
* Layer 4 daily news-aware overlay on the calibrated ridge forecast.
*
* Calls Anthropic Haiku with the web_search tool, then forces a
* submit_overlay tool call to get structured output. Cites events with
* URLs; URLs are verified before storing. Empty citations rejection.
* Runs as two independent Anthropic API calls:
* Phase 1 — web_search tool only; we capture the URLs/titles from
* the returned web_search_tool_result blocks.
* Phase 2 — fresh conversation containing those URLs+titles as plain
* text plus a forced submit_overlay tool call.
*
* Phase 1's transcript is never sent back to Phase 2. Anthropic's
* web_search auto-caches the encrypted page text (~55k tokens per
* search) and requires it intact when web_search_tool_result blocks
* are resent. Threading it through to Phase 2 either blows the Tier-1
* 50k ITPM bucket or 400s if we try to strip it. Two clean calls keep
* Phase 2 around 3k input tokens.
*
* Citations are harvested directly from Phase 1's web_search_tool_result
* blocks — Haiku is unreliable about populating `events_cited` itself.
*
* Read-only with respect to the volatility flag — Layer 4 writes its
* `llm_overlays` row; Layer 5's hourly cron picks it up and decides
@@ -31,6 +45,15 @@ final class LlmOverlayService
private const int COOLDOWN_HOURS = 4;
private const int MAX_SEARCH_TURNS = 2;
/**
* Approximate input-token cost of Phase 2 (system + tool schema +
* forecast context + harvested URL list). If Phase 1 leaves
* remaining ITPM below this, wait for the bucket to refill.
*/
private const int SUBMIT_TOKEN_BUDGET = 4_000;
public function __construct(
private readonly ApiLogger $apiLogger,
private readonly WeeklyForecastService $weeklyForecast,
@@ -55,19 +78,24 @@ final class LlmOverlayService
$forecast = $this->weeklyForecast->currentForecast();
$context = $this->buildContext($forecast);
$rawResult = $this->callAnthropic($context);
if ($rawResult === null) {
$callResult = $this->callAnthropic($context);
if ($callResult === null) {
return null;
}
$verifiedEvents = $this->verifyCitedUrls($rawResult['events_cited'] ?? []);
$rawResult = $callResult['raw'];
$harvested = $callResult['harvested'];
$mergedEvents = $this->mergeEvents($rawResult['events_cited'] ?? [], $harvested);
$verifiedEvents = $this->verifyCitedUrls($mergedEvents);
if ($verifiedEvents === []) {
Log::warning('LlmOverlayService: no verified citations, rejecting overlay', [
'events_cited_count' => count($rawResult['events_cited'] ?? []),
'model_events' => $rawResult['events_cited'] ?? null,
'harvested_urls' => array_column($harvested, 'url'),
'direction' => $rawResult['direction'] ?? null,
'confidence' => $rawResult['confidence'] ?? null,
'reasoning_short' => $rawResult['reasoning_short'] ?? null,
'raw_result' => $rawResult,
]);
return null;
@@ -131,70 +159,44 @@ final class LlmOverlayService
];
}
/** @return array<string, mixed>|null */
/**
* Two independent API calls:
*
* Phase 1 runs the web_search tool, captures the assistant's
* returned `web_search_tool_result` blocks, then
* discards the transcript.
*
* Phase 2 issues a brand-new conversation with the harvested
* URLs/titles flattened into a plain-text user message
* and forces a `submit_overlay` tool call.
*
* Why not one stitched conversation: Anthropic auto-caches web_search
* results into ITPM (≈55k tokens for a 1-search call) and requires
* `encrypted_content` intact when those blocks are sent back.
* Resending the Phase 1 transcript to Phase 2 either rate-limits us
* (29k+ tokens twice exceeds the Tier-1 50k ITPM bucket) or 400s
* if we strip the encrypted blob. A fresh Phase 2 sends ~3k tokens
* total — small enough to fit in the recovered bucket after a
* short adaptive sleep.
*
* @return array{raw: array<string, mixed>, harvested: array<int, array{url: string, title: string}>}|null
*/
private function callAnthropic(array $context): ?array
{
$messages = [['role' => 'user', 'content' => $this->prompt($context)]];
try {
// Phase 1: web search loop. Append the assistant turn after every
// successful response, then decide whether to keep looping —
// this guarantees the messages array stays well-formed regardless
// of whether we exit via `break` or by exhausting iterations.
for ($i = 0, $response = null; $i < 5; $i++) {
$response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(45)
->withHeaders($this->headers())
->post(self::URL, [
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
'max_tokens' => 1024,
'tools' => [['type' => 'web_search_20250305', 'name' => 'web_search']],
'messages' => $messages,
]));
if (! $response->successful()) {
Log::error('LlmOverlayService: search request failed', ['status' => $response->status()]);
return null;
}
$messages[] = ['role' => 'assistant', 'content' => $response->json('content')];
if ($response->json('stop_reason') !== 'pause_turn') {
break;
}
}
$messages[] = ['role' => 'user', 'content' => 'Now submit your overlay using the submit_overlay tool. Cite at least one event with a URL.'];
// Phase 2: forced structured output
$submitResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
->withHeaders($this->headers())
->post(self::URL, [
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
'max_tokens' => 512,
'tools' => [$this->submitOverlayTool()],
'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
'messages' => $messages,
]));
if (! $submitResponse->successful()) {
Log::error('LlmOverlayService: submit request failed', ['status' => $submitResponse->status()]);
$phase1 = $this->runWebSearch($context);
if ($phase1 === null) {
return null;
}
$submitContent = $submitResponse->json('content') ?? [];
$rawResult = $this->extractToolInput($submitContent);
$this->waitForRateLimitIfNeeded($phase1['response']);
// Haiku sometimes calls submit_overlay without `events_cited` even
// though the schema marks it required. Confirmed in laravel.log on
// 2026-05-12: tool_use input had only direction/confidence/reasoning.
// Retry once with an explicit tool_result error.
if ($this->citationsMissing($rawResult)) {
$rawResult = $this->retrySubmitWithCitationError($messages, $submitContent) ?? $rawResult;
$rawResult = $this->runSubmit($context, $phase1['harvested']);
if ($rawResult === null) {
return null;
}
return $rawResult;
return ['raw' => $rawResult, 'harvested' => $phase1['harvested']];
} catch (Throwable $e) {
Log::error('LlmOverlayService: callAnthropic failed', ['error' => $e->getMessage()]);
@@ -202,6 +204,239 @@ final class LlmOverlayService
}
}
/**
* Phase 1: ask the model to search for news and capture the
* web_search_tool_result blocks. Returns the harvested citations
* and the final response (whose rate-limit headers tell us when
* the ITPM bucket will be replenished for Phase 2).
*
* @return array{harvested: array<int, array{url: string, title: string}>, response: Response}|null
*/
private function runWebSearch(array $context): ?array
{
$messages = [['role' => 'user', 'content' => $this->searchUserMessage($context)]];
$response = null;
for ($i = 0; $i < self::MAX_SEARCH_TURNS; $i++) {
$response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(45)
->withHeaders($this->headers())
->post(self::URL, [
'model' => $this->model(),
'max_tokens' => 1024,
'system' => $this->searchSystem(),
'tools' => [['type' => 'web_search_20250305', 'name' => 'web_search']],
'messages' => $messages,
]));
if (! $response->successful()) {
Log::error('LlmOverlayService: search request failed', [
'status' => $response->status(),
'body' => substr($response->body(), 0, 500),
]);
return null;
}
$messages[] = ['role' => 'assistant', 'content' => $response->json('content')];
if ($response->json('stop_reason') !== 'pause_turn') {
break;
}
}
if ($response === null) {
return null;
}
return [
'harvested' => $this->harvestSearchResults($messages),
'response' => $response,
];
}
/**
* Phase 2: fresh API call no Phase 1 transcript with the
* harvested citations as plain text and a forced submit_overlay
* tool call.
*
* @param array<int, array{url: string, title: string}> $harvested
* @return array<string, mixed>|null
*/
private function runSubmit(array $context, array $harvested): ?array
{
$response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
->withHeaders($this->headers())
->post(self::URL, [
'model' => $this->model(),
'max_tokens' => 512,
'system' => $this->submitSystem(),
'tools' => [$this->submitOverlayTool()],
'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
'messages' => [['role' => 'user', 'content' => $this->submitUserMessage($context, $harvested)]],
]));
if (! $response->successful()) {
Log::error('LlmOverlayService: submit request failed', [
'status' => $response->status(),
'body' => substr($response->body(), 0, 500),
]);
return null;
}
$rawResult = $this->extractToolInput($response->json('content') ?? []);
if ($rawResult === null) {
Log::warning('LlmOverlayService: submit response missing tool_use block');
return null;
}
return $rawResult;
}
/**
* Anthropic's web_search burns ≈55k input tokens (mostly auto-cached
* search results) on Phase 1. At Tier 1's 50k ITPM the bucket can
* be at zero immediately afterwards. Read the rate-limit headers
* and sleep until the bucket has refilled enough for Phase 2.
* Capped at 65s so the daily cron never hangs longer than a minute.
*/
private function waitForRateLimitIfNeeded(Response $response): void
{
$remaining = (int) $response->header('anthropic-ratelimit-input-tokens-remaining');
if ($response->header('anthropic-ratelimit-input-tokens-remaining') === ''
|| $remaining >= self::SUBMIT_TOKEN_BUDGET) {
return;
}
$resetAt = $response->header('anthropic-ratelimit-input-tokens-reset');
$bucketSize = (int) $response->header('anthropic-ratelimit-input-tokens-limit');
if ($resetAt === '' || $bucketSize <= 0) {
return;
}
try {
$secondsUntilFullReset = max(0, CarbonImmutable::parse($resetAt)->getTimestamp() - now()->getTimestamp());
} catch (Throwable) {
return;
}
// Anthropic's bucket refills linearly. We don't need to wait for
// the full reset — only enough for SUBMIT_TOKEN_BUDGET tokens to
// become available. Sleep proportionally + a small safety margin,
// hard-capped at 65s.
$tokensNeeded = self::SUBMIT_TOKEN_BUDGET - $remaining;
$proportional = (int) ceil(($tokensNeeded / $bucketSize) * $secondsUntilFullReset);
$waitSeconds = max(1, min(65, $proportional + 2));
Log::info('LlmOverlayService: waiting for ITPM bucket refill before submit', [
'remaining' => $remaining,
'wait_seconds' => $waitSeconds,
'full_reset_in' => $secondsUntilFullReset,
]);
sleep($waitSeconds);
}
/**
* Walk every assistant turn and extract `{url, title}` from each
* `web_search_tool_result` block. Anthropic's web_search returns
* these blocks directly they are the authoritative citation
* source, not anything the model transcribes back to us.
*
* @param array<int, array<string, mixed>> $messages
* @return array<int, array{url: string, title: string}>
*/
private function harvestSearchResults(array $messages): array
{
$byUrl = [];
foreach ($messages as $message) {
if (($message['role'] ?? null) !== 'assistant') {
continue;
}
$content = $message['content'] ?? [];
if (! is_array($content)) {
continue;
}
foreach ($content as $block) {
if (! is_array($block) || ($block['type'] ?? null) !== 'web_search_tool_result') {
continue;
}
$results = $block['content'] ?? [];
if (! is_array($results)) {
continue;
}
foreach ($results as $result) {
if (! is_array($result) || ($result['type'] ?? null) !== 'web_search_result') {
continue;
}
$url = (string) ($result['url'] ?? '');
if ($url === '' || isset($byUrl[$url])) {
continue;
}
$byUrl[$url] = ['url' => $url, 'title' => (string) ($result['title'] ?? '')];
}
}
}
return array_values($byUrl);
}
/**
* Merge model-provided events_cited with citations harvested from
* `web_search_tool_result`. Model entries (which include `impact`
* tagging) take precedence on URL collision; harvested-only entries
* default to `impact: 'neutral'`.
*
* @param array<int, mixed> $modelEvents
* @param array<int, array{url: string, title: string}> $harvested
* @return array<int, array<string, mixed>>
*/
private function mergeEvents(array $modelEvents, array $harvested): array
{
$byUrl = [];
foreach ($modelEvents as $event) {
if (! is_array($event)) {
continue;
}
$url = (string) ($event['url'] ?? '');
if ($url === '') {
continue;
}
$byUrl[$url] = [
'headline' => (string) ($event['headline'] ?? ''),
'source' => (string) ($event['source'] ?? ''),
'url' => $url,
'impact' => in_array($event['impact'] ?? null, ['rising', 'falling', 'neutral'], true)
? $event['impact']
: 'neutral',
];
}
foreach ($harvested as $result) {
$url = $result['url'];
if (isset($byUrl[$url])) {
continue;
}
$byUrl[$url] = [
'headline' => $result['title'],
'source' => $this->domainOf($url),
'url' => $url,
'impact' => 'neutral',
];
}
return array_values($byUrl);
}
private function domainOf(string $url): string
{
$host = parse_url($url, PHP_URL_HOST);
return is_string($host) ? preg_replace('/^www\./', '', $host) : '';
}
private function verificationUserAgent(): string
{
$appUrl = rtrim((string) config('app.url'), '/');
@@ -320,37 +555,61 @@ final class LlmOverlayService
return config('services.anthropic.api_key');
}
private function prompt(array $context): string
private function model(): string
{
$json = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
return <<<PROMPT
You are providing a daily news-aware overlay for a UK weekly pump-price forecast.
The calibrated ridge model has already produced a directional call from price history.
Your job is to search recent oil/fuel news and decide whether to AGREE or DISAGREE
and most importantly, surface any major-impact event that the ridge model can't see
from price history alone.
return config('services.anthropic.model', 'claude-haiku-4-5-20251001');
}
private function searchSystem(): string
{
return <<<'PROMPT'
You are researching news that may affect this week's UK pump-price forecast.
Search recent news (last 48 hours) for:
- OPEC+ production decisions or unexpected announcements
- Geopolitical events affecting oil supply (sanctions, conflict, shipping disruption)
- Major refinery outages or pipeline incidents
- US/EU inventory reports that materially moved Brent
Context for this week:
$json
After searching, you will be asked to submit_overlay with direction, confidence
(capped at $this->confidenceCap), short reasoning, cited events with URLs,
agrees_with_ridge, and major_impact_event.
Citing events with REAL URLs is mandatory. An empty citation array will be
rejected and the overlay discarded.
Return only the search results you will be asked to summarise separately.
PROMPT;
}
private string $confidenceCap = '75';
private function searchUserMessage(array $context): string
{
$json = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
return "Use web_search to find oil/fuel news from the last 48 hours that could move UK pump prices this week.\n\nContext for this week:\n\n".$json;
}
private function submitSystem(): string
{
$cap = self::CONFIDENCE_CAP;
return <<<PROMPT
You are providing a news-aware directional overlay for a UK weekly pump-price forecast.
Decide whether to AGREE or DISAGREE with the ridge model based on the news headlines
provided in the user message. Cap confidence at $cap.
Include events_cited (with impact tags) for any specific headline that drove your
reasoning; you may leave events_cited empty if the news is unremarkable.
PROMPT;
}
/**
* @param array<int, array{url: string, title: string}> $harvested
*/
private function submitUserMessage(array $context, array $harvested): string
{
$contextJson = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
if ($harvested === []) {
$headlines = '(none — no relevant news found)';
} else {
$headlines = collect($harvested)
->map(fn (array $r): string => '- '.$r['title'].' — '.$r['url'])
->implode("\n");
}
return "Context for this week:\n\n".$contextJson."\n\nNews headlines found:\n".$headlines."\n\nNow call submit_overlay with your decision.";
}
/** @return array<string, mixed> */
private function submitOverlayTool(): array
@@ -366,7 +625,7 @@ final class LlmOverlayService
'reasoning_short' => ['type' => 'string', 'description' => '12 sentences.'],
'events_cited' => [
'type' => 'array',
'minItems' => 1,
'description' => 'Optional. Events that drove your reasoning, with directional impact. Citations are otherwise harvested from web_search_tool_result.',
'items' => [
'type' => 'object',
'properties' => [
@@ -381,7 +640,7 @@ final class LlmOverlayService
'agrees_with_ridge' => ['type' => 'boolean'],
'major_impact_event' => ['type' => 'boolean'],
],
'required' => ['direction', 'confidence', 'reasoning_short', 'events_cited', 'agrees_with_ridge', 'major_impact_event'],
'required' => ['direction', 'confidence', 'reasoning_short', 'agrees_with_ridge', 'major_impact_event'],
],
];
}
@@ -396,57 +655,4 @@ final class LlmOverlayService
return $block['input'] ?? null;
}
/** @param array<string, mixed>|null $rawResult */
private function citationsMissing(?array $rawResult): bool
{
return $rawResult === null
|| ! isset($rawResult['events_cited'])
|| ! is_array($rawResult['events_cited'])
|| $rawResult['events_cited'] === [];
}
/**
* @param array<int, mixed> $messages
* @param array<int, mixed> $failedSubmitContent
* @return array<string, mixed>|null
*/
private function retrySubmitWithCitationError(array $messages, array $failedSubmitContent): ?array
{
$toolUseId = collect($failedSubmitContent)->firstWhere('type', 'tool_use')['id'] ?? null;
if ($toolUseId === null) {
Log::warning('LlmOverlayService: cannot retry — no tool_use id in failed submit');
return null;
}
Log::info('LlmOverlayService: retrying submit with citation error', ['tool_use_id' => $toolUseId]);
$messages[] = ['role' => 'assistant', 'content' => $failedSubmitContent];
$messages[] = ['role' => 'user', 'content' => [[
'type' => 'tool_result',
'tool_use_id' => $toolUseId,
'content' => 'events_cited was missing or empty. Resubmit submit_overlay with at least one event from your earlier web search results, including its real URL, headline, source, and impact.',
'is_error' => true,
]]];
$retryResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
->withHeaders($this->headers())
->post(self::URL, [
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
'max_tokens' => 512,
'tools' => [$this->submitOverlayTool()],
'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
'messages' => $messages,
]));
if (! $retryResponse->successful()) {
Log::error('LlmOverlayService: retry submit failed', ['status' => $retryResponse->status()]);
return null;
}
return $this->extractToolInput($retryResponse->json('content') ?? []);
}
}

View File

@@ -0,0 +1,46 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
/**
* Capture token usage and rate-limit headers from token-metering
* providers (today: Anthropic). These columns let us see the
* cumulative input-tokens-per-minute trajectory directly in
* api_logs rather than inferring it from request counts.
*/
public function up(): void
{
Schema::table('api_logs', function (Blueprint $table) {
$table->unsignedInteger('input_tokens')->nullable()->after('response_body')
->comment('Input tokens billed (Anthropic usage.input_tokens). NULL for providers that do not report usage.');
$table->unsignedInteger('output_tokens')->nullable()->after('input_tokens')
->comment('Output tokens billed (Anthropic usage.output_tokens).');
$table->unsignedInteger('cache_read_tokens')->nullable()->after('output_tokens')
->comment('Cache-hit tokens (Anthropic usage.cache_read_input_tokens). Do not count toward ITPM on most models.');
$table->unsignedInteger('cache_write_tokens')->nullable()->after('cache_read_tokens')
->comment('Cache-write tokens (Anthropic usage.cache_creation_input_tokens). Count toward ITPM.');
$table->unsignedInteger('ratelimit_remaining')->nullable()->after('cache_write_tokens')
->comment('Provider-reported input-tokens remaining in the rolling window (anthropic-ratelimit-input-tokens-remaining).');
$table->dateTime('ratelimit_reset_at')->nullable()->after('ratelimit_remaining')
->comment('When the input-tokens bucket will be fully replenished (anthropic-ratelimit-input-tokens-reset, RFC 3339).');
});
}
public function down(): void
{
Schema::table('api_logs', function (Blueprint $table) {
$table->dropColumn([
'input_tokens',
'output_tokens',
'cache_read_tokens',
'cache_write_tokens',
'ratelimit_remaining',
'ratelimit_reset_at',
]);
});
}
};

View File

@@ -119,3 +119,57 @@ it('captures response_body when an HTTP RequestException is thrown', function ()
expect(ApiLog::first()->response_body)->toBe('upstream details');
});
it('captures Anthropic usage tokens from a successful response', function (): void {
Http::fake(['https://api.anthropic.com/v1/messages' => Http::response([
'content' => [],
'usage' => [
'input_tokens' => 1234,
'output_tokens' => 56,
'cache_creation_input_tokens' => 8000,
'cache_read_input_tokens' => 12000,
],
])]);
$this->apiLogger->send('anthropic', 'POST', 'https://api.anthropic.com/v1/messages',
fn () => Http::post('https://api.anthropic.com/v1/messages'));
$log = ApiLog::first();
expect($log->input_tokens)->toBe(1234)
->and($log->output_tokens)->toBe(56)
->and($log->cache_write_tokens)->toBe(8000)
->and($log->cache_read_tokens)->toBe(12000);
});
it('captures rate-limit headers from any provider response', function (): void {
Http::fake(['https://api.anthropic.com/v1/messages' => Http::response(
['content' => [], 'usage' => ['input_tokens' => 100, 'output_tokens' => 10]],
200,
[
'anthropic-ratelimit-input-tokens-remaining' => '38000',
'anthropic-ratelimit-input-tokens-reset' => '2026-05-14T12:41:00Z',
],
)]);
$this->apiLogger->send('anthropic', 'POST', 'https://api.anthropic.com/v1/messages',
fn () => Http::post('https://api.anthropic.com/v1/messages'));
$log = ApiLog::first();
expect($log->ratelimit_remaining)->toBe(38000)
->and($log->ratelimit_reset_at?->toIso8601String())->toBe('2026-05-14T12:41:00+00:00');
});
it('leaves token columns null for services without usage data', function (): void {
Http::fake(['https://example.com/x' => Http::response(['ok' => true])]);
$this->apiLogger->send('test_service', 'GET', 'https://example.com/x',
fn () => Http::get('https://example.com/x'));
$log = ApiLog::first();
expect($log->input_tokens)->toBeNull()
->and($log->output_tokens)->toBeNull()
->and($log->cache_read_tokens)->toBeNull()
->and($log->cache_write_tokens)->toBeNull()
->and($log->ratelimit_remaining)->toBeNull()
->and($log->ratelimit_reset_at)->toBeNull();
});

View File

@@ -18,32 +18,63 @@ beforeEach(function (): void {
Config::set('services.anthropic.api_key', 'test-key');
});
function fakeAnthropicWithOverlay(string $direction, int $confidence, array $events, bool $major = false): void
/**
* Anthropic-shaped Phase 1 assistant turn that includes a real
* web_search_tool_result block (the source of truth for harvested
* citations).
*
* @param array<int, array{url: string, title: string}> $results
* @return array<string, mixed>
*/
function fakeSearchResultsTurn(array $results): array
{
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push([
'stop_reason' => 'end_turn',
'content' => [['type' => 'text', 'text' => 'Search summary.']],
])
->push([
'stop_reason' => 'tool_use',
'content' => [[
'type' => 'tool_use',
'name' => 'submit_overlay',
'input' => [
'direction' => $direction,
'confidence' => $confidence,
'reasoning_short' => 'Test reasoning.',
'events_cited' => $events,
'agrees_with_ridge' => true,
'major_impact_event' => $major,
],
]],
]),
// URL HEAD verification probes — accept everything by default
'*' => Http::response('', 200),
]);
$content = [['type' => 'text', 'text' => 'Searching...']];
foreach ($results as $idx => $r) {
$content[] = [
'type' => 'server_tool_use',
'id' => 'srvtoolu_'.$idx,
'name' => 'web_search',
'input' => ['query' => 'oil news'],
];
$content[] = [
'type' => 'web_search_tool_result',
'tool_use_id' => 'srvtoolu_'.$idx,
'content' => [[
'type' => 'web_search_result',
'url' => $r['url'],
'title' => $r['title'],
'encrypted_content' => str_repeat('LONG_PAGE_TEXT_', 200),
'page_age' => '1 day ago',
]],
];
}
return ['stop_reason' => 'end_turn', 'content' => $content];
}
/** @param array<int, array<string, mixed>> $events */
function fakeSubmitTurn(string $direction, int $confidence, array $events, bool $major = false): array
{
$input = [
'direction' => $direction,
'confidence' => $confidence,
'reasoning_short' => 'Test reasoning.',
'agrees_with_ridge' => true,
'major_impact_event' => $major,
];
if ($events !== []) {
$input['events_cited'] = $events;
}
return [
'stop_reason' => 'tool_use',
'content' => [[
'type' => 'tool_use',
'id' => 'toolu_submit',
'name' => 'submit_overlay',
'input' => $input,
]],
];
}
it('skips when ANTHROPIC_API_KEY is not set', function (): void {
@@ -54,8 +85,13 @@ it('skips when ANTHROPIC_API_KEY is not set', function (): void {
expect($service->run())->toBeNull();
});
it('rejects the overlay when no events are cited', function (): void {
fakeAnthropicWithOverlay('rising', 60, []);
it('rejects only when neither web search nor model cited anything', function (): void {
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'no results']]])
->push(fakeSubmitTurn('rising', 60, [])),
'*' => Http::response('', 200),
]);
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
@@ -66,30 +102,13 @@ it('rejects the overlay when no events are cited', function (): void {
it('verifies a URL via GET fallback when HEAD returns 405', function (): void {
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push([
'stop_reason' => 'end_turn',
'content' => [['type' => 'text', 'text' => 'ok']],
])
->push([
'stop_reason' => 'tool_use',
'content' => [[
'type' => 'tool_use',
'name' => 'submit_overlay',
'input' => [
'direction' => 'rising',
'confidence' => 60,
'reasoning_short' => 'Hostile-to-HEAD source.',
'events_cited' => [
['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/x', 'impact' => 'rising'],
],
'agrees_with_ridge' => true,
'major_impact_event' => false,
],
]],
]),
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
->push(fakeSubmitTurn('rising', 60, [
['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/x', 'impact' => 'rising'],
])),
'reuters.com/*' => Http::sequence()
->push('', 405) // HEAD → 405 Method Not Allowed
->push('partial-body', 200), // GET fallback succeeds
->push('', 405)
->push('partial-body', 200),
]);
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
@@ -99,65 +118,13 @@ it('verifies a URL via GET fallback when HEAD returns 405', function (): void {
->and($row->events_json)->toHaveCount(1);
});
it('rejects the overlay when both HEAD and GET fail', function (): void {
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push([
'stop_reason' => 'end_turn',
'content' => [['type' => 'text', 'text' => 'ok']],
])
->push([
'stop_reason' => 'tool_use',
'content' => [[
'type' => 'tool_use',
'name' => 'submit_overlay',
'input' => [
'direction' => 'rising',
'confidence' => 60,
'reasoning_short' => 'Truly dead URL.',
'events_cited' => [
['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'],
],
'agrees_with_ridge' => true,
'major_impact_event' => false,
],
]],
]),
'example.com/*' => Http::sequence()
->push('', 404) // HEAD → 404
->push('', 404), // GET → still 404
]);
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
expect($service->run())->toBeNull()
->and(LlmOverlay::query()->count())->toBe(0);
});
it('rejects the overlay when every cited URL is unreachable', function (): void {
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push([
'stop_reason' => 'end_turn',
'content' => [['type' => 'text', 'text' => 'ok']],
])
->push([
'stop_reason' => 'tool_use',
'content' => [[
'type' => 'tool_use',
'name' => 'submit_overlay',
'input' => [
'direction' => 'rising',
'confidence' => 60,
'reasoning_short' => 'Test.',
'events_cited' => [
['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'],
],
'agrees_with_ridge' => true,
'major_impact_event' => false,
],
]],
]),
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
->push(fakeSubmitTurn('rising', 60, [
['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'],
])),
'example.com/*' => Http::response('', 404),
]);
@@ -168,14 +135,14 @@ it('rejects the overlay when every cited URL is unreachable', function (): void
});
it('persists an overlay row with verified citations and capped confidence', function (): void {
fakeAnthropicWithOverlay(
direction: 'rising',
confidence: 95, // above cap → expect capped to 75
events: [
['headline' => 'OPEC cuts output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
],
major: true,
);
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
->push(fakeSubmitTurn('rising', 95, [
['headline' => 'OPEC cuts output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
], major: true)),
'*' => Http::response('', 200),
]);
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
@@ -183,51 +150,20 @@ it('persists an overlay row with verified citations and capped confidence', func
expect($row)->not->toBeNull()
->and($row->direction)->toBe('rising')
->and($row->confidence)->toBe(75) // capped
->and($row->confidence)->toBe(75)
->and($row->major_impact_event)->toBeTrue()
->and($row->search_used)->toBeTrue()
->and($row->events_json)->toHaveCount(1);
});
it('retries the submit when the model omits events_cited', function (): void {
it('harvests citations from web_search_tool_result when the model omits events_cited', function (): void {
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push([
'stop_reason' => 'end_turn',
'content' => [['type' => 'text', 'text' => 'Search done.']],
])
->push([
'stop_reason' => 'tool_use',
'content' => [[
'type' => 'tool_use',
'id' => 'toolu_first',
'name' => 'submit_overlay',
'input' => [
'direction' => 'rising',
'confidence' => 70,
'reasoning_short' => 'Forgot citations.',
// events_cited omitted entirely — the bug we are guarding against
],
]],
])
->push([
'stop_reason' => 'tool_use',
'content' => [[
'type' => 'tool_use',
'id' => 'toolu_retry',
'name' => 'submit_overlay',
'input' => [
'direction' => 'rising',
'confidence' => 70,
'reasoning_short' => 'With citations now.',
'events_cited' => [
['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
],
'agrees_with_ridge' => true,
'major_impact_event' => false,
],
]],
]),
->push(fakeSearchResultsTurn([
['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
['url' => 'https://bloomberg.com/iran', 'title' => 'Iran tensions'],
]))
->push(fakeSubmitTurn('rising', 70, [])),
'*' => Http::response('', 200),
]);
@@ -236,42 +172,79 @@ it('retries the submit when the model omits events_cited', function (): void {
$row = $service->run();
expect($row)->not->toBeNull()
->and($row->events_json)->toHaveCount(1)
->and(LlmOverlay::query()->count())->toBe(1);
->and($row->events_json)->toHaveCount(2)
->and(collect($row->events_json)->pluck('url')->all())
->toEqualCanonicalizing(['https://reuters.com/opec', 'https://bloomberg.com/iran'])
->and(collect($row->events_json)->pluck('impact')->unique()->all())
->toBe(['neutral']);
});
it('rejects when the retry also omits events_cited', function (): void {
it('merges model events_cited with harvested URLs deduped by URL', function (): void {
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push([
'stop_reason' => 'end_turn',
'content' => [['type' => 'text', 'text' => 'Search done.']],
])
->push([
'stop_reason' => 'tool_use',
'content' => [[
'type' => 'tool_use',
'id' => 'toolu_first',
'name' => 'submit_overlay',
'input' => ['direction' => 'rising', 'confidence' => 70, 'reasoning_short' => 'No cites.'],
]],
])
->push([
'stop_reason' => 'tool_use',
'content' => [[
'type' => 'tool_use',
'id' => 'toolu_retry',
'name' => 'submit_overlay',
'input' => ['direction' => 'rising', 'confidence' => 70, 'reasoning_short' => 'Still none.'],
]],
]),
->push(fakeSearchResultsTurn([
['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
['url' => 'https://bloomberg.com/iran', 'title' => 'Iran tensions'],
]))
->push(fakeSubmitTurn('rising', 70, [
['headline' => 'OPEC slashes output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
['headline' => 'Refinery fire', 'source' => 'CNBC', 'url' => 'https://cnbc.com/refinery', 'impact' => 'rising'],
])),
'*' => Http::response('', 200),
]);
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
expect($service->run())->toBeNull()
->and(LlmOverlay::query()->count())->toBe(0);
$row = $service->run();
expect($row)->not->toBeNull()
->and(collect($row->events_json)->pluck('url')->all())
->toEqualCanonicalizing([
'https://reuters.com/opec',
'https://bloomberg.com/iran',
'https://cnbc.com/refinery',
]);
$opec = collect($row->events_json)->firstWhere('url', 'https://reuters.com/opec');
expect($opec['impact'])->toBe('rising')
->and($opec['headline'])->toBe('OPEC slashes output');
$bloomberg = collect($row->events_json)->firstWhere('url', 'https://bloomberg.com/iran');
expect($bloomberg['impact'])->toBe('neutral');
});
it('does not resend Phase 1 web_search_tool_result blocks on the submit call', function (): void {
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push(fakeSearchResultsTurn([
['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
]))
->push(fakeSubmitTurn('rising', 70, [
['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
])),
'*' => Http::response('', 200),
]);
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
$service->run();
$anthropicRequests = collect(Http::recorded())
->filter(fn (array $pair): bool => str_contains($pair[0]->url(), 'api.anthropic.com'))
->values();
expect($anthropicRequests)->toHaveCount(2);
$submitBody = $anthropicRequests[1][0]->data();
$messagesJson = json_encode($submitBody['messages'], JSON_UNESCAPED_SLASHES);
expect($submitBody['messages'])->toHaveCount(1)
->and($submitBody['messages'][0]['role'])->toBe('user');
expect($messagesJson)->not->toContain('web_search_tool_result')
->and($messagesJson)->not->toContain('LONG_PAGE_TEXT_')
->and($messagesJson)->not->toContain('server_tool_use')
->and($messagesJson)->toContain('https://reuters.com/opec');
});
it('honors the 4-hour cooldown for event-driven calls', function (): void {
@@ -291,14 +264,19 @@ it('honors the 4-hour cooldown for event-driven calls', function (): void {
'updated_at' => now(),
]);
fakeAnthropicWithOverlay('falling', 40, [
['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
->push(fakeSubmitTurn('falling', 40, [
['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
])),
'*' => Http::response('', 200),
]);
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
expect($service->run(eventDriven: true))->toBeNull() // <4h since prior
->and(LlmOverlay::query()->count())->toBe(1); // no new row inserted
expect($service->run(eventDriven: true))->toBeNull()
->and(LlmOverlay::query()->count())->toBe(1);
Carbon::setTestNow();
});
@@ -320,8 +298,13 @@ it('always runs (ignores cooldown) when not event-driven', function (): void {
'updated_at' => now(),
]);
fakeAnthropicWithOverlay('falling', 40, [
['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
Http::fake([
'*api.anthropic.com/*' => Http::sequence()
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
->push(fakeSubmitTurn('falling', 40, [
['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
])),
'*' => Http::response('', 200),
]);
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));