The daily forecast:llm-overlay command was being skipped because the previous single-conversation flow consumed more than Tier-1's 50,000 input-tokens-per-minute Anthropic bucket. The web_search tool auto-caches its results (~55k tokens) and requires `encrypted_content` intact when those blocks are resent, so the prior retry-on-missing-citations path either 429'd or 400'd on the second call. LlmOverlayService now runs two independent API calls. Phase 1 invokes the web_search tool and we discard the transcript after harvesting the URLs + titles from the returned web_search_tool_result blocks. Phase 2 is a fresh conversation containing the forecast context and the harvested headlines as plain text, with a forced submit_overlay tool call. events_cited is now optional in the tool schema — Haiku's flaky compliance no longer matters because citations come from the search results, not the model's transcription. Model-tagged events (with directional impact) merge with harvested-only entries (impact: 'neutral'), deduped by URL. Between phases the service reads anthropic-ratelimit-input-tokens-remaining / anthropic-ratelimit-input-tokens-reset from Phase 1's headers and sleeps proportionally — only long enough for SUBMIT_TOKEN_BUDGET's worth of refill, not for the full bucket reset, capped at 65 seconds. ApiLogger now captures usage.input_tokens, usage.output_tokens, cache_read_input_tokens, cache_creation_input_tokens, plus the rate-limit remaining/reset headers on every Anthropic response. New nullable columns on api_logs make rate-limit diagnostics directly queryable. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
317 lines
12 KiB
PHP
317 lines
12 KiB
PHP
<?php
|
|
|
|
use App\Models\LlmOverlay;
|
|
use App\Services\ApiLogger;
|
|
use App\Services\Forecasting\LlmOverlayService;
|
|
use App\Services\Forecasting\WeeklyForecastService;
|
|
use Carbon\Carbon;
|
|
use Illuminate\Foundation\Testing\RefreshDatabase;
|
|
use Illuminate\Support\Facades\Cache;
|
|
use Illuminate\Support\Facades\Config;
|
|
use Illuminate\Support\Facades\DB;
|
|
use Illuminate\Support\Facades\Http;
|
|
|
|
// Every test in this file runs against a freshly migrated database.
uses(RefreshDatabase::class);

// Shared fixture: configure a dummy Anthropic key and start from an empty cache.
beforeEach(function (): void {
    Config::set('services.anthropic.api_key', 'test-key');
    Cache::flush();
});
|
|
|
|
/**
 * Build a fake Anthropic Phase 1 assistant turn.
 *
 * The turn starts with a text block and then, for each entry in $results,
 * carries a server_tool_use block followed by its matching
 * web_search_tool_result block (the blocks citations are harvested from).
 * Both blocks of a pair share the id 'srvtoolu_<key>', where <key> is the
 * entry's array key.
 *
 * @param  array<int, array{url: string, title: string}>  $results
 * @return array<string, mixed>
 */
function fakeSearchResultsTurn(array $results): array
{
    $blocks = [['type' => 'text', 'text' => 'Searching...']];

    foreach (array_keys($results) as $key) {
        $toolUseId = 'srvtoolu_'.$key;
        $hit = $results[$key];

        // One search invocation followed by the result block that answers it.
        array_push(
            $blocks,
            [
                'type' => 'server_tool_use',
                'id' => $toolUseId,
                'name' => 'web_search',
                'input' => ['query' => 'oil news'],
            ],
            [
                'type' => 'web_search_tool_result',
                'tool_use_id' => $toolUseId,
                'content' => [[
                    'type' => 'web_search_result',
                    'url' => $hit['url'],
                    'title' => $hit['title'],
                    // Bulky marker payload; a later test asserts it is never resent.
                    'encrypted_content' => str_repeat('LONG_PAGE_TEXT_', 200),
                    'page_age' => '1 day ago',
                ]],
            ],
        );
    }

    return ['stop_reason' => 'end_turn', 'content' => $blocks];
}
|
|
|
|
/**
 * Build a fake Anthropic Phase 2 response: a single submit_overlay tool call.
 *
 * The 'events_cited' key is left out of the tool input entirely when $events
 * is empty, so tests can simulate a model that omitted the field.
 *
 * @param  array<int, array<string, mixed>>  $events
 * @return array<string, mixed>
 */
function fakeSubmitTurn(string $direction, int $confidence, array $events, bool $major = false): array
{
    $payload = [
        'direction' => $direction,
        'confidence' => $confidence,
        'reasoning_short' => 'Test reasoning.',
        'agrees_with_ridge' => true,
        'major_impact_event' => $major,
    ];

    // Append the citations only when there are any.
    $payload += $events === [] ? [] : ['events_cited' => $events];

    $toolCall = [
        'type' => 'tool_use',
        'id' => 'toolu_submit',
        'name' => 'submit_overlay',
        'input' => $payload,
    ];

    return ['stop_reason' => 'tool_use', 'content' => [$toolCall]];
}
|
|
|
|
it('skips when ANTHROPIC_API_KEY is not set', function (): void {
    // Replace the dummy key installed by beforeEach() with null.
    Config::set('services.anthropic.api_key', null);

    $result = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($result)->toBeNull();
});
|
|
|
|
it('rejects only when neither web search nor model cited anything', function (): void {
    // Phase 1 answers with text only (no web_search_tool_result blocks) and
    // Phase 2 submits without events_cited, so no citation exists anywhere.
    $searchTurnWithoutResults = [
        'stop_reason' => 'end_turn',
        'content' => [['type' => 'text', 'text' => 'no results']],
    ];
    $submitTurnWithoutEvents = fakeSubmitTurn('rising', 60, []);

    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push($searchTurnWithoutResults)
            ->push($submitTurnWithoutEvents),
        '*' => Http::response('', 200),
    ]);

    $overlay = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($overlay)->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(0);
});
|
|
|
|
it('verifies a URL via GET fallback when HEAD returns 405', function (): void {
    Http::fake([
        // Phase 1 yields no search results; Phase 2 cites one Reuters URL.
        '*api.anthropic.com/*' => Http::sequence()
            ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
            ->push(fakeSubmitTurn('rising', 60, [
                ['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/x', 'impact' => 'rising'],
            ])),
        // First request to the cited URL is refused with 405, the second succeeds.
        'reuters.com/*' => Http::sequence()
            ->push('', 405)
            ->push('partial-body', 200),
    ]);

    $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
    $row = $service->run();

    expect($row)->not->toBeNull()
        ->and($row->events_json)->toHaveCount(1);

    // Pin the fallback itself: without this the test would also pass if a 405
    // on HEAD were simply treated as "reachable" and no GET was ever issued.
    $verificationMethods = collect(Http::recorded())
        ->filter(fn (array $pair): bool => str_contains($pair[0]->url(), 'reuters.com/x'))
        ->map(fn (array $pair): string => $pair[0]->method())
        ->values()
        ->all();

    expect($verificationMethods)->toBe(['HEAD', 'GET']);
});
|
|
|
|
it('rejects the overlay when every cited URL is unreachable', function (): void {
    // The only citation the model supplies points at a URL that answers 404.
    $deadCitation = [
        'headline' => 'X',
        'source' => 'Reuters',
        'url' => 'https://example.com/dead',
        'impact' => 'rising',
    ];
    $anthropicReplies = Http::sequence()
        ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
        ->push(fakeSubmitTurn('rising', 60, [$deadCitation]));

    Http::fake([
        '*api.anthropic.com/*' => $anthropicReplies,
        'example.com/*' => Http::response('', 404),
    ]);

    $overlay = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($overlay)->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(0);
});
|
|
|
|
it('persists an overlay row with verified citations and capped confidence', function (): void {
    // The model reports confidence 95 and flags a major event; every URL check returns 200.
    $citation = [
        'headline' => 'OPEC cuts output',
        'source' => 'Reuters',
        'url' => 'https://reuters.com/opec',
        'impact' => 'rising',
    ];
    $anthropicReplies = Http::sequence()
        ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
        ->push(fakeSubmitTurn('rising', 95, [$citation], major: true));

    Http::fake([
        '*api.anthropic.com/*' => $anthropicReplies,
        '*' => Http::response('', 200),
    ]);

    $overlay = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($overlay)->not->toBeNull();
    expect($overlay->direction)->toBe('rising');
    // The stored confidence is expected to be 75 rather than the submitted 95.
    expect($overlay->confidence)->toBe(75);
    expect($overlay->major_impact_event)->toBeTrue();
    expect($overlay->search_used)->toBeTrue();
    expect($overlay->events_json)->toHaveCount(1);
});
|
|
|
|
it('harvests citations from web_search_tool_result when the model omits events_cited', function (): void {
    // Phase 1 carries two search results; Phase 2 submits with no events_cited.
    $searchHits = [
        ['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
        ['url' => 'https://bloomberg.com/iran', 'title' => 'Iran tensions'],
    ];

    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(fakeSearchResultsTurn($searchHits))
            ->push(fakeSubmitTurn('rising', 70, [])),
        '*' => Http::response('', 200),
    ]);

    $overlay = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($overlay)->not->toBeNull();
    expect($overlay->events_json)->toHaveCount(2);

    $storedUrls = collect($overlay->events_json)->pluck('url')->all();
    expect($storedUrls)->toEqualCanonicalizing(['https://reuters.com/opec', 'https://bloomberg.com/iran']);

    // Every stored event is expected to carry the 'neutral' impact.
    $distinctImpacts = collect($overlay->events_json)->pluck('impact')->unique()->all();
    expect($distinctImpacts)->toBe(['neutral']);
});
|
|
|
|
it('merges model events_cited with harvested URLs deduped by URL', function (): void {
    // The Reuters URL appears in both the search results and the model's
    // citations; Bloomberg is search-only and CNBC is model-only.
    $searchHits = [
        ['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
        ['url' => 'https://bloomberg.com/iran', 'title' => 'Iran tensions'],
    ];
    $modelCitations = [
        ['headline' => 'OPEC slashes output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
        ['headline' => 'Refinery fire', 'source' => 'CNBC', 'url' => 'https://cnbc.com/refinery', 'impact' => 'rising'],
    ];

    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(fakeSearchResultsTurn($searchHits))
            ->push(fakeSubmitTurn('rising', 70, $modelCitations)),
        '*' => Http::response('', 200),
    ]);

    $overlay = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($overlay)->not->toBeNull();

    $storedUrls = collect($overlay->events_json)->pluck('url')->all();
    expect($storedUrls)->toEqualCanonicalizing([
        'https://reuters.com/opec',
        'https://bloomberg.com/iran',
        'https://cnbc.com/refinery',
    ]);

    // For the duplicated URL the model's headline and impact are expected to be kept.
    $reutersEvent = collect($overlay->events_json)->firstWhere('url', 'https://reuters.com/opec');
    expect($reutersEvent['impact'])->toBe('rising');
    expect($reutersEvent['headline'])->toBe('OPEC slashes output');

    // The search-only entry is expected to be stored as 'neutral'.
    $bloombergEvent = collect($overlay->events_json)->firstWhere('url', 'https://bloomberg.com/iran');
    expect($bloombergEvent['impact'])->toBe('neutral');
});
|
|
|
|
it('does not resend Phase 1 web_search_tool_result blocks on the submit call', function (): void {
    $searchTurn = fakeSearchResultsTurn([
        ['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
    ]);
    $submitTurn = fakeSubmitTurn('rising', 70, [
        ['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
    ]);

    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()->push($searchTurn)->push($submitTurn),
        '*' => Http::response('', 200),
    ]);

    (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    // Keep only the recorded [request, response] pairs that went to Anthropic.
    $anthropicCalls = collect(Http::recorded())
        ->filter(fn (array $recorded): bool => str_contains($recorded[0]->url(), 'api.anthropic.com'))
        ->values();

    expect($anthropicCalls)->toHaveCount(2);

    // The second Anthropic request is the submit call.
    $submitPayload = $anthropicCalls[1][0]->data();
    $serializedMessages = json_encode($submitPayload['messages'], JSON_UNESCAPED_SLASHES);

    // The submit call must be a single user message ...
    expect($submitPayload['messages'])->toHaveCount(1);
    expect($submitPayload['messages'][0]['role'])->toBe('user');

    // ... containing none of the Phase 1 tool blocks or their payload,
    // while still mentioning the harvested URL.
    expect($serializedMessages)->not->toContain('web_search_tool_result');
    expect($serializedMessages)->not->toContain('LONG_PAGE_TEXT_');
    expect($serializedMessages)->not->toContain('server_tool_use');
    expect($serializedMessages)->toContain('https://reuters.com/opec');
});
|
|
|
|
it('honors the 4-hour cooldown for event-driven calls', function (): void {
    Carbon::setTestNow('2026-05-01 10:00:00');

    try {
        // An overlay already ran two hours before the frozen "now".
        DB::table('llm_overlays')->insert([
            'ran_at' => Carbon::parse('2026-05-01 08:00:00'),
            'forecast_for_week' => '2026-05-04',
            'direction' => 'rising',
            'confidence' => 60,
            'reasoning' => 'prior',
            'events_json' => json_encode([['headline' => 'x', 'url' => 'https://reuters.com/x']]),
            'agrees_with_ridge' => true,
            'major_impact_event' => false,
            'volatility_flag_on' => false,
            'search_used' => true,
            'created_at' => now(),
            'updated_at' => now(),
        ]);

        Http::fake([
            '*api.anthropic.com/*' => Http::sequence()
                ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
                ->push(fakeSubmitTurn('falling', 40, [
                    ['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
                ])),
            '*' => Http::response('', 200),
        ]);

        $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

        // The event-driven run must be skipped, leaving only the seeded row.
        expect($service->run(eventDriven: true))->toBeNull()
            ->and(LlmOverlay::query()->count())->toBe(1);
    } finally {
        // Unfreeze the clock even when an expectation above fails, so the
        // frozen time cannot leak out of this test.
        Carbon::setTestNow();
    }
});
|
|
|
|
it('always runs (ignores cooldown) when not event-driven', function (): void {
    Carbon::setTestNow('2026-05-01 10:00:00');

    try {
        // An overlay already ran two hours before the frozen "now".
        DB::table('llm_overlays')->insert([
            'ran_at' => Carbon::parse('2026-05-01 08:00:00'),
            'forecast_for_week' => '2026-05-04',
            'direction' => 'rising',
            'confidence' => 60,
            'reasoning' => 'prior',
            'events_json' => json_encode([['headline' => 'x', 'url' => 'https://reuters.com/x']]),
            'agrees_with_ridge' => true,
            'major_impact_event' => false,
            'volatility_flag_on' => false,
            'search_used' => true,
            'created_at' => now(),
            'updated_at' => now(),
        ]);

        Http::fake([
            '*api.anthropic.com/*' => Http::sequence()
                ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
                ->push(fakeSubmitTurn('falling', 40, [
                    ['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
                ])),
            '*' => Http::response('', 200),
        ]);

        $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

        // A non-event-driven run must go ahead and add a second row.
        expect($service->run())->not->toBeNull()
            ->and(LlmOverlay::query()->count())->toBe(2);
    } finally {
        // Unfreeze the clock even when an expectation above fails, so the
        // frozen time cannot leak out of this test.
        Carbon::setTestNow();
    }
});
|