Files
fuel-alert/tests/Unit/Services/Forecasting/LlmOverlayServiceTest.php
Ovidiu U 07e0789044 fix(forecasting): persist LLM overlay under Tier-1 ITPM via two-call architecture
The daily forecast:llm-overlay command was being skipped because the previous
single-conversation flow consumed more than Tier-1's 50,000 input-tokens-per-
minute Anthropic bucket. The web_search tool auto-caches its results (~55k
tokens) and requires `encrypted_content` intact when those blocks are resent,
so the prior retry-on-missing-citations path either 429'd or 400'd on the
second call.

LlmOverlayService now runs two independent API calls. Phase 1 invokes the
web_search tool and we discard the transcript after harvesting the URLs +
titles from the returned web_search_tool_result blocks. Phase 2 is a fresh
conversation containing the forecast context and the harvested headlines as
plain text, with a forced submit_overlay tool call. events_cited is now
optional in the tool schema — Haiku's flaky compliance no longer matters
because citations come from the search results, not the model's transcription.
Model-tagged events (with directional impact) merge with harvested-only
entries (impact: 'neutral'), deduped by URL.

Between phases the service reads anthropic-ratelimit-input-tokens-remaining /
…-reset from Phase 1's headers and sleeps proportionally — only long enough
for the SUBMIT_TOKEN_BUDGET worth of refill, not for the full bucket reset,
capped at 65 seconds.

ApiLogger now captures usage.input_tokens, usage.output_tokens,
cache_read_input_tokens, cache_creation_input_tokens, plus the rate-limit
remaining/reset headers on every Anthropic response. New nullable columns on
api_logs make rate-limit diagnostics directly queryable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 14:22:42 +01:00

317 lines
12 KiB
PHP

<?php
use App\Models\LlmOverlay;
use App\Services\ApiLogger;
use App\Services\Forecasting\LlmOverlayService;
use App\Services\Forecasting\WeeklyForecastService;
use Carbon\Carbon;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Facades\Config;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Http;
// Migrate a fresh schema for every test in this file.
uses(RefreshDatabase::class);

beforeEach(function (): void {
    // Clear any cached state between tests and supply a fake API key so the
    // service under test does not short-circuit for lack of configuration.
    Cache::flush();
    Config::set('services.anthropic.api_key', 'test-key');
});
/**
 * Build an Anthropic-shaped Phase 1 assistant turn containing real
 * web_search_tool_result blocks — the source of truth the service harvests
 * citations from. Each entry produces a server_tool_use block plus its
 * matching result block; the bulky encrypted_content payload mimics the
 * cached page text the service must never resend.
 *
 * @param array<int, array{url: string, title: string}> $results
 * @return array<string, mixed>
 */
function fakeSearchResultsTurn(array $results): array
{
    $blocks = [['type' => 'text', 'text' => 'Searching...']];

    foreach ($results as $position => $entry) {
        // Pair each tool-use block with its result via a shared id.
        $toolUseId = 'srvtoolu_'.$position;

        $blocks[] = [
            'type' => 'server_tool_use',
            'id' => $toolUseId,
            'name' => 'web_search',
            'input' => ['query' => 'oil news'],
        ];
        $blocks[] = [
            'type' => 'web_search_tool_result',
            'tool_use_id' => $toolUseId,
            'content' => [[
                'type' => 'web_search_result',
                'url' => $entry['url'],
                'title' => $entry['title'],
                'encrypted_content' => str_repeat('LONG_PAGE_TEXT_', 200),
                'page_age' => '1 day ago',
            ]],
        ];
    }

    return ['stop_reason' => 'end_turn', 'content' => $blocks];
}
/**
 * Build an Anthropic-shaped Phase 2 assistant turn: a forced submit_overlay
 * tool call. When $events is empty the events_cited key is omitted entirely,
 * mirroring Haiku's flaky compliance with the (now optional) schema field.
 *
 * @param array<int, array<string, mixed>> $events
 * @return array<string, mixed>
 */
function fakeSubmitTurn(string $direction, int $confidence, array $events, bool $major = false): array
{
    $toolInput = [
        'direction' => $direction,
        'confidence' => $confidence,
        'reasoning_short' => 'Test reasoning.',
        'agrees_with_ridge' => true,
        'major_impact_event' => $major,
    ];

    if (count($events) > 0) {
        $toolInput['events_cited'] = $events;
    }

    return [
        'stop_reason' => 'tool_use',
        'content' => [[
            'type' => 'tool_use',
            'id' => 'toolu_submit',
            'name' => 'submit_overlay',
            'input' => $toolInput,
        ]],
    ];
}
it('skips when ANTHROPIC_API_KEY is not set', function (): void {
    // With no key configured the service must no-op rather than hit the API.
    Config::set('services.anthropic.api_key', null);

    $overlayService = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

    expect($overlayService->run())->toBeNull();
});
it('rejects only when neither web search nor model cited anything', function (): void {
    // Phase 1 yields no web_search_tool_result blocks AND Phase 2 omits
    // events_cited — the only combination that should be rejected outright.
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'no results']]])
            ->push(fakeSubmitTurn('rising', 60, [])),
        '*' => Http::response('', 200),
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

    expect($overlayService->run())->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(0);
});
it('verifies a URL via GET fallback when HEAD returns 405', function (): void {
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
            ->push(fakeSubmitTurn('rising', 60, [
                ['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/x', 'impact' => 'rising'],
            ])),
        // First probe (HEAD) is rejected with 405; the follow-up GET succeeds.
        'reuters.com/*' => Http::sequence()
            ->push('', 405)
            ->push('partial-body', 200),
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
    $overlay = $overlayService->run();

    expect($overlay)->not->toBeNull();
    expect($overlay->events_json)->toHaveCount(1);
});
it('rejects the overlay when every cited URL is unreachable', function (): void {
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
            ->push(fakeSubmitTurn('rising', 60, [
                ['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'],
            ])),
        // The sole citation 404s, so verification leaves nothing to persist.
        'example.com/*' => Http::response('', 404),
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

    expect($overlayService->run())->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(0);
});
it('persists an overlay row with verified citations and capped confidence', function (): void {
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
            ->push(fakeSubmitTurn('rising', 95, [
                ['headline' => 'OPEC cuts output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
            ], major: true)),
        '*' => Http::response('', 200),
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
    $overlay = $overlayService->run();

    expect($overlay)->not->toBeNull();
    expect($overlay->direction)->toBe('rising');
    // The model claimed 95, but the persisted confidence is capped at 75.
    expect($overlay->confidence)->toBe(75);
    expect($overlay->major_impact_event)->toBeTrue();
    expect($overlay->search_used)->toBeTrue();
    expect($overlay->events_json)->toHaveCount(1);
});
it('harvests citations from web_search_tool_result when the model omits events_cited', function (): void {
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(fakeSearchResultsTurn([
                ['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
                ['url' => 'https://bloomberg.com/iran', 'title' => 'Iran tensions'],
            ]))
            ->push(fakeSubmitTurn('rising', 70, [])),
        '*' => Http::response('', 200),
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
    $overlay = $overlayService->run();

    expect($overlay)->not->toBeNull();
    expect($overlay->events_json)->toHaveCount(2);

    $events = collect($overlay->events_json);
    expect($events->pluck('url')->all())
        ->toEqualCanonicalizing(['https://reuters.com/opec', 'https://bloomberg.com/iran']);
    // Harvested-only entries carry no directional signal from the model.
    expect($events->pluck('impact')->unique()->all())->toBe(['neutral']);
});
it('merges model events_cited with harvested URLs deduped by URL', function (): void {
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(fakeSearchResultsTurn([
                ['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
                ['url' => 'https://bloomberg.com/iran', 'title' => 'Iran tensions'],
            ]))
            ->push(fakeSubmitTurn('rising', 70, [
                ['headline' => 'OPEC slashes output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
                ['headline' => 'Refinery fire', 'source' => 'CNBC', 'url' => 'https://cnbc.com/refinery', 'impact' => 'rising'],
            ])),
        '*' => Http::response('', 200),
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
    $overlay = $overlayService->run();

    expect($overlay)->not->toBeNull();

    $events = collect($overlay->events_json);
    expect($events->pluck('url')->all())->toEqualCanonicalizing([
        'https://reuters.com/opec',
        'https://bloomberg.com/iran',
        'https://cnbc.com/refinery',
    ]);

    // The model-tagged entry wins the dedupe: its impact and headline survive.
    $opecEvent = $events->firstWhere('url', 'https://reuters.com/opec');
    expect($opecEvent['impact'])->toBe('rising');
    expect($opecEvent['headline'])->toBe('OPEC slashes output');

    // The harvested-only entry defaults to neutral.
    $iranEvent = $events->firstWhere('url', 'https://bloomberg.com/iran');
    expect($iranEvent['impact'])->toBe('neutral');
});
it('does not resend Phase 1 web_search_tool_result blocks on the submit call', function (): void {
    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(fakeSearchResultsTurn([
                ['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
            ]))
            ->push(fakeSubmitTurn('rising', 70, [
                ['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
            ])),
        '*' => Http::response('', 200),
    ]);

    (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    $apiCalls = collect(Http::recorded())
        ->filter(fn (array $pair): bool => str_contains($pair[0]->url(), 'api.anthropic.com'))
        ->values();

    expect($apiCalls)->toHaveCount(2);

    $phaseTwoBody = $apiCalls[1][0]->data();
    $encodedMessages = json_encode($phaseTwoBody['messages'], JSON_UNESCAPED_SLASHES);

    // Phase 2 must open a fresh conversation: a single user message...
    expect($phaseTwoBody['messages'])->toHaveCount(1);
    expect($phaseTwoBody['messages'][0]['role'])->toBe('user');

    // ...carrying the harvested URL as plain text, with none of the Phase 1
    // search transcript (which would blow the token budget on resend).
    expect($encodedMessages)->not->toContain('web_search_tool_result');
    expect($encodedMessages)->not->toContain('LONG_PAGE_TEXT_');
    expect($encodedMessages)->not->toContain('server_tool_use');
    expect($encodedMessages)->toContain('https://reuters.com/opec');
});
it('honors the 4-hour cooldown for event-driven calls', function (): void {
    Carbon::setTestNow('2026-05-01 10:00:00');

    // A prior overlay ran two hours ago — still inside the cooldown window.
    DB::table('llm_overlays')->insert([
        'ran_at' => Carbon::parse('2026-05-01 08:00:00'),
        'forecast_for_week' => '2026-05-04',
        'direction' => 'rising',
        'confidence' => 60,
        'reasoning' => 'prior',
        'events_json' => json_encode([['headline' => 'x', 'url' => 'https://reuters.com/x']]),
        'agrees_with_ridge' => true,
        'major_impact_event' => false,
        'volatility_flag_on' => false,
        'search_used' => true,
        'created_at' => now(),
        'updated_at' => now(),
    ]);

    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
            ->push(fakeSubmitTurn('falling', 40, [
                ['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
            ])),
        '*' => Http::response('', 200),
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

    expect($overlayService->run(eventDriven: true))->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(1);

    Carbon::setTestNow();
});
it('always runs (ignores cooldown) when not event-driven', function (): void {
    Carbon::setTestNow('2026-05-01 10:00:00');

    // Same two-hour-old prior row as the cooldown test — but the scheduled
    // (non-event-driven) path must run regardless and add a second row.
    DB::table('llm_overlays')->insert([
        'ran_at' => Carbon::parse('2026-05-01 08:00:00'),
        'forecast_for_week' => '2026-05-04',
        'direction' => 'rising',
        'confidence' => 60,
        'reasoning' => 'prior',
        'events_json' => json_encode([['headline' => 'x', 'url' => 'https://reuters.com/x']]),
        'agrees_with_ridge' => true,
        'major_impact_event' => false,
        'volatility_flag_on' => false,
        'search_used' => true,
        'created_at' => now(),
        'updated_at' => now(),
    ]);

    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
            ->push(fakeSubmitTurn('falling', 40, [
                ['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
            ])),
        '*' => Http::response('', 200),
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

    expect($overlayService->run())->not->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(2);

    Carbon::setTestNow();
});