Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest. Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md): 1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, confidence drawn from a backtested calibration table, not made up. 2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future. 3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges, never blended into the number. 4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection. 5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events). Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency. Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table. Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data. New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events. New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import. Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
249 lines
9.0 KiB
PHP
249 lines
9.0 KiB
PHP
<?php
|
|
|
|
use App\Models\LlmOverlay;
|
|
use App\Services\ApiLogger;
|
|
use App\Services\Forecasting\LlmOverlayService;
|
|
use App\Services\Forecasting\WeeklyForecastService;
|
|
use Carbon\Carbon;
|
|
use Illuminate\Foundation\Testing\RefreshDatabase;
|
|
use Illuminate\Support\Facades\Cache;
|
|
use Illuminate\Support\Facades\Config;
|
|
use Illuminate\Support\Facades\DB;
|
|
use Illuminate\Support\Facades\Http;
|
|
|
|
// Apply Laravel's RefreshDatabase testing trait to every test in this file.
uses(RefreshDatabase::class);

// Shared setup: each test starts with a dummy Anthropic API key configured
// and with the cache flushed.
beforeEach(function (): void {
    Config::set('services.anthropic.api_key', 'test-key');
    Cache::flush();
});
|
|
|
|
/**
 * Fake outgoing HTTP so requests matching the Anthropic API pattern receive a
 * two-response sequence that ends in a `submit_overlay` tool call.
 *
 * The first queued response is a plain-text `end_turn` message; the second is
 * a `tool_use` message whose input is assembled from the arguments below.
 * Every request that does not match the Anthropic pattern receives an empty
 * 200 response.
 *
 * @param string $direction  Value placed in the tool input's `direction` field.
 * @param int    $confidence Value placed in the tool input's `confidence` field.
 * @param array  $events     Value placed in the tool input's `events_cited` field.
 * @param bool   $major      Value placed in the tool input's `major_impact_event` field.
 */
function fakeAnthropicWithOverlay(string $direction, int $confidence, array $events, bool $major = false): void
{
    $searchTurn = [
        'stop_reason' => 'end_turn',
        'content' => [
            ['type' => 'text', 'text' => 'Search summary.'],
        ],
    ];

    $overlayInput = [
        'direction' => $direction,
        'confidence' => $confidence,
        'reasoning_short' => 'Test reasoning.',
        'events_cited' => $events,
        'agrees_with_ridge' => true,
        'major_impact_event' => $major,
    ];

    $toolTurn = [
        'stop_reason' => 'tool_use',
        'content' => [
            [
                'type' => 'tool_use',
                'name' => 'submit_overlay',
                'input' => $overlayInput,
            ],
        ],
    ];

    $anthropicReplies = Http::sequence()
        ->push($searchTurn)
        ->push($toolTurn);

    // Pattern order matters: the Anthropic entry must stay ahead of the catch-all.
    Http::fake([
        '*api.anthropic.com/*' => $anthropicReplies,
        // URL HEAD verification probes — accept everything by default
        '*' => Http::response('', 200),
    ]);
}
|
|
|
|
// With the Anthropic API key config value cleared, run() is expected to return null.
it('skips when ANTHROPIC_API_KEY is not set', function (): void {
    Config::set('services.anthropic.api_key', null);

    $overlayService = new LlmOverlayService(new ApiLogger(), app(WeeklyForecastService::class));
    $result = $overlayService->run();

    expect($result)->toBeNull();
});
|
|
|
|
// An overlay whose `events_cited` list is empty must yield null and store no row.
it('rejects the overlay when no events are cited', function (): void {
    $noEvents = [];
    fakeAnthropicWithOverlay('rising', 60, $noEvents);

    $overlayService = new LlmOverlayService(new ApiLogger(), app(WeeklyForecastService::class));
    $result = $overlayService->run();

    expect($result)->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(0);
});
|
|
|
|
// The cited reuters.com URL answers 405 to the first request and 200 to the
// second; the overlay is expected to be returned with its single citation.
it('verifies a URL via GET fallback when HEAD returns 405', function (): void {
    $citation = [
        'headline' => 'OPEC',
        'source' => 'Reuters',
        'url' => 'https://reuters.com/x',
        'impact' => 'rising',
    ];

    $searchTurn = [
        'stop_reason' => 'end_turn',
        'content' => [
            ['type' => 'text', 'text' => 'ok'],
        ],
    ];

    $toolTurn = [
        'stop_reason' => 'tool_use',
        'content' => [
            [
                'type' => 'tool_use',
                'name' => 'submit_overlay',
                'input' => [
                    'direction' => 'rising',
                    'confidence' => 60,
                    'reasoning_short' => 'Hostile-to-HEAD source.',
                    'events_cited' => [$citation],
                    'agrees_with_ridge' => true,
                    'major_impact_event' => false,
                ],
            ],
        ],
    ];

    $anthropicReplies = Http::sequence()
        ->push($searchTurn)
        ->push($toolTurn);

    $reutersReplies = Http::sequence()
        ->push('', 405)              // first probe (HEAD) → 405 Method Not Allowed
        ->push('partial-body', 200); // second probe (GET fallback) succeeds

    Http::fake([
        '*api.anthropic.com/*' => $anthropicReplies,
        'reuters.com/*' => $reutersReplies,
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger(), app(WeeklyForecastService::class));
    $overlay = $overlayService->run();

    expect($overlay)->not->toBeNull();
    expect($overlay->events_json)->toHaveCount(1);
});
|
|
|
|
// The cited example.com URL answers 404 to two consecutive requests; run()
// is expected to return null and no overlay row may be stored.
it('rejects the overlay when both HEAD and GET fail', function (): void {
    $deadCitation = [
        'headline' => 'X',
        'source' => 'Reuters',
        'url' => 'https://example.com/dead',
        'impact' => 'rising',
    ];

    $searchTurn = [
        'stop_reason' => 'end_turn',
        'content' => [
            ['type' => 'text', 'text' => 'ok'],
        ],
    ];

    $toolTurn = [
        'stop_reason' => 'tool_use',
        'content' => [
            [
                'type' => 'tool_use',
                'name' => 'submit_overlay',
                'input' => [
                    'direction' => 'rising',
                    'confidence' => 60,
                    'reasoning_short' => 'Truly dead URL.',
                    'events_cited' => [$deadCitation],
                    'agrees_with_ridge' => true,
                    'major_impact_event' => false,
                ],
            ],
        ],
    ];

    $anthropicReplies = Http::sequence()
        ->push($searchTurn)
        ->push($toolTurn);

    $deadHostReplies = Http::sequence()
        ->push('', 404)  // first probe (HEAD) → 404
        ->push('', 404); // second probe (GET) → still 404

    Http::fake([
        '*api.anthropic.com/*' => $anthropicReplies,
        'example.com/*' => $deadHostReplies,
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger(), app(WeeklyForecastService::class));
    $result = $overlayService->run();

    expect($result)->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(0);
});
|
|
|
|
// Every request to example.com is answered with 404, so the only cited URL
// never resolves; run() is expected to return null and store nothing.
it('rejects the overlay when every cited URL is unreachable', function (): void {
    $deadCitation = [
        'headline' => 'X',
        'source' => 'Reuters',
        'url' => 'https://example.com/dead',
        'impact' => 'rising',
    ];

    $searchTurn = [
        'stop_reason' => 'end_turn',
        'content' => [
            ['type' => 'text', 'text' => 'ok'],
        ],
    ];

    $toolTurn = [
        'stop_reason' => 'tool_use',
        'content' => [
            [
                'type' => 'tool_use',
                'name' => 'submit_overlay',
                'input' => [
                    'direction' => 'rising',
                    'confidence' => 60,
                    'reasoning_short' => 'Test.',
                    'events_cited' => [$deadCitation],
                    'agrees_with_ridge' => true,
                    'major_impact_event' => false,
                ],
            ],
        ],
    ];

    $anthropicReplies = Http::sequence()
        ->push($searchTurn)
        ->push($toolTurn);

    Http::fake([
        '*api.anthropic.com/*' => $anthropicReplies,
        'example.com/*' => Http::response('', 404),
    ]);

    $overlayService = new LlmOverlayService(new ApiLogger(), app(WeeklyForecastService::class));
    $result = $overlayService->run();

    expect($result)->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(0);
});
|
|
|
|
// Happy path: a cited, reachable URL produces an overlay row. The faked
// response reports confidence 95 and the returned row is expected to carry 75.
it('persists an overlay row with verified citations and capped confidence', function (): void {
    $citedEvents = [
        [
            'headline' => 'OPEC cuts output',
            'source' => 'Reuters',
            'url' => 'https://reuters.com/opec',
            'impact' => 'rising',
        ],
    ];

    // 95 is above the cap → expect capped to 75
    fakeAnthropicWithOverlay('rising', 95, $citedEvents, true);

    $overlayService = new LlmOverlayService(new ApiLogger(), app(WeeklyForecastService::class));
    $overlay = $overlayService->run();

    expect($overlay)->not->toBeNull();
    expect($overlay->direction)->toBe('rising');
    expect($overlay->confidence)->toBe(75); // capped
    expect($overlay->major_impact_event)->toBeTrue();
    expect($overlay->search_used)->toBeTrue();
    expect($overlay->events_json)->toHaveCount(1);
});
|
|
|
|
// With the clock frozen at 10:00 and a prior overlay recorded at 08:00, an
// event-driven run is expected to return null and leave the table at one row.
it('honors the 4-hour cooldown for event-driven calls', function (): void {
    Carbon::setTestNow('2026-05-01 10:00:00');

    $frozenNow = now();
    $priorOverlay = [
        'ran_at' => Carbon::parse('2026-05-01 08:00:00'),
        'forecast_for_week' => '2026-05-04',
        'direction' => 'rising',
        'confidence' => 60,
        'reasoning' => 'prior',
        'events_json' => json_encode([['headline' => 'x', 'url' => 'https://reuters.com/x']]),
        'agrees_with_ridge' => true,
        'major_impact_event' => false,
        'volatility_flag_on' => false,
        'search_used' => true,
        'created_at' => $frozenNow,
        'updated_at' => $frozenNow,
    ];
    DB::table('llm_overlays')->insert($priorOverlay);

    $freshEvents = [
        ['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
    ];
    fakeAnthropicWithOverlay('falling', 40, $freshEvents);

    $overlayService = new LlmOverlayService(new ApiLogger(), app(WeeklyForecastService::class));
    $result = $overlayService->run(eventDriven: true);

    expect($result)->toBeNull(); // <4h since prior
    expect(LlmOverlay::query()->count())->toBe(1); // no new row inserted

    // Unfreeze the clock.
    Carbon::setTestNow();
});
|
|
|
|
// Same fixture as the cooldown test (prior overlay two hours before the frozen
// clock), but run() is called without eventDriven: a second row is expected.
it('always runs (ignores cooldown) when not event-driven', function (): void {
    Carbon::setTestNow('2026-05-01 10:00:00');

    $frozenNow = now();
    $priorOverlay = [
        'ran_at' => Carbon::parse('2026-05-01 08:00:00'),
        'forecast_for_week' => '2026-05-04',
        'direction' => 'rising',
        'confidence' => 60,
        'reasoning' => 'prior',
        'events_json' => json_encode([['headline' => 'x', 'url' => 'https://reuters.com/x']]),
        'agrees_with_ridge' => true,
        'major_impact_event' => false,
        'volatility_flag_on' => false,
        'search_used' => true,
        'created_at' => $frozenNow,
        'updated_at' => $frozenNow,
    ];
    DB::table('llm_overlays')->insert($priorOverlay);

    $freshEvents = [
        ['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
    ];
    fakeAnthropicWithOverlay('falling', 40, $freshEvents);

    $overlayService = new LlmOverlayService(new ApiLogger(), app(WeeklyForecastService::class));
    $result = $overlayService->run();

    expect($result)->not->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(2);

    // Unfreeze the clock.
    Carbon::setTestNow();
});
|