feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector
Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest. Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md): 1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, confidence drawn from a backtested calibration table, not made up. 2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future. 3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges, never blended into the number. 4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection. 5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events). Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency. Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table. Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data. New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events. New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import. Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
248
tests/Unit/Services/Forecasting/LlmOverlayServiceTest.php
Normal file
248
tests/Unit/Services/Forecasting/LlmOverlayServiceTest.php
Normal file
@@ -0,0 +1,248 @@
|
||||
<?php
|
||||
|
||||
use App\Models\LlmOverlay;
|
||||
use App\Services\ApiLogger;
|
||||
use App\Services\Forecasting\LlmOverlayService;
|
||||
use App\Services\Forecasting\WeeklyForecastService;
|
||||
use Carbon\Carbon;
|
||||
use Illuminate\Foundation\Testing\RefreshDatabase;
|
||||
use Illuminate\Support\Facades\Cache;
|
||||
use Illuminate\Support\Facades\Config;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
|
||||
// Apply Laravel's RefreshDatabase trait to every test declared in this file.
uses(RefreshDatabase::class);

/*
 * Shared baseline for every test: configure a dummy Anthropic API key and
 * empty the cache. Individual tests override the key when they need to
 * exercise the "no key configured" path.
 */
beforeEach(function (): void {
    Config::set('services.anthropic.api_key', 'test-key');
    Cache::flush();
});
|
||||
|
||||
/**
 * Fake all outbound HTTP traffic for a single overlay run.
 *
 * Requests matching `*api.anthropic.com/*` are answered from a two-step
 * sequence: first a plain-text `end_turn` reply, then a `tool_use` reply
 * carrying a `submit_overlay` call whose input is built from the arguments.
 * Every other request (the citation URL probes) receives an empty 200.
 *
 * @param  string  $direction  Value placed in the tool input's `direction` field.
 * @param  int  $confidence  Value placed in the tool input's `confidence` field.
 * @param  array<int, array<string, string>>  $events  Value placed in `events_cited`.
 * @param  bool  $major  Value placed in `major_impact_event`.
 * @param  bool  $agreesWithRidge  Value placed in `agrees_with_ridge`. Defaults to
 *                                 true, the value this helper used to hard-code, so
 *                                 existing call sites behave exactly as before.
 */
function fakeAnthropicWithOverlay(
    string $direction,
    int $confidence,
    array $events,
    bool $major = false,
    bool $agreesWithRidge = true,
): void {
    // First queued reply: a text-only turn that ends without calling the tool.
    $searchTurn = [
        'stop_reason' => 'end_turn',
        'content' => [['type' => 'text', 'text' => 'Search summary.']],
    ];

    // Second queued reply: the structured submit_overlay tool call.
    $overlayTurn = [
        'stop_reason' => 'tool_use',
        'content' => [[
            'type' => 'tool_use',
            'name' => 'submit_overlay',
            'input' => [
                'direction' => $direction,
                'confidence' => $confidence,
                'reasoning_short' => 'Test reasoning.',
                'events_cited' => $events,
                'agrees_with_ridge' => $agreesWithRidge,
                'major_impact_event' => $major,
            ],
        ]],
    ];

    Http::fake([
        '*api.anthropic.com/*' => Http::sequence()
            ->push($searchTurn)
            ->push($overlayTurn),
        // URL HEAD verification probes — accept everything by default.
        // Listed after the Anthropic pattern so the catch-all only handles
        // requests the more specific pattern did not match.
        '*' => Http::response('', 200),
    ]);
}
|
||||
|
||||
/*
 * With no API key configured the service must return null, and it must do so
 * without attempting any outbound HTTP request.
 */
it('skips when ANTHROPIC_API_KEY is not set', function (): void {
    Config::set('services.anthropic.api_key', null);

    // Fake every request so a regression in the key guard can never reach the
    // real network, and so the absence of traffic can be asserted below.
    Http::fake();

    $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

    expect($service->run())->toBeNull();

    Http::assertNothingSent();
});
|
||||
|
||||
/*
 * An overlay whose `events_cited` list is empty is expected to be discarded:
 * run() yields null and nothing is written to the overlays table.
 */
it('rejects the overlay when no events are cited', function (): void {
    fakeAnthropicWithOverlay('rising', 60, []);

    $result = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($result)->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(0);
});
|
||||
|
||||
/*
 * The cited host answers its first request with 405 and its second with 200.
 * The test expects the overlay to be persisted with its single citation,
 * i.e. the second (GET) attempt counts as a successful verification.
 */
it('verifies a URL via GET fallback when HEAD returns 405', function (): void {
    $overlayInput = [
        'direction' => 'rising',
        'confidence' => 60,
        'reasoning_short' => 'Hostile-to-HEAD source.',
        'events_cited' => [
            ['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/x', 'impact' => 'rising'],
        ],
        'agrees_with_ridge' => true,
        'major_impact_event' => false,
    ];

    // Anthropic: a text-only turn followed by the submit_overlay tool call.
    $anthropicReplies = Http::sequence()
        ->push([
            'stop_reason' => 'end_turn',
            'content' => [['type' => 'text', 'text' => 'ok']],
        ])
        ->push([
            'stop_reason' => 'tool_use',
            'content' => [[
                'type' => 'tool_use',
                'name' => 'submit_overlay',
                'input' => $overlayInput,
            ]],
        ]);

    // Cited site: HEAD → 405 Method Not Allowed, then the GET fallback succeeds.
    $citedSiteReplies = Http::sequence()
        ->push('', 405)
        ->push('partial-body', 200);

    Http::fake([
        '*api.anthropic.com/*' => $anthropicReplies,
        'reuters.com/*' => $citedSiteReplies,
    ]);

    $row = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($row)->not->toBeNull();
    expect($row->events_json)->toHaveCount(1);
});
|
||||
|
||||
/*
 * The cited host answers 404 to both queued requests (HEAD, then GET).
 * The test expects the overlay to be rejected: run() yields null and no row
 * is stored.
 */
it('rejects the overlay when both HEAD and GET fail', function (): void {
    $overlayInput = [
        'direction' => 'rising',
        'confidence' => 60,
        'reasoning_short' => 'Truly dead URL.',
        'events_cited' => [
            ['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'],
        ],
        'agrees_with_ridge' => true,
        'major_impact_event' => false,
    ];

    // Anthropic: a text-only turn followed by the submit_overlay tool call.
    $anthropicReplies = Http::sequence()
        ->push([
            'stop_reason' => 'end_turn',
            'content' => [['type' => 'text', 'text' => 'ok']],
        ])
        ->push([
            'stop_reason' => 'tool_use',
            'content' => [[
                'type' => 'tool_use',
                'name' => 'submit_overlay',
                'input' => $overlayInput,
            ]],
        ]);

    // Cited site: HEAD → 404, GET → still 404.
    $deadSiteReplies = Http::sequence()
        ->push('', 404)
        ->push('', 404);

    Http::fake([
        '*api.anthropic.com/*' => $anthropicReplies,
        'example.com/*' => $deadSiteReplies,
    ]);

    $result = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($result)->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(0);
});
|
||||
|
||||
/*
 * The only cited URL always answers 404 (a fixed response, not a sequence).
 * The test expects the overlay to be rejected: run() yields null and no row
 * is stored.
 */
it('rejects the overlay when every cited URL is unreachable', function (): void {
    $overlayInput = [
        'direction' => 'rising',
        'confidence' => 60,
        'reasoning_short' => 'Test.',
        'events_cited' => [
            ['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'],
        ],
        'agrees_with_ridge' => true,
        'major_impact_event' => false,
    ];

    // Anthropic: a text-only turn followed by the submit_overlay tool call.
    $anthropicReplies = Http::sequence()
        ->push([
            'stop_reason' => 'end_turn',
            'content' => [['type' => 'text', 'text' => 'ok']],
        ])
        ->push([
            'stop_reason' => 'tool_use',
            'content' => [[
                'type' => 'tool_use',
                'name' => 'submit_overlay',
                'input' => $overlayInput,
            ]],
        ]);

    Http::fake([
        '*api.anthropic.com/*' => $anthropicReplies,
        'example.com/*' => Http::response('', 404),
    ]);

    $result = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($result)->toBeNull();
    expect(LlmOverlay::query()->count())->toBe(0);
});
|
||||
|
||||
/*
 * Happy path: one citation whose URL probe succeeds (the helper's catch-all
 * answers 200). The faked model reports confidence 95; the stored row is
 * expected to carry 75 instead, alongside the other submitted fields.
 */
it('persists an overlay row with verified citations and capped confidence', function (): void {
    $citations = [
        ['headline' => 'OPEC cuts output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
    ];

    // 95 is above the cap → expect the persisted value to be capped to 75.
    fakeAnthropicWithOverlay('rising', 95, $citations, true);

    $row = (new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class)))->run();

    expect($row)->not->toBeNull();
    expect($row->direction)->toBe('rising');
    expect($row->confidence)->toBe(75); // capped
    expect($row->major_impact_event)->toBeTrue();
    expect($row->search_used)->toBeTrue();
    expect($row->events_json)->toHaveCount(1);
});
|
||||
|
||||
/*
 * A prior overlay ran at 08:00 and the clock is frozen at 10:00, i.e. two
 * hours later. An event-driven run is expected to be skipped: run() yields
 * null and the table still holds only the seeded row.
 */
it('honors the 4-hour cooldown for event-driven calls', function (): void {
    Carbon::setTestNow('2026-05-01 10:00:00');

    try {
        // Seed the prior overlay directly so no service code runs during setup.
        DB::table('llm_overlays')->insert([
            'ran_at' => Carbon::parse('2026-05-01 08:00:00'),
            'forecast_for_week' => '2026-05-04',
            'direction' => 'rising',
            'confidence' => 60,
            'reasoning' => 'prior',
            'events_json' => json_encode([['headline' => 'x', 'url' => 'https://reuters.com/x']]),
            'agrees_with_ridge' => true,
            'major_impact_event' => false,
            'volatility_flag_on' => false,
            'search_used' => true,
            'created_at' => now(),
            'updated_at' => now(),
        ]);

        fakeAnthropicWithOverlay('falling', 40, [
            ['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
        ]);

        $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

        expect($service->run(eventDriven: true))->toBeNull() // <4h since prior
            ->and(LlmOverlay::query()->count())->toBe(1); // no new row inserted
    } finally {
        // Unfreeze the clock even when an expectation above throws, so the
        // reset inside this test is never skipped on failure.
        Carbon::setTestNow();
    }
});
|
||||
|
||||
/*
 * Same setup as the cooldown scenario (prior overlay at 08:00, clock frozen at
 * 10:00), but run() is called without the event-driven flag. The run is
 * expected to proceed: a row is returned and the table then holds two rows.
 */
it('always runs (ignores cooldown) when not event-driven', function (): void {
    Carbon::setTestNow('2026-05-01 10:00:00');

    try {
        // Seed the prior overlay directly so no service code runs during setup.
        DB::table('llm_overlays')->insert([
            'ran_at' => Carbon::parse('2026-05-01 08:00:00'),
            'forecast_for_week' => '2026-05-04',
            'direction' => 'rising',
            'confidence' => 60,
            'reasoning' => 'prior',
            'events_json' => json_encode([['headline' => 'x', 'url' => 'https://reuters.com/x']]),
            'agrees_with_ridge' => true,
            'major_impact_event' => false,
            'volatility_flag_on' => false,
            'search_used' => true,
            'created_at' => now(),
            'updated_at' => now(),
        ]);

        fakeAnthropicWithOverlay('falling', 40, [
            ['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
        ]);

        $service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));

        expect($service->run())->not->toBeNull()
            ->and(LlmOverlay::query()->count())->toBe(2);
    } finally {
        // Unfreeze the clock even when an expectation above throws, so the
        // reset inside this test is never skipped on failure.
        Carbon::setTestNow();
    }
});
|
||||
Reference in New Issue
Block a user