feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector

Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest. Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md): 1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, confidence drawn from a backtested calibration table, not made up. 2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future. 3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges, never blended into the number. 4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection. 5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events). Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency. Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table. Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data. New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events. New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import. Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 08:40:05 +01:00
parent d13a29df01
commit ddd591ad47
63 changed files with 5109 additions and 13 deletions
--- a/tests/Unit/Services/Forecasting/Models/RidgeRegressionModelTest.php
+++ b/tests/Unit/Services/Forecasting/Models/RidgeRegressionModelTest.php
@@ -0,0 +1,152 @@
+<?php
+
+use App\Services\Forecasting\BacktestRunner;
+use App\Services\Forecasting\Features\DeltaUlspLag;
+use App\Services\Forecasting\Features\UlspMinusMa8;
+use App\Services\Forecasting\FeatureSpec;
+use App\Services\Forecasting\Models\NaiveZeroChangeModel;
+use App\Services\Forecasting\Models\RidgeRegressionModel;
+use App\Services\Forecasting\WeeklyPumpPriceLoader;
+use Carbon\Carbon;
+use Illuminate\Foundation\Testing\RefreshDatabase;
+use Illuminate\Support\Facades\DB;
+
+uses(RefreshDatabase::class); // every test here runs against a migrated database that is reset between tests
+
+/**
+ * Seeds weekly_pump_prices with $weeks consecutive weekly rows starting 2024-01-01.
+ *
+ * Weekly change: 50 in week 0, then round(0.8 * previous + 10). 50 is a fixed point of that
+ * rule, so ULSP rises by exactly 50 on every row (first row: 14050) — a noise-free ramp.
+ */
+function seedRidgeFixture(int $weeks = 30): void
+{
+    $origin = Carbon::parse('2024-01-01');
+    $ulsp = 14000;
+    $change = 0;
+
+    for ($week = 0; $week < $weeks; $week++) {
+        $change = $week === 0 ? 50 : (int) round($change * 0.8 + 10);
+        $ulsp += $change;
+        $row = [
+            'date' => $origin->copy()->addWeeks($week)->toDateString(),
+            'ulsp_pence' => $ulsp,
+            'ulsd_pence' => $ulsp + 800, // ULSD is kept a constant 800 above ULSP
+            'ulsp_duty_pence' => 5295,
+            'ulsd_duty_pence' => 5295,
+            'ulsp_vat_pct' => 20,
+            'ulsd_vat_pct' => 20,
+        ];
+        DB::table('weekly_pump_prices')->insert($row);
+    }
+}
+
+// Smoke test: a trained model yields a finite magnitude and a recognised direction label.
+// 'flat' is an accepted direction, so a non-zero magnitude is deliberately not asserted.
+it('train + predict produces a finite magnitude and a valid direction', function () {
+    seedRidgeFixture(30);
+    $loader = new WeeklyPumpPriceLoader;
+    $spec = new FeatureSpec('ridge-test', [
+        new DeltaUlspLag($loader, lag: 0),
+        new DeltaUlspLag($loader, lag: 1),
+        new UlspMinusMa8($loader),
+    ]);
+    $model = new RidgeRegressionModel(spec: $spec, loader: $loader, lambda: 1.0);
+
+    // Train on the first 21 fixture weeks (2024-01-01 onwards).
+    $training = collect(range(0, 20))->map(fn (int $i): Carbon => Carbon::parse('2024-01-01')->addWeeks($i))->all();
+    $model->train($training);
+
+    $prediction = $model->predict(Carbon::parse('2024-06-03'));
+    expect(is_finite($prediction->magnitudePence))->toBeTrue()
+        ->and($prediction->direction)->toBeIn(['rising', 'falling', 'flat']);
+});
+
+// After train(), coefficients() is expected to expose 'intercept' and 'lambda' at the top
+// level (lambda echoing the 1.0 passed to the constructor) and, under 'features', an entry
+// for delta_ulsp_lag_0 holding beta_standardised, mean and std_dev.
+it('coefficients() returns a structured payload after training', function () {
+    seedRidgeFixture(30);
+    $loader = new WeeklyPumpPriceLoader;
+    $spec = new FeatureSpec('ridge-test', [
+        new DeltaUlspLag($loader, lag: 0),
+        new DeltaUlspLag($loader, lag: 1),
+    ]);
+    $model = new RidgeRegressionModel(spec: $spec, loader: $loader, lambda: 1.0);
+
+    $firstWeek = Carbon::parse('2024-01-01');
+    $trainingWeeks = [];
+    foreach (range(0, 20) as $offset) {
+        $trainingWeeks[] = $firstWeek->copy()->addWeeks($offset);
+    }
+    $model->train($trainingWeeks);
+
+    $payload = $model->coefficients();
+    expect($payload)->toHaveKeys(['intercept', 'lambda'])
+        ->and($payload['lambda'])->toBe(1.0)
+        ->and($payload['features'])->toHaveKey('delta_ulsp_lag_0')
+        ->and($payload['features']['delta_ulsp_lag_0'])->toHaveKeys(['beta_standardised', 'mean', 'std_dev']);
+});
+
+// Calling predict() on a model that was never trained must raise a RuntimeException.
+// No fixture rows are seeded: the test never gets as far as needing data.
+it('throws when predict is called before train', function () {
+    $loader = new WeeklyPumpPriceLoader;
+    $spec = new FeatureSpec('ridge-test', [new DeltaUlspLag($loader, lag: 0)]);
+    $untrained = new RidgeRegressionModel(spec: $spec, loader: $loader, lambda: 1.0);
+
+    $untrained->predict(Carbon::parse('2024-06-03'));
+})->throws(RuntimeException::class);
+
+// Only 8 fixture weeks exist and just 5 of them are offered for training; train() is expected
+// to raise rather than fit. Presumably the lag-3 and 8-week moving-average features cannot be
+// built from so little history — confirm against the feature implementations.
+it('throws when training data is too thin to fit the model', function () {
+    seedRidgeFixture(8);
+    $loader = new WeeklyPumpPriceLoader;
+    $spec = new FeatureSpec('ridge-test', [
+        new DeltaUlspLag($loader, lag: 3),
+        new UlspMinusMa8($loader),
+    ]);
+    $model = new RidgeRegressionModel(spec: $spec, loader: $loader, lambda: 1.0);
+
+    $weeks = array_map(fn (int $i): Carbon => Carbon::parse('2024-01-01')->addWeeks($i), range(0, 4));
+    $model->train($weeks);
+})->throws(RuntimeException::class);
+
+// Backtests the ridge model and the naive zero-change model over identical train and
+// evaluation windows on the 30-week fixture, then requires ridge to report the lower
+// mean absolute error (mae_pence).
+it('beats the naive zero-change baseline on the synthetic fixture', function () {
+    seedRidgeFixture(30);
+    $loader = new WeeklyPumpPriceLoader;
+
+    $ridge = new RidgeRegressionModel(
+        spec: new FeatureSpec('ridge-test', [
+            new DeltaUlspLag($loader, lag: 0),
+            new UlspMinusMa8($loader),
+        ]),
+        loader: $loader,
+        lambda: 1.0,
+    );
+    $naive = new NaiveZeroChangeModel;
+    $runner = new BacktestRunner;
+
+    // One definition of the windows for both models. The dates are parsed afresh on every
+    // call because Carbon instances are mutable and must not be shared between runs.
+    $backtest = fn ($model) => $runner->run(
+        $model,
+        trainStart: Carbon::parse('2024-01-01'),
+        trainEnd: Carbon::parse('2024-04-29'),
+        evalStart: Carbon::parse('2024-05-06'),
+        evalEnd: Carbon::parse('2024-07-22'),
+    );
+
+    $ridgeResult = $backtest($ridge);
+    $naiveResult = $backtest($naive);
+
+    $ridgeMae = (float) $ridgeResult->mae_pence;
+    $naiveMae = (float) $naiveResult->mae_pence;
+
+    expect($ridgeMae)->toBeLessThan($naiveMae);
+});