feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector

Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest. Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md): 1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, confidence drawn from a backtested calibration table, not made up. 2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future. 3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges, never blended into the number. 4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection. 5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events). Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency. Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table. Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data. New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events. New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import. Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 08:40:05 +01:00
parent d13a29df01
commit ddd591ad47
63 changed files with 5109 additions and 13 deletions
--- a/app/Services/Forecasting/BacktestRunner.php
+++ b/app/Services/Forecasting/BacktestRunner.php
@@ -0,0 +1,162 @@
+<?php
+
+namespace App\Services\Forecasting;
+
+use App\Models\Backtest;
+use App\Services\Forecasting\Contracts\WeeklyForecastModel;
+use Carbon\CarbonInterface;
+use Illuminate\Support\Facades\DB;
+
+/**
+ * Backtests a WeeklyForecastModel over a train/eval date split and stores
+ * the outcome as a row in the `backtests` table.
+ *
+ * Steps, in order:
+ *   1. Expand both date ranges into lists of Mondays.
+ *   2. Hand the model's feature spec plus every Monday (training and
+ *      eval) to LeakDetector. If the report contains leaks, abort with a
+ *      LeakDetectorException before anything is trained.
+ *   3. Train the model on the training Mondays.
+ *   4. For each eval Monday that has an actual week-on-week ULSP change
+ *      in `weekly_pump_prices`: predict, compare directions, record the
+ *      absolute error and tally the hit into a magnitude bucket.
+ *   5. Create the Backtest row and return it.
+ *
+ * `leak_suspected` is only a *secondary* smell test (set when directional
+ * accuracy exceeds 75). The primary leak defence is step 2.
+ *
+ * NOTE(review): despite the `Pence` suffixes used throughout, deltas are
+ * compared with FLAT_THRESHOLD_PENCE_X100 and divided by 100 before the
+ * MAE is stored, so they appear to be hundredths of a penny. Confirm
+ * against the `weekly_pump_prices` schema and the model's prediction DTO.
+ */
+final class BacktestRunner
+{
+    /** Largest absolute move still classified as 'flat'; 20.0 is 0.2 p/L. */
+    private const float FLAT_THRESHOLD_PENCE_X100 = 20.0;
+
+    /**
+     * @param  LeakDetector  $leakDetector  Validator consulted before any training happens.
+     */
+    public function __construct(
+        private readonly LeakDetector $leakDetector = new LeakDetector(),
+    ) {}
+
+    /**
+     * Trains $model on the training window, scores it on the eval window
+     * and persists the metrics.
+     *
+     * Eval Mondays without an actual price delta are skipped entirely:
+     * the model is not asked to predict them and they do not count
+     * towards any metric. When nothing could be scored, the accuracy and
+     * MAE are stored as null and the calibration table is empty.
+     *
+     * @param  WeeklyForecastModel  $model  Model to train and evaluate.
+     * @param  CarbonInterface  $trainStart  First day of the training window (inclusive).
+     * @param  CarbonInterface  $trainEnd  Last day of the training window (inclusive).
+     * @param  CarbonInterface  $evalStart  First day of the eval window (inclusive).
+     * @param  CarbonInterface  $evalEnd  Last day of the eval window (inclusive).
+     * @return Backtest The freshly created row.
+     *
+     * @throws LeakDetectorException When the leak report flags at least one leak.
+     */
+    public function run(
+        WeeklyForecastModel $model,
+        CarbonInterface $trainStart,
+        CarbonInterface $trainEnd,
+        CarbonInterface $evalStart,
+        CarbonInterface $evalEnd,
+    ): Backtest {
+        $trainWeeks = $this->mondaysBetween($trainStart, $trainEnd);
+        $evalWeeks = $this->mondaysBetween($evalStart, $evalEnd);
+
+        // Leak validation covers both windows and must pass before training.
+        $featureSpec = $model->featureSpec();
+        $leakReport = $this->leakDetector->validate($featureSpec, array_merge($trainWeeks, $evalWeeks));
+        if ($leakReport->hasLeaks()) {
+            throw new LeakDetectorException($leakReport);
+        }
+
+        $model->train($trainWeeks);
+
+        $hits = 0;
+        $scored = 0;
+        $errors = [];
+        $buckets = [];
+
+        foreach ($evalWeeks as $week) {
+            // No actual delta for this week means nothing to score against.
+            $observed = $this->actualDeltaPence($week);
+            if ($observed === null) {
+                continue;
+            }
+
+            $forecast = $model->predict($week);
+            $isHit = $this->classifyDirection($observed) === $forecast->direction;
+
+            $scored++;
+            $hits += (int) $isHit;
+            $errors[] = abs($forecast->magnitudePence - $observed);
+
+            // Buckets are keyed by the *predicted* magnitude and keep
+            // first-seen order.
+            $label = $this->bucketForMagnitude($forecast->magnitudePence);
+            $tally = $buckets[$label] ?? ['correct' => 0, 'total' => 0];
+            $tally['total']++;
+            $tally['correct'] += (int) $isHit;
+            $buckets[$label] = $tally;
+        }
+
+        // Both metrics stay null unless at least one week was scored.
+        $directionalAccuracy = null;
+        $maePence = null;
+        if ($scored > 0) {
+            $directionalAccuracy = round(($hits / $scored) * 100, 2);
+            $maePence = round((array_sum($errors) / count($errors)) / 100, 2);
+        }
+
+        // Per-bucket hit rate; array_map keeps the bucket labels as keys.
+        $calibrationTable = array_map(
+            static fn (array $tally): float => round($tally['correct'] / $tally['total'], 4),
+            $buckets,
+        );
+
+        $attributes = [
+            'model_version' => $featureSpec->modelVersion(),
+            'features_json' => $featureSpec->toArray(),
+            'coefficients_json' => $model->coefficients(),
+            'train_start' => $trainStart->toDateString(),
+            'train_end' => $trainEnd->toDateString(),
+            'eval_start' => $evalStart->toDateString(),
+            'eval_end' => $evalEnd->toDateString(),
+            'directional_accuracy' => $directionalAccuracy,
+            'mae_pence' => $maePence,
+            'calibration_table' => $calibrationTable,
+            'leak_suspected' => ($directionalAccuracy ?? 0.0) > 75.0,
+            'ran_at' => now(),
+        ];
+
+        return Backtest::create($attributes);
+    }
+
+    /**
+     * Lists every Monday from $start to $end, both inclusive, each at the
+     * start of its day. Neither argument is modified.
+     *
+     * @return array<int, CarbonInterface>
+     */
+    private function mondaysBetween(CarbonInterface $start, CarbonInterface $end): array
+    {
+        $lastDay = $end->copy()->startOfDay();
+        $found = [];
+
+        // Reassigning the result of addDay() works for mutable and
+        // immutable Carbon instances alike.
+        for ($day = $start->copy()->startOfDay(); $day->lessThanOrEqualTo($lastDay); $day = $day->addDay()) {
+            if ($day->dayOfWeek !== CarbonInterface::MONDAY) {
+                continue;
+            }
+
+            $found[] = $day->copy();
+        }
+
+        return $found;
+    }
+
+    /**
+     * Week-on-week ULSP change for the week of $targetMonday: the stored
+     * price on that date minus the stored price seven days earlier.
+     *
+     * @return float|null Null when either of the two prices is missing.
+     */
+    private function actualDeltaPence(CarbonInterface $targetMonday): ?float
+    {
+        $thisWeek = $this->ulspOn($targetMonday->toDateString());
+        $lastWeek = $this->ulspOn($targetMonday->copy()->subDays(7)->toDateString());
+
+        if (! isset($thisWeek, $lastWeek)) {
+            return null;
+        }
+
+        return (float) ($thisWeek - $lastWeek);
+    }
+
+    /**
+     * Reads `ulsp_pence` from `weekly_pump_prices` for one date string.
+     *
+     * @return mixed The stored value, or null when nothing is found.
+     */
+    private function ulspOn(string $date): mixed
+    {
+        return DB::table('weekly_pump_prices')
+            ->where('date', $date)
+            ->value('ulsp_pence');
+    }
+
+    /**
+     * Maps a price change to 'rising', 'falling' or 'flat'. A change
+     * exactly on the threshold, in either direction, is 'flat'.
+     */
+    private function classifyDirection(float $deltaPence): string
+    {
+        if ($deltaPence > self::FLAT_THRESHOLD_PENCE_X100) {
+            return 'rising';
+        }
+
+        if ($deltaPence < -self::FLAT_THRESHOLD_PENCE_X100) {
+            return 'falling';
+        }
+
+        return 'flat';
+    }
+
+    /**
+     * Picks the calibration bucket label for a predicted magnitude. Only
+     * the absolute size matters; the sign is ignored.
+     */
+    private function bucketForMagnitude(float $magnitudePence): string
+    {
+        $size = abs($magnitudePence);
+
+        if ($size < 50.0) {
+            return '0.0-0.5p';
+        }
+
+        if ($size < 100.0) {
+            return '0.5-1.0p';
+        }
+
+        return '1.0p+';
+    }
+}