feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector

Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest. Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md): 1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, confidence drawn from a backtested calibration table, not made up. 2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future. 3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges, never blended into the number. 4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection. 5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events). Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency. Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table. Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data. New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events. New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import. Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 08:40:05 +01:00
parent d13a29df01
commit ddd591ad47
63 changed files with 5109 additions and 13 deletions
--- a/app/Services/Forecasting/ReasoningGenerator.php
+++ b/app/Services/Forecasting/ReasoningGenerator.php
@@ -0,0 +1,103 @@
+<?php
+
+namespace App\Services\Forecasting;
+
+use App\Services\Forecasting\Contracts\ForecastFeature;
+use App\Services\Forecasting\Models\RidgeRegressionModel;
+use Carbon\CarbonInterface;
+
+/**
+ * Phase 6 honesty rule: the reasoning text only references features
+ * the model actually used, ranked by how much each contributed to
+ * this week's prediction.
+ *
+ * Contribution is the standardised (z-score × β) for each feature —
+ * the same number the ridge model summed to produce the prediction.
+ * That makes the explanation literally what the model did, not a
+ * narrative invented post-hoc.
+ */
+final class ReasoningGenerator
+{
+    /**
+     * Reader-facing phrase for each feature name the reasoning text may cite.
+     *
+     * Both seasonal terms deliberately share one phrase. A feature name that
+     * is missing from this map is printed verbatim instead.
+     *
+     * @var array<string, string>
+     */
+    private const array PHRASES = [
+        'delta_ulsp_lag_0' => "last week's pump price move",
+        'delta_ulsp_lag_1' => 'the pump price move two weeks ago',
+        'delta_ulsp_lag_3' => 'the pump price move four weeks ago',
+        'delta_ulsd_lag_0' => "last week's diesel move",
+        'ulsp_minus_ma8' => "the gap between this week's pump price and its 8-week average",
+        'week_of_year_sin' => 'the seasonal pattern',
+        'week_of_year_cos' => 'the seasonal pattern',
+        'is_pre_bank_holiday' => 'an upcoming bank holiday',
+    ];
+
+    /** Confidence score below which no reasoning is offered at all. */
+    private const int MIN_CONFIDENCE = 40;
+
+    /**
+     * Build the explanation text that accompanies a weekly forecast.
+     *
+     * The text is assembled from up to four parts, in this order: a headline
+     * (direction and size of the move), the single largest-contributing
+     * feature, an optional fuel-duty caveat, and an optional trailing hit
+     * rate. When $confidence is below the minimum, a fixed "staying silent"
+     * sentence is returned and nothing else is evaluated.
+     *
+     * @param  RidgeRegressionModel  $model  Its coefficients()['features'] entries supply the
+     *                                       mean, std_dev and beta_standardised of each feature.
+     * @param  WeeklyPrediction  $prediction  Supplies direction and magnitudePence for the headline.
+     * @param  array<int, ForecastFeature>  $features  Features evaluated at $targetMonday.
+     * @param  CarbonInterface  $targetMonday  Date handed to each feature's valueFor().
+     * @param  int  $confidence  Forecast confidence; below 40 suppresses the reasoning.
+     * @param  bool  $flaggedDutyChange  Appends the fuel-duty caveat when true.
+     * @param  float|null  $trailingHitRate  Hit rate as a fraction (shown multiplied by 100),
+     *                                       or null to omit the accuracy sentence.
+     */
+    public function generate(
+        RidgeRegressionModel $model,
+        WeeklyPrediction $prediction,
+        array $features,
+        CarbonInterface $targetMonday,
+        int $confidence,
+        bool $flaggedDutyChange,
+        ?float $trailingHitRate,
+    ): string {
+        if ($confidence < self::MIN_CONFIDENCE) {
+            return 'Not enough signal in the historical pattern to call this week — staying silent.';
+        }
+
+        $contributions = $this->contributions($model, $features, $targetMonday);
+
+        $text = $this->headline($prediction).' '.$this->dominantFeatureSentence($contributions);
+
+        if ($flaggedDutyChange) {
+            $text .= ' Recent fuel duty change may skew accuracy for the next several weeks.';
+        }
+
+        if ($trailingHitRate !== null) {
+            $text .= sprintf(' Last 13 weeks: %d%% hit rate.', (int) round($trailingHitRate * 100));
+        }
+
+        return $text;
+    }
+
+    /**
+     * Compute the standardised contribution (z-score × β) of every usable feature.
+     *
+     * A feature is left out when the model holds no metadata under its name
+     * or when it has no value for $targetMonday.
+     *
+     * @param  array<int, ForecastFeature>  $features
+     * @return array<string, float> Contribution keyed by feature name, in input order.
+     */
+    private function contributions(
+        RidgeRegressionModel $model,
+        array $features,
+        CarbonInterface $targetMonday,
+    ): array {
+        $featuresMeta = ($model->coefficients() ?? [])['features'] ?? [];
+
+        $contributions = [];
+        foreach ($features as $feature) {
+            $name = $feature->name();
+            $meta = $featuresMeta[$name] ?? null;
+            if ($meta === null) {
+                continue;
+            }
+
+            $value = $feature->valueFor($targetMonday);
+            if ($value === null) {
+                continue;
+            }
+
+            // A zero (or missing-as-falsy) std_dev would divide by zero, so
+            // the deviation from the mean is used unscaled in that case.
+            $zScore = ($value - $meta['mean']) / ($meta['std_dev'] ?: 1.0);
+            $contributions[$name] = $zScore * $meta['beta_standardised'];
+        }
+
+        return $contributions;
+    }
+
+    /**
+     * Headline sentence stating the expected direction and size of the move.
+     *
+     * Any direction other than 'rising' or 'falling' yields the "flat" sentence.
+     */
+    private function headline(WeeklyPrediction $prediction): string
+    {
+        // NOTE(review): dividing by 100 and printing "p/L" implies magnitudePence
+        // is held in hundredths of a penny — confirm against WeeklyPrediction.
+        $pence = number_format(round(abs($prediction->magnitudePence) / 100, 1), 1);
+
+        return match ($prediction->direction) {
+            'rising' => sprintf('Model expects pump prices to rise by ~%sp/L next week.', $pence),
+            'falling' => sprintf('Model expects pump prices to fall by ~%sp/L next week.', $pence),
+            default => 'Pump prices are likely flat next week.',
+        };
+    }
+
+    /**
+     * Sentence naming the feature with the largest absolute contribution.
+     *
+     * Ties go to the feature that appears first. When there are no
+     * contributions, or every contribution is exactly zero, no feature moved
+     * the prediction, so the generic "no single dominant signal" sentence is
+     * returned rather than naming a driver that did nothing.
+     *
+     * @param  array<string, float>  $contributions
+     */
+    private function dominantFeatureSentence(array $contributions): string
+    {
+        $topName = null;
+        $topSize = 0.0;
+
+        foreach ($contributions as $name => $contribution) {
+            $size = abs($contribution);
+            // Strict ">" keeps the earliest feature on ties and never
+            // selects a feature whose contribution is zero.
+            if ($size > $topSize) {
+                $topName = $name;
+                $topSize = $size;
+            }
+        }
+
+        if ($topName === null) {
+            return 'Drawn from the full feature set with no single dominant signal.';
+        }
+
+        return sprintf('Driver: %s.', self::PHRASES[$topName] ?? $topName);
+    }
+}