feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector

Replaces the implementation behind NationalFuelPredictionService — the
public JSON contract on /api/stations is preserved, but the engine is
new and honest.

Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md):
1. Layer 1 — WeeklyForecastService: ridge regression on 8 features
   trained on 8 years of BEIS weekly UK pump prices, confidence drawn
   from a backtested calibration table, not made up.
2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over
   station_prices_current. Never speaks about the future.
3. Layer 3 — verdict via rule gates, not confidence multipliers. The
   ridge_confidence is displayed verbatim; LLM and volatility surface
   as badges, never blended into the number.
4. Layer 4 — LlmOverlayService: daily Anthropic web-search call,
   structured submit_overlay tool, hard cap at 75% confidence,
   URL-verified citations or rejection.
5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the
   active flag, OR-combined triggers (Brent move >3%, LLM major
   impact, station churn (gated), watched_events).

Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the
8x8 normal-equation matrix. No external ML dependency. Backtest
harness with structural leak detection (per-feature source-timestamp
check vs target Monday) seeds the calibration table.

Backtest gate (62–68% directional accuracy on the 130-week hold-out)
ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change
baseline by ~30pp on real data.

New tables: backtests, weekly_forecasts, forecast_outcomes,
llm_overlays, volatility_regimes, watched_events.

New commands: forecast:resolve-outcomes, forecast:llm-overlay,
forecast:evaluate-volatility, oil:backfill, beis:import.

Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK,
forecast:evaluate-volatility hourly, beis:import Mon 09:30,
forecast:resolve-outcomes Mon 10:00.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ovidiu U
2026-05-03 08:40:05 +01:00
parent d13a29df01
commit ddd591ad47
63 changed files with 5109 additions and 13 deletions

View File

@@ -0,0 +1,201 @@
<?php
namespace App\Services\Forecasting\Models;
use App\Services\Forecasting\Contracts\WeeklyForecastModel;
use App\Services\Forecasting\FeatureSpec;
use App\Services\Forecasting\LinearAlgebra;
use App\Services\Forecasting\WeeklyPrediction;
use App\Services\Forecasting\WeeklyPumpPriceLoader;
use Carbon\CarbonInterface;
use RuntimeException;
/**
* Ridge regression on weekly pump prices.
*
* Target: ΔULSP[t+1] = ULSP[t+1] ULSP[t], in pence × 100.
*
* Pipeline:
* - Build (X, y) from training Mondays. Skip any week where a feature
* value is null OR the actual ΔULSP cannot be computed.
* - Standardise X (z-score per column) and centre y. Keeps features
* on comparable scales so the L2 penalty is fair.
* - Solve β = (XᵀX + λI) ⁻¹ Xᵀy for the standardised problem.
* - Reconstruct intercept = mean(y) (since X is centred).
*
* Prediction:
* - Build feature vector at $targetMonday. If any feature returns
* null, predict 0 (treated as 'flat' downstream).
* - Standardise with the trained scaler, multiply by β, add intercept.
*
* Direction:
* - rising if magnitude > FLAT_THRESHOLD_PENCE_X100
* - falling if magnitude < FLAT_THRESHOLD_PENCE_X100
* - flat otherwise
*/
final class RidgeRegressionModel implements WeeklyForecastModel
{
private const float FLAT_THRESHOLD_PENCE_X100 = 20.0; // 0.2 p/L
/** @var array<int, float>|null Coefficients on standardised features (no intercept). */
private ?array $beta = null;
private ?float $intercept = null;
/** @var array<int, float>|null per-feature mean used for standardisation */
private ?array $featureMeans = null;
/** @var array<int, float>|null per-feature std-dev used for standardisation */
private ?array $featureStdDevs = null;
public function __construct(
private readonly FeatureSpec $spec,
private readonly WeeklyPumpPriceLoader $loader,
public readonly float $lambda = 1.0,
) {}
public function featureSpec(): FeatureSpec
{
return $this->spec;
}
public function train(array $trainingMondays): void
{
$X = [];
$y = [];
foreach ($trainingMondays as $monday) {
$row = [];
$skip = false;
foreach ($this->spec->features as $feature) {
$v = $feature->valueFor($monday);
if ($v === null) {
$skip = true;
break;
}
$row[] = $v;
}
if ($skip) {
continue;
}
$actual = $this->actualDeltaPence($monday);
if ($actual === null) {
continue;
}
$X[] = $row;
$y[] = $actual;
}
if (count($X) < count($this->spec->features) + 2) {
throw new RuntimeException('RidgeRegressionModel: insufficient training rows after dropping incomplete weeks');
}
// Standardise X (z-score) and centre y.
$featureCount = count($X[0]);
$means = array_fill(0, $featureCount, 0.0);
$stds = array_fill(0, $featureCount, 0.0);
$n = count($X);
for ($j = 0; $j < $featureCount; $j++) {
$col = array_column($X, $j);
$means[$j] = array_sum($col) / $n;
$variance = 0.0;
foreach ($col as $v) {
$variance += ($v - $means[$j]) ** 2;
}
$variance /= $n;
$stds[$j] = sqrt($variance);
// Constant features get sd=1 so we don't divide by zero. Their
// contribution is then a constant absorbed by the intercept.
if ($stds[$j] < 1e-12) {
$stds[$j] = 1.0;
}
}
$Xstd = [];
foreach ($X as $row) {
$r = [];
for ($j = 0; $j < $featureCount; $j++) {
$r[] = ($row[$j] - $means[$j]) / $stds[$j];
}
$Xstd[] = $r;
}
$yMean = array_sum($y) / $n;
$yCentred = array_map(fn (float $v): float => $v - $yMean, $y);
$this->beta = LinearAlgebra::ridgeSolve($Xstd, $yCentred, $this->lambda);
$this->intercept = $yMean;
$this->featureMeans = $means;
$this->featureStdDevs = $stds;
}
public function predict(CarbonInterface $targetMonday): WeeklyPrediction
{
if ($this->beta === null) {
throw new RuntimeException('RidgeRegressionModel: predict() called before train()');
}
$row = [];
foreach ($this->spec->features as $feature) {
$v = $feature->valueFor($targetMonday);
if ($v === null) {
return new WeeklyPrediction($targetMonday, 0.0, 'flat');
}
$row[] = $v;
}
$magnitude = $this->intercept;
for ($j = 0, $jc = count($row); $j < $jc; $j++) {
$z = ($row[$j] - $this->featureMeans[$j]) / $this->featureStdDevs[$j];
$magnitude += $z * $this->beta[$j];
}
return new WeeklyPrediction($targetMonday, $magnitude, $this->classifyDirection($magnitude));
}
public function coefficients(): ?array
{
if ($this->beta === null) {
return null;
}
$named = [];
foreach ($this->spec->features as $i => $feature) {
$named[$feature->name()] = [
'beta_standardised' => $this->beta[$i],
'mean' => $this->featureMeans[$i],
'std_dev' => $this->featureStdDevs[$i],
];
}
return [
'intercept' => $this->intercept,
'lambda' => $this->lambda,
'features' => $named,
];
}
private function actualDeltaPence(CarbonInterface $targetMonday): ?float
{
$current = $this->loader->ulspPence($targetMonday->toDateString());
$previous = $this->loader->ulspPence($targetMonday->copy()->subDays(7)->toDateString());
if ($current === null || $previous === null) {
return null;
}
return (float) ($current - $previous);
}
private function classifyDirection(float $magnitude): string
{
return match (true) {
$magnitude > self::FLAT_THRESHOLD_PENCE_X100 => 'rising',
$magnitude < -self::FLAT_THRESHOLD_PENCE_X100 => 'falling',
default => 'flat',
};
}
}