Files
fuel-alert/app/Services/Forecasting/Models/RidgeRegressionModel.php
Ovidiu U ddd591ad47 feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector
Replaces the implementation behind NationalFuelPredictionService — the
public JSON contract on /api/stations is preserved, but the engine is
new and honest.

Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md):
1. Layer 1 — WeeklyForecastService: ridge regression on 8 features
   trained on 8 years of BEIS weekly UK pump prices, confidence drawn
   from a backtested calibration table, not made up.
2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over
   station_prices_current. Never speaks about the future.
3. Layer 3 — verdict via rule gates, not confidence multipliers. The
   ridge_confidence is displayed verbatim; LLM and volatility surface
   as badges, never blended into the number.
4. Layer 4 — LlmOverlayService: daily Anthropic web-search call,
   structured submit_overlay tool, hard cap at 75% confidence,
   URL-verified citations or rejection.
5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the
   active flag, OR-combined triggers (Brent move >3%, LLM major
   impact, station churn (gated), watched_events).

Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the
8x8 normal-equation matrix. No external ML dependency. Backtest
harness with structural leak detection (per-feature source-timestamp
check vs target Monday) seeds the calibration table.

Backtest gate (62–68% directional accuracy on the 130-week hold-out)
ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change
baseline by ~30pp on real data.

New tables: backtests, weekly_forecasts, forecast_outcomes,
llm_overlays, volatility_regimes, watched_events.

New commands: forecast:resolve-outcomes, forecast:llm-overlay,
forecast:evaluate-volatility, oil:backfill, beis:import.

Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK,
forecast:evaluate-volatility hourly, beis:import Mon 09:30,
forecast:resolve-outcomes Mon 10:00.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 08:40:05 +01:00

202 lines
6.3 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace App\Services\Forecasting\Models;
use App\Services\Forecasting\Contracts\WeeklyForecastModel;
use App\Services\Forecasting\FeatureSpec;
use App\Services\Forecasting\LinearAlgebra;
use App\Services\Forecasting\WeeklyPrediction;
use App\Services\Forecasting\WeeklyPumpPriceLoader;
use Carbon\CarbonInterface;
use RuntimeException;
/**
* Ridge regression on weekly pump prices.
*
* Target: ΔULSP[t+1] = ULSP[t+1] ULSP[t], in pence × 100.
*
* Pipeline:
* - Build (X, y) from training Mondays. Skip any week where a feature
* value is null OR the actual ΔULSP cannot be computed.
* - Standardise X (z-score per column) and centre y. Keeps features
* on comparable scales so the L2 penalty is fair.
* - Solve β = (XᵀX + λI) ⁻¹ Xᵀy for the standardised problem.
* - Reconstruct intercept = mean(y) (since X is centred).
*
* Prediction:
* - Build feature vector at $targetMonday. If any feature returns
* null, predict 0 (treated as 'flat' downstream).
* - Standardise with the trained scaler, multiply by β, add intercept.
*
* Direction:
* - rising if magnitude > FLAT_THRESHOLD_PENCE_X100
* - falling if magnitude < FLAT_THRESHOLD_PENCE_X100
* - flat otherwise
*/
final class RidgeRegressionModel implements WeeklyForecastModel
{
private const float FLAT_THRESHOLD_PENCE_X100 = 20.0; // 0.2 p/L
/** @var array<int, float>|null Coefficients on standardised features (no intercept). */
private ?array $beta = null;
private ?float $intercept = null;
/** @var array<int, float>|null per-feature mean used for standardisation */
private ?array $featureMeans = null;
/** @var array<int, float>|null per-feature std-dev used for standardisation */
private ?array $featureStdDevs = null;
public function __construct(
private readonly FeatureSpec $spec,
private readonly WeeklyPumpPriceLoader $loader,
public readonly float $lambda = 1.0,
) {}
public function featureSpec(): FeatureSpec
{
return $this->spec;
}
public function train(array $trainingMondays): void
{
$X = [];
$y = [];
foreach ($trainingMondays as $monday) {
$row = [];
$skip = false;
foreach ($this->spec->features as $feature) {
$v = $feature->valueFor($monday);
if ($v === null) {
$skip = true;
break;
}
$row[] = $v;
}
if ($skip) {
continue;
}
$actual = $this->actualDeltaPence($monday);
if ($actual === null) {
continue;
}
$X[] = $row;
$y[] = $actual;
}
if (count($X) < count($this->spec->features) + 2) {
throw new RuntimeException('RidgeRegressionModel: insufficient training rows after dropping incomplete weeks');
}
// Standardise X (z-score) and centre y.
$featureCount = count($X[0]);
$means = array_fill(0, $featureCount, 0.0);
$stds = array_fill(0, $featureCount, 0.0);
$n = count($X);
for ($j = 0; $j < $featureCount; $j++) {
$col = array_column($X, $j);
$means[$j] = array_sum($col) / $n;
$variance = 0.0;
foreach ($col as $v) {
$variance += ($v - $means[$j]) ** 2;
}
$variance /= $n;
$stds[$j] = sqrt($variance);
// Constant features get sd=1 so we don't divide by zero. Their
// contribution is then a constant absorbed by the intercept.
if ($stds[$j] < 1e-12) {
$stds[$j] = 1.0;
}
}
$Xstd = [];
foreach ($X as $row) {
$r = [];
for ($j = 0; $j < $featureCount; $j++) {
$r[] = ($row[$j] - $means[$j]) / $stds[$j];
}
$Xstd[] = $r;
}
$yMean = array_sum($y) / $n;
$yCentred = array_map(fn (float $v): float => $v - $yMean, $y);
$this->beta = LinearAlgebra::ridgeSolve($Xstd, $yCentred, $this->lambda);
$this->intercept = $yMean;
$this->featureMeans = $means;
$this->featureStdDevs = $stds;
}
public function predict(CarbonInterface $targetMonday): WeeklyPrediction
{
if ($this->beta === null) {
throw new RuntimeException('RidgeRegressionModel: predict() called before train()');
}
$row = [];
foreach ($this->spec->features as $feature) {
$v = $feature->valueFor($targetMonday);
if ($v === null) {
return new WeeklyPrediction($targetMonday, 0.0, 'flat');
}
$row[] = $v;
}
$magnitude = $this->intercept;
for ($j = 0, $jc = count($row); $j < $jc; $j++) {
$z = ($row[$j] - $this->featureMeans[$j]) / $this->featureStdDevs[$j];
$magnitude += $z * $this->beta[$j];
}
return new WeeklyPrediction($targetMonday, $magnitude, $this->classifyDirection($magnitude));
}
public function coefficients(): ?array
{
if ($this->beta === null) {
return null;
}
$named = [];
foreach ($this->spec->features as $i => $feature) {
$named[$feature->name()] = [
'beta_standardised' => $this->beta[$i],
'mean' => $this->featureMeans[$i],
'std_dev' => $this->featureStdDevs[$i],
];
}
return [
'intercept' => $this->intercept,
'lambda' => $this->lambda,
'features' => $named,
];
}
private function actualDeltaPence(CarbonInterface $targetMonday): ?float
{
$current = $this->loader->ulspPence($targetMonday->toDateString());
$previous = $this->loader->ulspPence($targetMonday->copy()->subDays(7)->toDateString());
if ($current === null || $previous === null) {
return null;
}
return (float) ($current - $previous);
}
private function classifyDirection(float $magnitude): string
{
return match (true) {
$magnitude > self::FLAT_THRESHOLD_PENCE_X100 => 'rising',
$magnitude < -self::FLAT_THRESHOLD_PENCE_X100 => 'falling',
default => 'flat',
};
}
}