Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest. Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md): 1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, confidence drawn from a backtested calibration table, not made up. 2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future. 3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges, never blended into the number. 4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection. 5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events). Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency. Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table. Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data. New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events. New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import. Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
202 lines
6.3 KiB
PHP
202 lines
6.3 KiB
PHP
<?php
|
||
|
||
namespace App\Services\Forecasting\Models;
|
||
|
||
use App\Services\Forecasting\Contracts\WeeklyForecastModel;
|
||
use App\Services\Forecasting\FeatureSpec;
|
||
use App\Services\Forecasting\LinearAlgebra;
|
||
use App\Services\Forecasting\WeeklyPrediction;
|
||
use App\Services\Forecasting\WeeklyPumpPriceLoader;
|
||
use Carbon\CarbonInterface;
|
||
use RuntimeException;
|
||
|
||
/**
|
||
* Ridge regression on weekly pump prices.
|
||
*
|
||
* Target: ΔULSP[t+1] = ULSP[t+1] − ULSP[t], in pence × 100.
|
||
*
|
||
* Pipeline:
|
||
* - Build (X, y) from training Mondays. Skip any week where a feature
|
||
* value is null OR the actual ΔULSP cannot be computed.
|
||
* - Standardise X (z-score per column) and centre y. Keeps features
|
||
* on comparable scales so the L2 penalty is fair.
|
||
* - Solve β = (XᵀX + λI) ⁻¹ Xᵀy for the standardised problem.
|
||
* - Reconstruct intercept = mean(y) (since X is centred).
|
||
*
|
||
* Prediction:
|
||
* - Build feature vector at $targetMonday. If any feature returns
|
||
* null, predict 0 (treated as 'flat' downstream).
|
||
* - Standardise with the trained scaler, multiply by β, add intercept.
|
||
*
|
||
* Direction:
|
||
* - rising if magnitude > FLAT_THRESHOLD_PENCE_X100
|
||
* - falling if magnitude < −FLAT_THRESHOLD_PENCE_X100
|
||
* - flat otherwise
|
||
*/
|
||
final class RidgeRegressionModel implements WeeklyForecastModel
|
||
{
|
||
private const float FLAT_THRESHOLD_PENCE_X100 = 20.0; // 0.2 p/L
|
||
|
||
/** @var array<int, float>|null Coefficients on standardised features (no intercept). */
|
||
private ?array $beta = null;
|
||
|
||
private ?float $intercept = null;
|
||
|
||
/** @var array<int, float>|null per-feature mean used for standardisation */
|
||
private ?array $featureMeans = null;
|
||
|
||
/** @var array<int, float>|null per-feature std-dev used for standardisation */
|
||
private ?array $featureStdDevs = null;
|
||
|
||
public function __construct(
|
||
private readonly FeatureSpec $spec,
|
||
private readonly WeeklyPumpPriceLoader $loader,
|
||
public readonly float $lambda = 1.0,
|
||
) {}
|
||
|
||
public function featureSpec(): FeatureSpec
|
||
{
|
||
return $this->spec;
|
||
}
|
||
|
||
public function train(array $trainingMondays): void
|
||
{
|
||
$X = [];
|
||
$y = [];
|
||
|
||
foreach ($trainingMondays as $monday) {
|
||
$row = [];
|
||
$skip = false;
|
||
foreach ($this->spec->features as $feature) {
|
||
$v = $feature->valueFor($monday);
|
||
if ($v === null) {
|
||
$skip = true;
|
||
break;
|
||
}
|
||
$row[] = $v;
|
||
}
|
||
if ($skip) {
|
||
continue;
|
||
}
|
||
|
||
$actual = $this->actualDeltaPence($monday);
|
||
if ($actual === null) {
|
||
continue;
|
||
}
|
||
|
||
$X[] = $row;
|
||
$y[] = $actual;
|
||
}
|
||
|
||
if (count($X) < count($this->spec->features) + 2) {
|
||
throw new RuntimeException('RidgeRegressionModel: insufficient training rows after dropping incomplete weeks');
|
||
}
|
||
|
||
// Standardise X (z-score) and centre y.
|
||
$featureCount = count($X[0]);
|
||
$means = array_fill(0, $featureCount, 0.0);
|
||
$stds = array_fill(0, $featureCount, 0.0);
|
||
$n = count($X);
|
||
|
||
for ($j = 0; $j < $featureCount; $j++) {
|
||
$col = array_column($X, $j);
|
||
$means[$j] = array_sum($col) / $n;
|
||
$variance = 0.0;
|
||
foreach ($col as $v) {
|
||
$variance += ($v - $means[$j]) ** 2;
|
||
}
|
||
$variance /= $n;
|
||
$stds[$j] = sqrt($variance);
|
||
// Constant features get sd=1 so we don't divide by zero. Their
|
||
// contribution is then a constant absorbed by the intercept.
|
||
if ($stds[$j] < 1e-12) {
|
||
$stds[$j] = 1.0;
|
||
}
|
||
}
|
||
|
||
$Xstd = [];
|
||
foreach ($X as $row) {
|
||
$r = [];
|
||
for ($j = 0; $j < $featureCount; $j++) {
|
||
$r[] = ($row[$j] - $means[$j]) / $stds[$j];
|
||
}
|
||
$Xstd[] = $r;
|
||
}
|
||
|
||
$yMean = array_sum($y) / $n;
|
||
$yCentred = array_map(fn (float $v): float => $v - $yMean, $y);
|
||
|
||
$this->beta = LinearAlgebra::ridgeSolve($Xstd, $yCentred, $this->lambda);
|
||
$this->intercept = $yMean;
|
||
$this->featureMeans = $means;
|
||
$this->featureStdDevs = $stds;
|
||
}
|
||
|
||
public function predict(CarbonInterface $targetMonday): WeeklyPrediction
|
||
{
|
||
if ($this->beta === null) {
|
||
throw new RuntimeException('RidgeRegressionModel: predict() called before train()');
|
||
}
|
||
|
||
$row = [];
|
||
foreach ($this->spec->features as $feature) {
|
||
$v = $feature->valueFor($targetMonday);
|
||
if ($v === null) {
|
||
return new WeeklyPrediction($targetMonday, 0.0, 'flat');
|
||
}
|
||
$row[] = $v;
|
||
}
|
||
|
||
$magnitude = $this->intercept;
|
||
for ($j = 0, $jc = count($row); $j < $jc; $j++) {
|
||
$z = ($row[$j] - $this->featureMeans[$j]) / $this->featureStdDevs[$j];
|
||
$magnitude += $z * $this->beta[$j];
|
||
}
|
||
|
||
return new WeeklyPrediction($targetMonday, $magnitude, $this->classifyDirection($magnitude));
|
||
}
|
||
|
||
public function coefficients(): ?array
|
||
{
|
||
if ($this->beta === null) {
|
||
return null;
|
||
}
|
||
|
||
$named = [];
|
||
foreach ($this->spec->features as $i => $feature) {
|
||
$named[$feature->name()] = [
|
||
'beta_standardised' => $this->beta[$i],
|
||
'mean' => $this->featureMeans[$i],
|
||
'std_dev' => $this->featureStdDevs[$i],
|
||
];
|
||
}
|
||
|
||
return [
|
||
'intercept' => $this->intercept,
|
||
'lambda' => $this->lambda,
|
||
'features' => $named,
|
||
];
|
||
}
|
||
|
||
private function actualDeltaPence(CarbonInterface $targetMonday): ?float
|
||
{
|
||
$current = $this->loader->ulspPence($targetMonday->toDateString());
|
||
$previous = $this->loader->ulspPence($targetMonday->copy()->subDays(7)->toDateString());
|
||
|
||
if ($current === null || $previous === null) {
|
||
return null;
|
||
}
|
||
|
||
return (float) ($current - $previous);
|
||
}
|
||
|
||
private function classifyDirection(float $magnitude): string
|
||
{
|
||
return match (true) {
|
||
$magnitude > self::FLAT_THRESHOLD_PENCE_X100 => 'rising',
|
||
$magnitude < -self::FLAT_THRESHOLD_PENCE_X100 => 'falling',
|
||
default => 'flat',
|
||
};
|
||
}
|
||
}
|