Files
fuel-alert/tests/Unit/Services/Forecasting/Models/RidgeRegressionModelTest.php
Ovidiu U ddd591ad47 feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector
Replaces the implementation behind NationalFuelPredictionService — the
public JSON contract on /api/stations is preserved, but the engine is
new and honest.

Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md):
1. Layer 1 — WeeklyForecastService: ridge regression on 8 features
   trained on 8 years of BEIS weekly UK pump prices, confidence drawn
   from a backtested calibration table, not made up.
2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over
   station_prices_current. Never speaks about the future.
3. Layer 3 — verdict via rule gates, not confidence multipliers. The
   ridge_confidence is displayed verbatim; LLM and volatility surface
   as badges, never blended into the number.
4. Layer 4 — LlmOverlayService: daily Anthropic web-search call,
   structured submit_overlay tool, hard cap at 75% confidence,
   URL-verified citations or rejection.
5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the
   active flag, OR-combined triggers (Brent move >3%, LLM major
   impact, station churn (gated), watched_events).

Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the
8x8 normal-equation matrix. No external ML dependency. Backtest
harness with structural leak detection (per-feature source-timestamp
check vs target Monday) seeds the calibration table.

Backtest gate (62–68% directional accuracy on the 130-week hold-out)
ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change
baseline by ~30pp on real data.

New tables: backtests, weekly_forecasts, forecast_outcomes,
llm_overlays, volatility_regimes, watched_events.

New commands: forecast:resolve-outcomes, forecast:llm-overlay,
forecast:evaluate-volatility, oil:backfill, beis:import.

Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK,
forecast:evaluate-volatility hourly, beis:import Mon 09:30,
forecast:resolve-outcomes Mon 10:00.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 08:40:05 +01:00

153 lines
5.2 KiB
PHP

<?php
use App\Services\Forecasting\BacktestRunner;
use App\Services\Forecasting\Features\DeltaUlspLag;
use App\Services\Forecasting\Features\UlspMinusMa8;
use App\Services\Forecasting\FeatureSpec;
use App\Services\Forecasting\Models\NaiveZeroChangeModel;
use App\Services\Forecasting\Models\RidgeRegressionModel;
use App\Services\Forecasting\WeeklyPumpPriceLoader;
use Carbon\Carbon;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Support\Facades\DB;
// Pest: apply Laravel's RefreshDatabase trait to every test in this file, so
// rows written by one test do not carry over into the next.
uses(RefreshDatabase::class);
/**
 * Seed `weekly_pump_prices` with a deterministic run of consecutive weekly rows.
 *
 * Rows are dated 2024-01-01, 2024-01-08, ... one week apart. A running ULSP
 * price starts at 14000 and a per-week delta is added *before* each row is
 * written (so the first row holds 14050). The delta is 50 for the first row
 * and round(previous delta * 0.8 + 10) for every later row. ULSD is always
 * ULSP + 800; the duty (5295) and VAT (20) columns are constant.
 *
 * NOTE(review): 50 is the fixed point of d -> 0.8 * d + 10, so with these
 * constants the delta is 50 on every row and the price series is a straight
 * line with no noise. Confirm that a constant-delta fixture is what the tests
 * in this file are meant to exercise.
 *
 * @param int $weeks Number of weekly rows to insert; zero or less inserts nothing.
 */
function seedRidgeFixture(int $weeks = 30): void
{
    $firstWeek = Carbon::parse('2024-01-01');
    $ulsp = 14000;
    $previousDelta = 0;

    $week = 0;
    while ($week < $weeks) {
        // The first week gets a fixed kick; later weeks derive from the previous delta.
        $delta = $week === 0
            ? 50
            : (int) round($previousDelta * 0.8 + 10);

        $ulsp += $delta;
        $previousDelta = $delta;

        $row = [
            'date' => $firstWeek->copy()->addWeeks($week)->toDateString(),
            'ulsp_pence' => $ulsp,
            'ulsd_pence' => $ulsp + 800,
            'ulsp_duty_pence' => 5295,
            'ulsd_duty_pence' => 5295,
            'ulsp_vat_pct' => 20,
            'ulsd_vat_pct' => 20,
        ];
        DB::table('weekly_pump_prices')->insert($row);

        $week++;
    }
}
// Smoke test: a ridge model trained on fixture weeks 0..20 (2024-01-01 through
// 2024-05-20) can produce a forecast for 2024-06-03.
// NOTE(review): the description promises a "non-zero" magnitude, but only
// finiteness and a valid direction label are asserted below.
it('train + predict produces a non-zero, finite magnitude', function () {
    seedRidgeFixture(30);

    $loader = new WeeklyPumpPriceLoader;
    $spec = new FeatureSpec('ridge-test', [
        new DeltaUlspLag($loader, lag: 0),
        new DeltaUlspLag($loader, lag: 1),
        new UlspMinusMa8($loader),
    ]);
    $model = new RidgeRegressionModel(spec: $spec, loader: $loader, lambda: 1.0);

    // One Carbon instance per training week, keyed 0..20.
    $training = [];
    for ($week = 0; $week <= 20; $week++) {
        $training[] = Carbon::parse('2024-01-01')->addWeeks($week);
    }

    $model->train($training);
    $forecast = $model->predict(Carbon::parse('2024-06-03'));

    expect(is_finite($forecast->magnitudePence))->toBeTrue();
    expect($forecast->direction)->toBeIn(['rising', 'falling', 'flat']);
});
// Shape test for coefficients(): after training, the payload exposes the
// intercept, the lambda that was passed in, and per-feature statistics keyed
// by feature name.
it('coefficients() returns a structured payload after training', function () {
    seedRidgeFixture(30);

    $loader = new WeeklyPumpPriceLoader;
    $spec = new FeatureSpec('ridge-test', [
        new DeltaUlspLag($loader, lag: 0),
        new DeltaUlspLag($loader, lag: 1),
    ]);
    $model = new RidgeRegressionModel(spec: $spec, loader: $loader, lambda: 1.0);

    // Train on fixture weeks 0..20.
    $training = [];
    for ($week = 0; $week <= 20; $week++) {
        $training[] = Carbon::parse('2024-01-01')->addWeeks($week);
    }
    $model->train($training);

    $payload = $model->coefficients();

    // Top-level keys first, then drill into the lag-0 feature entry. The
    // nested lookups happen only after their parent key has been asserted.
    expect($payload)->toHaveKey('intercept');
    expect($payload)->toHaveKey('lambda');
    expect($payload['lambda'])->toBe(1.0);
    expect($payload['features'])->toHaveKey('delta_ulsp_lag_0');
    expect($payload['features']['delta_ulsp_lag_0'])->toHaveKey('beta_standardised');
    expect($payload['features']['delta_ulsp_lag_0'])->toHaveKey('mean');
    expect($payload['features']['delta_ulsp_lag_0'])->toHaveKey('std_dev');
});
// Guard test: predict() on a model that was never trained must raise a
// RuntimeException. No fixture rows are seeded and train() is never called.
it('throws when predict is called before train', function () {
    $loader = new WeeklyPumpPriceLoader;
    $spec = new FeatureSpec('ridge-test', [
        new DeltaUlspLag($loader, lag: 0),
    ]);
    $untrained = new RidgeRegressionModel(spec: $spec, loader: $loader, lambda: 1.0);

    $targetWeek = Carbon::parse('2024-06-03');
    $untrained->predict($targetWeek);
})->throws(RuntimeException::class);
// Guard test: train() must raise a RuntimeException when too little history
// is available to fit the model.
it('throws when training data is too thin to fit the model', function () {
    // Seed only 8 weeks: the lag-3 and 8-week moving-average features are
    // expected to use up that history, leaving too few usable training rows.
    seedRidgeFixture(8);

    $loader = new WeeklyPumpPriceLoader;
    $spec = new FeatureSpec('ridge-test', [
        new DeltaUlspLag($loader, lag: 3),
        new UlspMinusMa8($loader),
    ]);
    $model = new RidgeRegressionModel(spec: $spec, loader: $loader, lambda: 1.0);

    // Request training on fixture weeks 0..4 only.
    $training = [];
    for ($week = 0; $week <= 4; $week++) {
        $training[] = Carbon::parse('2024-01-01')->addWeeks($week);
    }

    $model->train($training);
})->throws(RuntimeException::class);
// Comparative test: over the same train/eval windows on the synthetic
// fixture, the ridge model's backtest MAE must be strictly lower than the
// naive zero-change model's.
it('beats the naive zero-change baseline on the synthetic fixture', function () {
    seedRidgeFixture(30);

    $loader = new WeeklyPumpPriceLoader;
    $spec = new FeatureSpec('ridge-test', [
        new DeltaUlspLag($loader, lag: 0),
        new UlspMinusMa8($loader),
    ]);
    $ridge = new RidgeRegressionModel(spec: $spec, loader: $loader, lambda: 1.0);
    $naive = new NaiveZeroChangeModel;
    $runner = new BacktestRunner;

    // Train window 2024-01-01..2024-04-29, eval window 2024-05-06..2024-07-22.
    // Fresh Carbon instances are built on every call so the two runs never
    // share date objects.
    $backtest = fn ($model) => $runner->run(
        $model,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-04-29'),
        evalStart: Carbon::parse('2024-05-06'),
        evalEnd: Carbon::parse('2024-07-22'),
    );

    $ridgeResult = $backtest($ridge);
    $naiveResult = $backtest($naive);

    $ridgeMae = (float) $ridgeResult->mae_pence;
    $naiveMae = (float) $naiveResult->mae_pence;

    expect($ridgeMae)->toBeLessThan($naiveMae);
});