Files
fuel-alert/tests/Unit/Services/Forecasting/BacktestRunnerTest.php
Ovidiu U ddd591ad47 feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector
Replaces the implementation behind NationalFuelPredictionService — the
public JSON contract on /api/stations is preserved, but the engine is
new and honest.

Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md):
1. Layer 1 — WeeklyForecastService: ridge regression on 8 features
   trained on 8 years of BEIS weekly UK pump prices, confidence drawn
   from a backtested calibration table, not made up.
2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over
   station_prices_current. Never speaks about the future.
3. Layer 3 — verdict via rule gates, not confidence multipliers. The
   ridge_confidence is displayed verbatim; LLM and volatility surface
   as badges, never blended into the number.
4. Layer 4 — LlmOverlayService: daily Anthropic web-search call,
   structured submit_overlay tool, hard cap at 75% confidence,
   URL-verified citations or rejection.
5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the
   active flag, OR-combined triggers (Brent move >3%, LLM major
   impact, station churn (gated), watched_events).

Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the
8x8 normal-equation matrix. No external ML dependency. Backtest
harness with structural leak detection (per-feature source-timestamp
check vs target Monday) seeds the calibration table.

Backtest gate (62–68% directional accuracy on the 130-week hold-out)
ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change
baseline by ~30pp on real data.

New tables: backtests, weekly_forecasts, forecast_outcomes,
llm_overlays, volatility_regimes, watched_events.

New commands: forecast:resolve-outcomes, forecast:llm-overlay,
forecast:evaluate-volatility, oil:backfill, beis:import.

Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK,
forecast:evaluate-volatility hourly, beis:import Mon 09:30,
forecast:resolve-outcomes Mon 10:00.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-03 08:40:05 +01:00

223 lines
7.1 KiB
PHP

<?php
use App\Models\Backtest;
use App\Services\Forecasting\BacktestRunner;
use App\Services\Forecasting\Contracts\ForecastFeature;
use App\Services\Forecasting\Contracts\WeeklyForecastModel;
use App\Services\Forecasting\FeatureSpec;
use App\Services\Forecasting\LeakDetectorException;
use App\Services\Forecasting\WeeklyPrediction;
use Carbon\Carbon;
use Carbon\CarbonInterface;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Support\Facades\DB;
uses(RefreshDatabase::class);
/**
 * Creates a stub forecast feature for backtest scenarios.
 *
 * The feature always yields 0.0 as its value and declares exactly one source
 * date: the target Monday shifted by $offsetDays. The default of -7 points at
 * the previous week; an offset of 0 points at the target Monday itself, which
 * is how the leakage test in this file simulates a leaky feature.
 *
 * @param  string  $name  Name reported by the feature.
 * @param  int  $offsetDays  Days added to the target Monday to get the source date.
 */
function backtestFeature(string $name, int $offsetDays = -7): ForecastFeature
{
    $feature = new class($name, $offsetDays) implements ForecastFeature
    {
        public function __construct(
            private readonly string $label,
            private readonly int $shiftDays,
        ) {}

        /** Returns the name supplied when the feature was built. */
        public function name(): string
        {
            return $this->label;
        }

        /** Constant placeholder: the value is 0.0 for every target Monday. */
        public function valueFor(CarbonInterface $targetMonday): float
        {
            return 0.0;
        }

        /** Single-element list holding the target Monday shifted by the configured offset. */
        public function sourceDates(CarbonInterface $targetMonday): array
        {
            // copy() keeps the caller's date instance untouched.
            $sourceDate = $targetMonday->copy()->addDays($this->shiftDays);

            return [$sourceDate];
        }
    };

    return $feature;
}
/**
 * Creates a stub forecast model that returns the same magnitude for every week,
 * so tests can engineer specific accuracy / MAE outcomes.
 *
 * The predicted direction follows from the fixed magnitude: above 0.2 is
 * 'rising', below -0.2 is 'falling', anything else is 'flat'.
 *
 * @param  float  $alwaysPredictPence  Magnitude returned by every prediction.
 * @param  string  $modelLabel  Label placed on the model's feature spec.
 */
function stubModel(float $alwaysPredictPence, string $modelLabel = 'stub'): WeeklyForecastModel
{
    $model = new class($alwaysPredictPence, $modelLabel) implements WeeklyForecastModel
    {
        public function __construct(
            private readonly float $fixedMagnitude,
            private readonly string $label,
        ) {}

        /** Spec carrying the configured label and a single non-leaky 'lag_1w' feature. */
        public function featureSpec(): FeatureSpec
        {
            $features = [backtestFeature('lag_1w')];

            return new FeatureSpec(
                modelLabel: $this->label,
                features: $features,
            );
        }

        /** Training is a no-op: the stub has nothing to learn. */
        public function train(array $trainingMondays): void {}

        /** Returns the fixed magnitude together with its derived direction. */
        public function predict(CarbonInterface $targetMonday): WeeklyPrediction
        {
            $direction = 'flat';
            if ($this->fixedMagnitude > 0.2) {
                $direction = 'rising';
            } elseif ($this->fixedMagnitude < -0.2) {
                $direction = 'falling';
            }

            return new WeeklyPrediction(
                targetMonday: $targetMonday,
                magnitudePence: $this->fixedMagnitude,
                direction: $direction,
            );
        }

        /** The stub exposes no coefficients. */
        public function coefficients(): ?array
        {
            return null;
        }
    };

    return $model;
}
/**
 * Seeds weekly_pump_prices with eight consecutive weekly rows starting on
 * 2024-01-01 (last row dated 2024-02-19).
 *
 * ulsp_pence climbs by 100 per week and ulsd_pence by 80 per week; duty and
 * VAT columns are constant. The original fixture note describes the +100 step
 * as "+1p" — presumably the column stores hundredths of a penny; confirm
 * against the table schema.
 *
 * Rows are collected first and written with one bulk insert rather than one
 * INSERT statement per week.
 */
function seedWeeklyPumpPrices(): void
{
    $firstMonday = Carbon::parse('2024-01-01');
    $rows = [];

    for ($week = 0; $week < 8; $week++) {
        $rows[] = [
            // copy() so the base date is never mutated between iterations.
            'date' => $firstMonday->copy()->addWeeks($week)->toDateString(),
            'ulsp_pence' => 14000 + ($week * 100), // gently rising series
            'ulsd_pence' => 15000 + ($week * 80),
            'ulsp_duty_pence' => 5295,
            'ulsd_duty_pence' => 5295,
            'ulsp_vat_pct' => 20,
            'ulsd_vat_pct' => 20,
        ];
    }

    // Passing a list of rows to insert() writes them all in a single statement.
    DB::table('weekly_pump_prices')->insert($rows);
}
// A model whose only feature declares the target Monday itself as a source
// date (offset 0) is expected to make run() throw LeakDetectorException.
it('refuses to run when the spec has structural leakage', function () {
    seedWeeklyPumpPrices();

    $leakyModel = new class implements WeeklyForecastModel
    {
        public function featureSpec(): FeatureSpec
        {
            // Offset 0 → the feature's source date equals the target Monday.
            $sameWeekFeature = backtestFeature('reads_target_week', 0);

            return new FeatureSpec(
                modelLabel: 'leaky',
                features: [$sameWeekFeature],
            );
        }

        public function train(array $trainingMondays): void {}

        public function predict(CarbonInterface $targetMonday): WeeklyPrediction
        {
            return new WeeklyPrediction($targetMonday, 0.0, 'flat');
        }

        public function coefficients(): ?array
        {
            return null;
        }
    };

    $runner = new BacktestRunner;

    $runner->run(
        $leakyModel,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );
})->throws(LeakDetectorException::class);
// A clean run returns a Backtest model and leaves exactly one stored row
// carrying the model version, the requested window and a run timestamp.
it('persists a backtest row with metrics for a clean run', function () {
    seedWeeklyPumpPrices();

    $model = stubModel(alwaysPredictPence: 100.0); // always predicts +1p

    $backtest = (new BacktestRunner)->run(
        $model,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    expect($backtest)->toBeInstanceOf(Backtest::class)
        ->and(Backtest::query()->count())->toBe(1);

    $stored = Backtest::query()->first();

    expect($stored->model_version)->toStartWith('stub-');
    expect($stored->train_start->toDateString())->toBe('2024-01-01');
    expect($stored->eval_end->toDateString())->toBe('2024-02-19');
    expect($stored->ran_at)->not->toBeNull();
});
// The seeded series climbs every week, so its direction is always 'rising'.
// A stub that always predicts +1p (rising) should match on every evaluated week.
it('computes 100% directional accuracy when stub always nails the direction', function () {
    seedWeeklyPumpPrices();

    $alwaysRising = stubModel(alwaysPredictPence: 100.0);

    $backtest = (new BacktestRunner)->run(
        $alwaysRising,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    $accuracy = (float) $backtest->directional_accuracy;

    expect($accuracy)->toBe(100.0);
});
// The seeded series climbs every week, while the stub predicts -1p (falling)
// each time, so no evaluated week should match → 0% accuracy.
it('computes 0% directional accuracy when stub always picks the wrong direction', function () {
    seedWeeklyPumpPrices();

    $alwaysFalling = stubModel(alwaysPredictPence: -100.0);

    $backtest = (new BacktestRunner)->run(
        $alwaysFalling,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    $accuracy = (float) $backtest->directional_accuracy;

    expect($accuracy)->toBe(0.0);
});
// A stub that is right every week reaches 100% accuracy, which is above the
// 75% mark named in the test title, so the run should be flagged.
it('flags leak_suspected when directional accuracy exceeds 75%', function () {
    seedWeeklyPumpPrices();

    $alwaysRight = stubModel(alwaysPredictPence: 100.0);

    $backtest = (new BacktestRunner)->run(
        $alwaysRight,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    $flagged = $backtest->leak_suspected;

    expect($flagged)->toBeTrue();
});
// The seeded data rises every week, so a stub that always predicts 'flat'
// (magnitude 0.0) is wrong each time → 0% accuracy, well below 75, and the
// run must not be flagged.
it('does not flag leak_suspected for realistic accuracy', function () {
    seedWeeklyPumpPrices();

    $alwaysFlat = stubModel(alwaysPredictPence: 0.0);

    $backtest = (new BacktestRunner)->run(
        $alwaysFlat,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    $flagged = $backtest->leak_suspected;

    expect($flagged)->toBeFalse();
});