Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest. Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md): 1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, confidence drawn from a backtested calibration table, not made up. 2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future. 3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges, never blended into the number. 4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection. 5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events). Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency. Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table. Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data. New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events. New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import. Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
223 lines
7.1 KiB
PHP
223 lines
7.1 KiB
PHP
<?php
|
|
|
|
use App\Models\Backtest;
|
|
use App\Services\Forecasting\BacktestRunner;
|
|
use App\Services\Forecasting\Contracts\ForecastFeature;
|
|
use App\Services\Forecasting\Contracts\WeeklyForecastModel;
|
|
use App\Services\Forecasting\FeatureSpec;
|
|
use App\Services\Forecasting\LeakDetectorException;
|
|
use App\Services\Forecasting\WeeklyPrediction;
|
|
use Carbon\Carbon;
|
|
use Carbon\CarbonInterface;
|
|
use Illuminate\Foundation\Testing\RefreshDatabase;
|
|
use Illuminate\Support\Facades\DB;
|
|
|
|
// Attach Laravel's RefreshDatabase trait to the tests in this file
// (presumably so each test starts from a clean schema — confirm against Pest config).
uses(RefreshDatabase::class);
|
|
|
|
/**
 * Creates a throwaway ForecastFeature for the backtest tests.
 *
 * The feature always evaluates to 0.0. It reports exactly one source date,
 * located $offsetDays away from the target Monday: the default of -7 points
 * at the prior week, while 0 points at the target Monday itself, which lets
 * a test simulate a leaky feature.
 *
 * @param  string  $name  Name reported by the feature.
 * @param  int  $offsetDays  Days added to the target Monday to obtain the source date.
 */
function backtestFeature(string $name, int $offsetDays = -7): ForecastFeature
{
    $feature = new class($name, $offsetDays) implements ForecastFeature
    {
        public function __construct(
            private readonly string $label,
            private readonly int $dayShift,
        ) {}

        public function name(): string
        {
            return $this->label;
        }

        public function valueFor(CarbonInterface $targetMonday): float
        {
            // Constant placeholder value, independent of the target date.
            return 0.0;
        }

        public function sourceDates(CarbonInterface $targetMonday): array
        {
            // Shift a copy so the caller's date instance is left untouched.
            $sourceDate = $targetMonday->copy()->addDays($this->dayShift);

            return [$sourceDate];
        }
    };

    return $feature;
}
|
|
|
|
/**
 * Creates a WeeklyForecastModel stub that returns the same prediction for
 * every target Monday, so tests can force a known directional outcome.
 *
 * The predicted direction is derived from the fixed magnitude: above 0.2 is
 * 'rising', below -0.2 is 'falling', anything in between is 'flat'.
 * Training is a no-op and no coefficients are exposed.
 *
 * @param  float  $alwaysPredictPence  Magnitude returned by every prediction.
 * @param  string  $modelLabel  Label passed to the model's FeatureSpec.
 */
function stubModel(float $alwaysPredictPence, string $modelLabel = 'stub'): WeeklyForecastModel
{
    $model = new class($alwaysPredictPence, $modelLabel) implements WeeklyForecastModel
    {
        public function __construct(
            private readonly float $fixedMagnitude,
            private readonly string $label,
        ) {}

        public function featureSpec(): FeatureSpec
        {
            // A single feature with the default (previous-week) source offset.
            $features = [backtestFeature('lag_1w')];

            return new FeatureSpec(
                modelLabel: $this->label,
                features: $features,
            );
        }

        public function train(array $trainingMondays): void
        {
            // Intentionally empty: the stub has nothing to fit.
        }

        public function predict(CarbonInterface $targetMonday): WeeklyPrediction
        {
            return new WeeklyPrediction(
                targetMonday: $targetMonday,
                magnitudePence: $this->fixedMagnitude,
                direction: $this->directionLabel(),
            );
        }

        public function coefficients(): ?array
        {
            return null;
        }

        /**
         * Maps the fixed magnitude onto a direction using a ±0.2 dead band.
         */
        private function directionLabel(): string
        {
            if ($this->fixedMagnitude > 0.2) {
                return 'rising';
            }

            if ($this->fixedMagnitude < -0.2) {
                return 'falling';
            }

            return 'flat';
        }
    };

    return $model;
}
|
|
|
|
/**
 * Inserts eight consecutive weekly rows into weekly_pump_prices, starting on
 * 2024-01-01. ulsp_pence climbs by 100 per week (the "+1p" step) and
 * ulsd_pence by 80; duty and VAT columns are held constant.
 */
function seedWeeklyPumpPrices(): void
{
    $firstMonday = Carbon::parse('2024-01-01');

    foreach (range(0, 7) as $weekIndex) {
        $weekDate = $firstMonday->copy()->addWeeks($weekIndex);

        $row = [
            'date' => $weekDate->toDateString(),
            'ulsp_pence' => 14000 + ($weekIndex * 100), // +1p per week
            'ulsd_pence' => 15000 + ($weekIndex * 80),
            'ulsp_duty_pence' => 5295,
            'ulsd_duty_pence' => 5295,
            'ulsp_vat_pct' => 20,
            'ulsd_vat_pct' => 20,
        ];

        DB::table('weekly_pump_prices')->insert($row);
    }
}
|
|
|
|
it('refuses to run when the spec has structural leakage', function () {
    seedWeeklyPumpPrices();

    // A model whose only feature sources data from the target Monday itself
    // (offset 0), which the runner is expected to reject.
    $leakyModel = new class implements WeeklyForecastModel
    {
        public function featureSpec(): FeatureSpec
        {
            $sameWeekFeature = backtestFeature('reads_target_week', 0);

            return new FeatureSpec(
                modelLabel: 'leaky',
                features: [$sameWeekFeature],
            );
        }

        public function train(array $trainingMondays): void
        {
            // Nothing to fit.
        }

        public function predict(CarbonInterface $targetMonday): WeeklyPrediction
        {
            return new WeeklyPrediction(
                targetMonday: $targetMonday,
                magnitudePence: 0.0,
                direction: 'flat',
            );
        }

        public function coefficients(): ?array
        {
            return null;
        }
    };

    $window = [
        'trainStart' => Carbon::parse('2024-01-01'),
        'trainEnd' => Carbon::parse('2024-01-29'),
        'evalStart' => Carbon::parse('2024-02-05'),
        'evalEnd' => Carbon::parse('2024-02-19'),
    ];

    $runner = new BacktestRunner;
    $runner->run($leakyModel, ...$window);
})->throws(LeakDetectorException::class);
|
|
|
|
it('persists a backtest row with metrics for a clean run', function () {
    seedWeeklyPumpPrices();

    // Stub that always predicts +1p.
    $model = stubModel(alwaysPredictPence: 100.0);

    $window = [
        'trainStart' => Carbon::parse('2024-01-01'),
        'trainEnd' => Carbon::parse('2024-01-29'),
        'evalStart' => Carbon::parse('2024-02-05'),
        'evalEnd' => Carbon::parse('2024-02-19'),
    ];

    $backtest = (new BacktestRunner)->run($model, ...$window);

    // The run returns a Backtest model and exactly one row is stored.
    expect($backtest)->toBeInstanceOf(Backtest::class)
        ->and(Backtest::query()->count())->toBe(1);

    // The stored row carries the model label, the window bounds and a run timestamp.
    $stored = Backtest::query()->first();

    expect($stored->model_version)->toStartWith('stub-');
    expect($stored->train_start->toDateString())->toBe('2024-01-01');
    expect($stored->eval_end->toDateString())->toBe('2024-02-19');
    expect($stored->ran_at)->not->toBeNull();
});
|
|
|
|
it('computes 100% directional accuracy when stub always nails the direction', function () {
    seedWeeklyPumpPrices();

    // The seeded series climbs 1p per week, and the stub predicts +1p
    // ('rising') every week, so every evaluated week should match.
    $alwaysRising = stubModel(alwaysPredictPence: 100.0);

    $runner = new BacktestRunner;
    $backtest = $runner->run(
        $alwaysRising,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    $accuracy = (float) $backtest->directional_accuracy;

    expect($accuracy)->toBe(100.0);
});
|
|
|
|
it('computes 0% directional accuracy when stub always picks the wrong direction', function () {
    seedWeeklyPumpPrices();

    // The seeded series rises every week while the stub predicts -1p
    // ('falling'), so no evaluated week should match.
    $alwaysFalling = stubModel(alwaysPredictPence: -100.0);

    $runner = new BacktestRunner;
    $backtest = $runner->run(
        $alwaysFalling,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    $accuracy = (float) $backtest->directional_accuracy;

    expect($accuracy)->toBe(0.0);
});
|
|
|
|
it('flags leak_suspected when directional accuracy exceeds 75%', function () {
    seedWeeklyPumpPrices();

    // Always predicting 'rising' against the rising seed data yields 100%
    // accuracy, which is above the 75% mark named in this test's title.
    $alwaysRight = stubModel(alwaysPredictPence: 100.0);

    $window = [
        'trainStart' => Carbon::parse('2024-01-01'),
        'trainEnd' => Carbon::parse('2024-01-29'),
        'evalStart' => Carbon::parse('2024-02-05'),
        'evalEnd' => Carbon::parse('2024-02-19'),
    ];

    $backtest = (new BacktestRunner)->run($alwaysRight, ...$window);

    expect($backtest->leak_suspected)->toBeTrue();
});
|
|
|
|
it('does not flag leak_suspected for realistic accuracy', function () {
    seedWeeklyPumpPrices();

    // A stub predicting 0.0 is classed as 'flat', which never matches the
    // rising seed data. Accuracy is therefore 0%, well below 75, and the
    // leak flag must stay off.
    $alwaysFlat = stubModel(alwaysPredictPence: 0.0);

    $window = [
        'trainStart' => Carbon::parse('2024-01-01'),
        'trainEnd' => Carbon::parse('2024-01-29'),
        'evalStart' => Carbon::parse('2024-02-05'),
        'evalEnd' => Carbon::parse('2024-02-19'),
    ];

    $backtest = (new BacktestRunner)->run($alwaysFlat, ...$window);

    expect($backtest->leak_suspected)->toBeFalse();
});
|