feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector
Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest.

Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md):

1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, with confidence drawn from a backtested calibration table, not made up.
2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future.
3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges and are never blended into the number.
4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection.
5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events).

Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency.

Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table. Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data.

New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events.

New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import.

Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
162
app/Services/Forecasting/BacktestRunner.php
Normal file
162
app/Services/Forecasting/BacktestRunner.php
Normal file
@@ -0,0 +1,162 @@
|
||||
<?php
|
||||
|
||||
namespace App\Services\Forecasting;
|
||||
|
||||
use App\Models\Backtest;
|
||||
use App\Services\Forecasting\Contracts\WeeklyForecastModel;
|
||||
use Carbon\CarbonInterface;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
|
||||
/**
 * Backtests a WeeklyForecastModel over a train/eval date split and stores
 * the outcome as a row in the `backtests` table.
 *
 * Pipeline:
 *   1. Expand both date ranges into lists of Mondays.
 *   2. Hand the model's feature spec and every Monday (train + eval) to the
 *      LeakDetector; abort with LeakDetectorException if it reports a leak.
 *   3. Train the model on the training Mondays.
 *   4. For each eval Monday that has an actual week-on-week ULSP change in
 *      `weekly_pump_prices`: predict, then score direction and absolute error.
 *   5. Persist a Backtest row and return it.
 *
 * Units: despite the `...Pence` names, every delta and magnitude handled in
 * this class is treated as hundredths of a penny per litre (pence x 100) --
 * see the threshold constants below. Only the persisted `mae_pence` value is
 * divided by 100 before it is stored.
 *
 * The `leak_suspected` flag is a *secondary* smell test (true when
 * directional accuracy exceeds LEAK_SUSPECT_ACCURACY_PCT). The primary leak
 * defence is step 2.
 */
final class BacktestRunner
{
    /** Actual moves within +/- this band are classified 'flat' (20 = 0.2 p/L). */
    private const float FLAT_THRESHOLD_PENCE_X100 = 20.0;

    /** Predicted magnitudes below this fall in the '0.0-0.5p' calibration bin. */
    private const float SMALL_MOVE_CEILING_PENCE_X100 = 50.0;

    /** Predicted magnitudes below this (and not small) fall in the '0.5-1.0p' bin. */
    private const float MEDIUM_MOVE_CEILING_PENCE_X100 = 100.0;

    /** Directional accuracy (in percent) above which the run is flagged as suspicious. */
    private const float LEAK_SUSPECT_ACCURACY_PCT = 75.0;

    public function __construct(
        private readonly LeakDetector $leakDetector = new LeakDetector,
    ) {}

    /**
     * Train `$model` on the training window, score it on the eval window and
     * persist the result.
     *
     * Eval Mondays with no actual price change available (either the target
     * week or the week before is missing from `weekly_pump_prices`) are
     * skipped and do not count towards any metric. When nothing could be
     * scored, `directional_accuracy` and `mae_pence` are stored as null and
     * the calibration table is empty.
     *
     * @throws LeakDetectorException when the leak detector reports a leak for
     *                               any training or eval Monday; the model is
     *                               not trained in that case
     */
    public function run(
        WeeklyForecastModel $model,
        CarbonInterface $trainStart,
        CarbonInterface $trainEnd,
        CarbonInterface $evalStart,
        CarbonInterface $evalEnd,
    ): Backtest {
        $trainingMondays = $this->mondaysBetween($trainStart, $trainEnd);
        $evalMondays = $this->mondaysBetween($evalStart, $evalEnd);

        // Leak check runs over BOTH windows and before any training happens.
        $spec = $model->featureSpec();
        $report = $this->leakDetector->validate($spec, [...$trainingMondays, ...$evalMondays]);
        if ($report->hasLeaks()) {
            throw new LeakDetectorException($report);
        }

        $model->train($trainingMondays);

        $scores = $this->scoreEvalMondays($model, $evalMondays);

        $directionalAccuracy = $scores['total'] === 0
            ? null
            : round(($scores['correct'] / $scores['total']) * 100, 2);

        // Errors are accumulated in pence x 100; convert to pence for storage.
        $maePence = $scores['absErrors'] === []
            ? null
            : round((array_sum($scores['absErrors']) / count($scores['absErrors'])) / 100, 2);

        // Per-bin hit rate as a 0..1 fraction. array_map keeps the bin labels
        // as keys; every bin has total >= 1 because bins are created on first use.
        $calibrationTable = array_map(
            static fn (array $bin): float => round($bin['correct'] / $bin['total'], 4),
            $scores['bins'],
        );

        return Backtest::create([
            'model_version' => $spec->modelVersion(),
            'features_json' => $spec->toArray(),
            'coefficients_json' => $model->coefficients(),
            'train_start' => $trainStart->toDateString(),
            'train_end' => $trainEnd->toDateString(),
            'eval_start' => $evalStart->toDateString(),
            'eval_end' => $evalEnd->toDateString(),
            'directional_accuracy' => $directionalAccuracy,
            'mae_pence' => $maePence,
            'calibration_table' => $calibrationTable,
            'leak_suspected' => $directionalAccuracy !== null
                && $directionalAccuracy > self::LEAK_SUSPECT_ACCURACY_PCT,
            'ran_at' => now(),
        ]);
    }

    /**
     * Predict every eval Monday that has an actual delta and tally the results.
     *
     * @param array<int, CarbonInterface> $evalMondays
     *
     * @return array{
     *     correct: int,
     *     total: int,
     *     absErrors: list<float>,
     *     bins: array<string, array{correct: int, total: int}>
     * }
     */
    private function scoreEvalMondays(WeeklyForecastModel $model, array $evalMondays): array
    {
        $correct = 0;
        $total = 0;
        $absErrors = [];
        $bins = [];

        foreach ($evalMondays as $monday) {
            $actualDelta = $this->actualDeltaPence($monday);
            if ($actualDelta === null) {
                // No ground truth for this week: skip without calling the model.
                continue;
            }

            $prediction = $model->predict($monday);
            $hit = $prediction->direction === $this->classifyDirection($actualDelta);

            $total++;
            $correct += $hit ? 1 : 0;
            $absErrors[] = abs($prediction->magnitudePence - $actualDelta);

            // Calibration bins are keyed by the PREDICTED magnitude, not the actual one.
            $bin = $this->bucketForMagnitude($prediction->magnitudePence);
            $bins[$bin] ??= ['correct' => 0, 'total' => 0];
            $bins[$bin]['total']++;
            $bins[$bin]['correct'] += $hit ? 1 : 0;
        }

        return [
            'correct' => $correct,
            'total' => $total,
            'absErrors' => $absErrors,
            'bins' => $bins,
        ];
    }

    /**
     * Every Monday (at start of day) from `$start` to `$end`, both inclusive.
     * Returns an empty list when the range contains no Monday.
     *
     * @return array<int, CarbonInterface>
     */
    private function mondaysBetween(CarbonInterface $start, CarbonInterface $end): array
    {
        $boundary = $end->copy()->startOfDay();
        $cursor = $start->copy()->startOfDay();

        // Jump straight to the first Monday on or after $start instead of
        // testing each calendar day (dayOfWeek: 0 = Sunday .. 6 = Saturday).
        $daysUntilMonday = (CarbonInterface::MONDAY - $cursor->dayOfWeek + 7) % 7;
        $cursor = $cursor->addDays($daysUntilMonday);

        $mondays = [];
        while ($cursor->lessThanOrEqualTo($boundary)) {
            // Store a copy, and reassign the cursor, so this works for both
            // mutable and immutable Carbon instances.
            $mondays[] = $cursor->copy();
            $cursor = $cursor->addWeek();
        }

        return $mondays;
    }

    /**
     * Actual week-on-week ULSP change for the week of `$targetMonday`: the
     * `ulsp_pence` value on that date minus the value seven days earlier.
     * The result is in whatever unit the column stores; the rest of this
     * class treats it as pence x 100.
     *
     * @return float|null null when either of the two rows is missing
     */
    private function actualDeltaPence(CarbonInterface $targetMonday): ?float
    {
        $current = $this->ulspOn($targetMonday);
        if ($current === null) {
            return null;
        }

        $previous = $this->ulspOn($targetMonday->copy()->subDays(7));
        if ($previous === null) {
            return null;
        }

        return (float) ($current - $previous);
    }

    /**
     * Raw `ulsp_pence` value stored for the given date, or null when no row
     * exists (or the stored value is null).
     */
    private function ulspOn(CarbonInterface $date): int|float|string|null
    {
        return DB::table('weekly_pump_prices')
            ->where('date', $date->toDateString())
            ->value('ulsp_pence');
    }

    /**
     * Map an actual delta (pence x 100) to 'rising', 'falling' or 'flat'.
     * Moves of exactly the threshold size count as 'flat'.
     */
    private function classifyDirection(float $deltaPence): string
    {
        return match (true) {
            $deltaPence > self::FLAT_THRESHOLD_PENCE_X100 => 'rising',
            $deltaPence < -self::FLAT_THRESHOLD_PENCE_X100 => 'falling',
            default => 'flat',
        };
    }

    /**
     * Calibration-bin label for a predicted magnitude (pence x 100). The sign
     * is ignored; each bin's lower bound is inclusive and upper bound exclusive.
     */
    private function bucketForMagnitude(float $magnitudePence): string
    {
        $abs = abs($magnitudePence);

        return match (true) {
            $abs < self::SMALL_MOVE_CEILING_PENCE_X100 => '0.0-0.5p',
            $abs < self::MEDIUM_MOVE_CEILING_PENCE_X100 => '0.5-1.0p',
            default => '1.0p+',
        };
    }
}
|
||||
Reference in New Issue
Block a user