feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector

Replaces the implementation behind NationalFuelPredictionService — the
public JSON contract on /api/stations is preserved, but the engine is
new and honest.

Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md):
1. Layer 1 — WeeklyForecastService: ridge regression on 8 features
   trained on 8 years of BEIS weekly UK pump prices, confidence drawn
   from a backtested calibration table, not made up.
2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over
   station_prices_current. Never speaks about the future.
3. Layer 3 — verdict via rule gates, not confidence multipliers. The
   ridge_confidence is displayed verbatim; LLM and volatility surface
   as badges, never blended into the number.
4. Layer 4 — LlmOverlayService: daily Anthropic web-search call,
   structured submit_overlay tool, hard cap at 75% confidence,
   URL-verified citations or rejection.
5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the
   active flag, OR-combined triggers (Brent move >3%, LLM major
   impact, station churn (gated), watched_events).

Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the
8x8 normal-equation matrix. No external ML dependency. Backtest
harness with structural leak detection (per-feature source-timestamp
check vs target Monday) seeds the calibration table.

Backtest gate (62–68% directional accuracy on the 130-week hold-out)
ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change
baseline by ~30pp on real data.

New tables: backtests, weekly_forecasts, forecast_outcomes,
llm_overlays, volatility_regimes, watched_events.

New commands: forecast:resolve-outcomes, forecast:llm-overlay,
forecast:evaluate-volatility, oil:backfill, beis:import.

Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK,
forecast:evaluate-volatility hourly, beis:import Mon 09:30,
forecast:resolve-outcomes Mon 10:00.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ovidiu U
2026-05-03 08:40:05 +01:00
parent d13a29df01
commit ddd591ad47
63 changed files with 5109 additions and 13 deletions

View File

@@ -0,0 +1,147 @@
<?php
namespace App\Services\Forecasting;
use App\Services\HaversineQuery;
use Illuminate\Support\Facades\DB;
/**
* Layer 2 descriptive snapshot of the present.
*
* Pure SQL aggregates against `station_prices_current` + Haversine on
* `stations.lat / lng`. No ML, no history, no surprises. Layer 2 never
* speaks about the future.
*
* Used by Phase 4's WeeklyForecastService to enrich the public payload
* with descriptive "your area" cards alongside the headline forecast.
*/
final class LocalSnapshotService
{
    /** How many stations `snapshot()` returns in its `cheapest_nearby` list. */
    private const CHEAPEST_LIMIT = 5;

    /**
     * Snapshot for a coordinate (e.g. user's postcode-resolved lat/lng).
     *
     * Every "local" figure in the payload (average, cheapest list, brand
     * split, station count) is computed over the same `$radiusKm` circle so
     * the numbers describe one consistent area.
     *
     * NOTE(review): averages are the raw AVG(price_pence) divided by 100,
     * while `cheapest_nearby[].price_pence` is the raw column value cast to
     * int. Confirm against the schema that consumers expect both scales.
     *
     * @param string $fuelType Value matched against `station_prices_current.fuel_type`.
     * @param float  $lat      Latitude of the centre point.
     * @param float  $lng      Longitude of the centre point.
     * @param int    $radiusKm Search radius in kilometres passed to the Haversine filter.
     *
     * @return array{
     *     national_avg_pence: ?float,
     *     local_avg_pence: ?float,
     *     local_minus_national_pence: ?float,
     *     cheapest_nearby: array<int, array{node_id: string, name: ?string, brand: ?string, price_pence: int, distance_km: float}>,
     *     supermarket_avg_pence: ?float,
     *     major_avg_pence: ?float,
     *     supermarket_gap_pence: ?float,
     *     stations_within_radius: int
     * }
     */
    public function snapshot(string $fuelType, float $lat, float $lng, int $radiusKm = 25): array
    {
        $nationalAvg = $this->nationalAverage($fuelType);
        // Use the caller's radius (previously a hard-coded 50 km), so the local
        // average covers the same area as `stations_within_radius`.
        $localAvg = $this->localAverage($fuelType, $lat, $lng, $radiusKm);
        $cheapest = $this->cheapestNearby($fuelType, $lat, $lng, $radiusKm, self::CHEAPEST_LIMIT);
        [$superAvg, $majorAvg] = $this->brandSplit($fuelType, $lat, $lng, $radiusKm);
        $stationCount = $this->stationCountWithin($fuelType, $lat, $lng, $radiusKm);

        return [
            'national_avg_pence' => $nationalAvg,
            'local_avg_pence' => $localAvg,
            'local_minus_national_pence' => $localAvg !== null && $nationalAvg !== null
                ? round($localAvg - $nationalAvg, 1)
                : null,
            'cheapest_nearby' => $cheapest,
            'supermarket_avg_pence' => $superAvg,
            'major_avg_pence' => $majorAvg,
            'supermarket_gap_pence' => $superAvg !== null && $majorAvg !== null
                ? round($superAvg - $majorAvg, 1)
                : null,
            'stations_within_radius' => $stationCount,
        ];
    }

    /**
     * Average over every `station_prices_current` row of the given fuel type.
     *
     * @return ?float Scaled average, or null when no rows match.
     */
    private function nationalAverage(string $fuelType): ?float
    {
        $avg = DB::table('station_prices_current')
            ->where('fuel_type', $fuelType)
            ->avg('price_pence');

        return $this->scaleAverage($avg);
    }

    /**
     * Average over rows of the given fuel type whose station lies within `$km`.
     *
     * @return ?float Scaled average, or null when no station matches.
     */
    private function localAverage(string $fuelType, float $lat, float $lng, int $km): ?float
    {
        $avg = $this->nearbyQuery($fuelType, $lat, $lng, $km)
            ->avg('station_prices_current.price_pence');

        return $this->scaleAverage($avg);
    }

    /**
     * The `$limit` lowest-priced stations within `$km`, cheapest first.
     *
     * @return array<int, array{node_id: string, name: ?string, brand: ?string, price_pence: int, distance_km: float}>
     */
    private function cheapestNearby(string $fuelType, float $lat, float $lng, int $km, int $limit): array
    {
        [$distance, $distanceBindings] = HaversineQuery::distanceKm($lat, $lng);

        $rows = $this->nearbyQuery($fuelType, $lat, $lng, $km)
            ->selectRaw(
                'stations.node_id, stations.trading_name as name, stations.brand_name as brand, '
                .'station_prices_current.price_pence, '.$distance.' as distance_km',
                $distanceBindings,
            )
            ->orderBy('station_prices_current.price_pence')
            ->limit($limit)
            ->get();

        return $rows->map(static fn ($r): array => [
            'node_id' => (string) $r->node_id,
            'name' => $r->name === null ? null : (string) $r->name,
            'brand' => $r->brand === null ? null : (string) $r->brand,
            'price_pence' => (int) $r->price_pence,
            'distance_km' => round((float) $r->distance_km, 2),
        ])->all();
    }

    /**
     * Average price within `$km`, split by the `stations.is_supermarket` flag.
     *
     * Rows whose flag casts to 1 feed the supermarket average; every other
     * group (including a NULL flag) feeds the "major" average.
     * NOTE(review): if both a 0 group and a NULL group exist, the one
     * iterated last wins. Confirm the column is NOT NULL.
     *
     * @return array{0: ?float, 1: ?float} [supermarket_avg, major_avg]
     */
    private function brandSplit(string $fuelType, float $lat, float $lng, int $km): array
    {
        $rows = $this->nearbyQuery($fuelType, $lat, $lng, $km)
            ->selectRaw('stations.is_supermarket, AVG(station_prices_current.price_pence) as avg_pence')
            ->groupBy('stations.is_supermarket')
            ->get();

        $super = null;
        $major = null;
        foreach ($rows as $r) {
            // A NULL aggregate stays null instead of being cast to 0.0.
            $avg = $this->scaleAverage($r->avg_pence);
            if ((int) $r->is_supermarket === 1) {
                $super = $avg;
            } else {
                $major = $avg;
            }
        }

        return [$super, $major];
    }

    /**
     * Number of price rows of the given fuel type whose station lies within `$km`.
     */
    private function stationCountWithin(string $fuelType, float $lat, float $lng, int $km): int
    {
        return $this->nearbyQuery($fuelType, $lat, $lng, $km)->count();
    }

    /**
     * Base query shared by all radius-limited aggregates: current prices
     * joined to their station, filtered by fuel type and Haversine radius.
     */
    private function nearbyQuery(string $fuelType, float $lat, float $lng, int $km): \Illuminate\Database\Query\Builder
    {
        [$within, $bindings] = HaversineQuery::withinKm($lat, $lng, $km);

        return DB::table('station_prices_current')
            ->join('stations', 'station_prices_current.station_id', '=', 'stations.node_id')
            ->where('station_prices_current.fuel_type', $fuelType)
            ->whereRaw($within, $bindings);
    }

    /**
     * Convert a raw SQL average of `price_pence` into the reported figure:
     * divide by 100 and round to one decimal place.
     *
     * @param int|float|string|null $rawAvg Aggregate as returned by the driver.
     *
     * @return ?float Null when the aggregate itself was null (no rows).
     */
    private function scaleAverage($rawAvg): ?float
    {
        return $rawAvg === null ? null : round((float) $rawAvg / 100, 1);
    }
}