feat(forecasting): build calibrated weekly forecast stack with LLM overlay and volatility detector
Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest.

Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md):

1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, confidence drawn from a backtested calibration table, not made up.
2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future.
3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges, never blended into the number.
4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection.
5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events).

Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency.

Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table.

Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data.

New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events.

New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import.

Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
147
app/Services/Forecasting/LocalSnapshotService.php
Normal file
147
app/Services/Forecasting/LocalSnapshotService.php
Normal file
@@ -0,0 +1,147 @@
|
||||
<?php
|
||||
|
||||
namespace App\Services\Forecasting;
|
||||
|
||||
use App\Services\HaversineQuery;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
|
||||
/**
 * Layer 2 — descriptive snapshot of the present.
 *
 * Pure SQL aggregates against `station_prices_current` + Haversine on
 * `stations.lat / lng`. No ML, no history, no surprises. Layer 2 never
 * speaks about the future.
 *
 * Used by Phase 4's WeeklyForecastService to enrich the public payload
 * with descriptive "your area" cards alongside the headline forecast.
 *
 * NOTE(review): every average is divided by 100 before being reported under
 * a `*_pence` key, while `cheapest_nearby[].price_pence` is the raw column
 * value cast to int. Confirm the unit stored in
 * `station_prices_current.price_pence` and whether both should be reported
 * on the same scale.
 */
final class LocalSnapshotService
{
    /** Radius (km) used for `local_avg_pence` when the caller does not override it. */
    private const DEFAULT_LOCAL_AVERAGE_RADIUS_KM = 50;

    /** Number of `cheapest_nearby` entries returned when the caller does not override it. */
    private const DEFAULT_CHEAPEST_LIMIT = 5;

    /**
     * Snapshot for a coordinate (e.g. user's postcode-resolved lat/lng).
     *
     * Runs five independent read-only queries: national average, local
     * average, cheapest nearby stations, supermarket/non-supermarket split
     * and a station count.
     *
     * The local average deliberately has its own radius: it defaults to
     * 50 km regardless of `$radiusKm`, which only scopes the cheapest list,
     * the brand split and the station count.
     *
     * @param string $fuelType             Value matched against `station_prices_current.fuel_type`.
     * @param float  $lat                  Latitude of the centre point.
     * @param float  $lng                  Longitude of the centre point.
     * @param int    $radiusKm             Radius for cheapest list, brand split and station count.
     * @param int    $localAverageRadiusKm Radius for `local_avg_pence`.
     * @param int    $cheapestLimit        Maximum number of `cheapest_nearby` entries.
     *
     * @return array{
     *     national_avg_pence: ?float,
     *     local_avg_pence: ?float,
     *     local_minus_national_pence: ?float,
     *     cheapest_nearby: array<int, array{node_id: string, name: ?string, brand: ?string, price_pence: int, distance_km: float}>,
     *     supermarket_avg_pence: ?float,
     *     major_avg_pence: ?float,
     *     supermarket_gap_pence: ?float,
     *     stations_within_radius: int
     * }
     */
    public function snapshot(
        string $fuelType,
        float $lat,
        float $lng,
        int $radiusKm = 25,
        int $localAverageRadiusKm = self::DEFAULT_LOCAL_AVERAGE_RADIUS_KM,
        int $cheapestLimit = self::DEFAULT_CHEAPEST_LIMIT
    ): array {
        $nationalAvg = $this->nationalAverage($fuelType);
        $localAvg = $this->localAverage($fuelType, $lat, $lng, $localAverageRadiusKm);
        $cheapest = $this->cheapestNearby($fuelType, $lat, $lng, $radiusKm, $cheapestLimit);
        [$superAvg, $majorAvg] = $this->brandSplit($fuelType, $lat, $lng, $radiusKm);
        $stationCount = $this->stationCountWithin($fuelType, $lat, $lng, $radiusKm);

        return [
            'national_avg_pence' => $nationalAvg,
            'local_avg_pence' => $localAvg,
            'local_minus_national_pence' => $this->gap($localAvg, $nationalAvg),
            'cheapest_nearby' => $cheapest,
            'supermarket_avg_pence' => $superAvg,
            'major_avg_pence' => $majorAvg,
            'supermarket_gap_pence' => $this->gap($superAvg, $majorAvg),
            'stations_within_radius' => $stationCount,
        ];
    }

    /**
     * Average price across every `station_prices_current` row of the fuel type.
     *
     * @return ?float Average divided by 100 and rounded to 1 dp, or null when no rows match.
     */
    private function nationalAverage(string $fuelType): ?float
    {
        $avg = DB::table('station_prices_current')
            ->where('fuel_type', $fuelType)
            ->avg('price_pence');

        return $this->toPence($avg);
    }

    /**
     * Average price of the fuel type across stations within `$km` of the point.
     *
     * @return ?float Average divided by 100 and rounded to 1 dp, or null when no rows match.
     */
    private function localAverage(string $fuelType, float $lat, float $lng, int $km): ?float
    {
        $avg = $this->pricedStationsWithin($fuelType, $lat, $lng, $km)
            ->avg('station_prices_current.price_pence');

        return $this->toPence($avg);
    }

    /**
     * Cheapest stations for the fuel type within `$km`, cheapest first.
     *
     * Equal prices are ordered nearest-first so the rows that survive the
     * LIMIT are deterministic rather than left to the database.
     *
     * @return array<int, array{node_id: string, name: ?string, brand: ?string, price_pence: int, distance_km: float}>
     */
    private function cheapestNearby(string $fuelType, float $lat, float $lng, int $km, int $limit): array
    {
        [$distanceSql, $distanceBindings] = HaversineQuery::distanceKm($lat, $lng);

        $rows = $this->pricedStationsWithin($fuelType, $lat, $lng, $km)
            ->selectRaw(
                'stations.node_id, stations.trading_name as name, stations.brand_name as brand, '
                .'station_prices_current.price_pence, '.$distanceSql.' as distance_km',
                $distanceBindings
            )
            ->orderBy('station_prices_current.price_pence')
            ->orderBy('distance_km')
            ->limit($limit)
            ->get();

        return $rows->map(static fn ($row): array => [
            'node_id' => (string) $row->node_id,
            'name' => $row->name === null ? null : (string) $row->name,
            'brand' => $row->brand === null ? null : (string) $row->brand,
            'price_pence' => (int) $row->price_pence,
            'distance_km' => round((float) $row->distance_km, 2),
        ])->all();
    }

    /**
     * Average price for supermarket vs. all other stations within `$km`.
     *
     * Rows are grouped by `stations.is_supermarket` in SQL and then folded
     * into two buckets here: a value that casts to 1 is "supermarket",
     * anything else (0, NULL, ...) is "major". Sums and counts are
     * accumulated per bucket so that several database groups landing in the
     * same bucket are combined instead of the last one overwriting the rest.
     * A group with no non-NULL prices contributes nothing.
     *
     * @return array{0: ?float, 1: ?float} [supermarket_avg, major_avg]
     */
    private function brandSplit(string $fuelType, float $lat, float $lng, int $km): array
    {
        $rows = $this->pricedStationsWithin($fuelType, $lat, $lng, $km)
            ->selectRaw(
                'stations.is_supermarket, '
                .'SUM(station_prices_current.price_pence) as sum_pence, '
                .'COUNT(station_prices_current.price_pence) as priced_count'
            )
            ->groupBy('stations.is_supermarket')
            ->get();

        $sums = ['super' => 0.0, 'major' => 0.0];
        $counts = ['super' => 0, 'major' => 0];

        foreach ($rows as $row) {
            $pricedCount = (int) $row->priced_count;
            if ($pricedCount === 0) {
                // SUM is NULL for such a group; there is no average to report.
                continue;
            }

            $bucket = (int) $row->is_supermarket === 1 ? 'super' : 'major';
            $sums[$bucket] += (float) $row->sum_pence;
            $counts[$bucket] += $pricedCount;
        }

        return [
            $counts['super'] > 0 ? $this->toPence($sums['super'] / $counts['super']) : null,
            $counts['major'] > 0 ? $this->toPence($sums['major'] / $counts['major']) : null,
        ];
    }

    /**
     * Number of joined price rows for the fuel type within `$km` of the point.
     */
    private function stationCountWithin(string $fuelType, float $lat, float $lng, int $km): int
    {
        return $this->pricedStationsWithin($fuelType, $lat, $lng, $km)->count();
    }

    /**
     * Base query shared by every radius-scoped aggregate: current prices
     * joined to their station, filtered by fuel type and by the Haversine
     * radius predicate supplied by HaversineQuery::withinKm().
     */
    private function pricedStationsWithin(
        string $fuelType,
        float $lat,
        float $lng,
        int $km
    ): \Illuminate\Database\Query\Builder {
        [$withinSql, $withinBindings] = HaversineQuery::withinKm($lat, $lng, $km);

        return DB::table('station_prices_current')
            ->join('stations', 'station_prices_current.station_id', '=', 'stations.node_id')
            ->where('station_prices_current.fuel_type', $fuelType)
            ->whereRaw($withinSql, $withinBindings);
    }

    /**
     * Convert a raw aggregate into the reported figure: divide by 100 and
     * round to one decimal place. Null (no matching rows) stays null.
     *
     * @param int|float|string|null $raw Aggregate as returned by the query builder.
     */
    private function toPence($raw): ?float
    {
        return $raw === null ? null : round((float) $raw / 100, 1);
    }

    /**
     * Difference `$a - $b` rounded to one decimal place, or null when either
     * side is unavailable.
     */
    private function gap(?float $a, ?float $b): ?float
    {
        if ($a === null || $b === null) {
            return null;
        }

        return round($a - $b, 1);
    }
}
|
||||
Reference in New Issue
Block a user