Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest. Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md): 1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, confidence drawn from a backtested calibration table, not made up. 2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future. 3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges, never blended into the number. 4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection. 5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events). Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency. Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table. Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data. New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events. New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import. Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
139 lines
3.9 KiB
PHP
139 lines
3.9 KiB
PHP
<?php
|
|
|
|
namespace App\Services\Forecasting;
|
|
|
|
use DateTime;
|
|
use Illuminate\Support\Facades\Cache;
|
|
use Illuminate\Support\Facades\DB;
|
|
use Illuminate\Support\Facades\Http;
|
|
use RuntimeException;
|
|
|
|
/**
 * Pulls the "Weekly road fuel prices (CSV) 2018 to 2026" attachment
 * advertised by gov.uk's content API and upserts its rows into
 * `weekly_pump_prices`.
 *
 * Idempotent: re-running on a day with no new publication is a no-op
 * (rows are matched on `date`, and the content is unchanged).
 *
 * After the upsert the cache is flushed so the next API hit retrains
 * the ridge model on the fresh row.
 */
final class BeisImporter
{
    /** gov.uk content API document that lists the statistics attachments. */
    private const string API_URL = 'https://www.gov.uk/api/content/government/statistics/weekly-road-fuel-prices';

    /**
     * Exact attachment title to look for.
     *
     * NOTE(review): the title embeds a year range, so it presumably changes
     * when gov.uk rolls the range forward — confirm and update when it does.
     */
    private const string ATTACHMENT_TITLE = 'Weekly road fuel prices (CSV) 2018 to 2026';

    /**
     * Runs the full import: resolve the CSV URL, download, parse, upsert,
     * then flush the cache.
     *
     * `upserted` is the number of rows submitted to the upsert (always equal
     * to `parsed`), not a database-reported affected-row count.
     *
     * @return array{
     *     csv_url: string,
     *     parsed: int,
     *     upserted: int,
     *     latest_date: string,
     * }
     *
     * @throws RuntimeException when the attachment cannot be found, has no
     *                          usable URL, or the CSV yields no valid rows.
     */
    public function import(): array
    {
        $url = $this->resolveCsvUrl();
        $rows = $this->parse($this->downloadCsv($url));

        if ($rows === []) {
            throw new RuntimeException('BEIS CSV parsed empty — check delimiter / encoding');
        }

        // Rows are matched on `date`; every other column is overwritten on conflict.
        DB::table('weekly_pump_prices')->upsert(
            $rows,
            ['date'],
            ['ulsp_pence', 'ulsd_pence', 'ulsp_duty_pence', 'ulsd_duty_pence', 'ulsp_vat_pct', 'ulsd_vat_pct'],
        );

        // NOTE(review): this clears the ENTIRE default cache store, not only
        // forecast entries. Consider forgetting the specific forecast key(s)
        // once their names are confirmed.
        Cache::flush();

        $rowCount = count($rows);

        return [
            'csv_url' => $url,
            'parsed' => $rowCount,
            'upserted' => $rowCount,
            // `Y-m-d` strings order chronologically under plain string comparison.
            'latest_date' => (string) max(array_column($rows, 'date')),
        ];
    }

    /**
     * Asks the gov.uk content API for the document and returns the URL of
     * the attachment whose title equals ATTACHMENT_TITLE.
     *
     * @throws RuntimeException when no attachment has that title, or the
     *                          matching attachment has a missing/empty URL.
     */
    private function resolveCsvUrl(): string
    {
        $response = Http::timeout(15)->acceptJson()->get(self::API_URL);
        $response->throw();

        $attachments = $response->json('details.attachments', []);
        if (! is_array($attachments)) {
            // Unexpected payload shape: fall through to the descriptive
            // "not found" error below instead of a foreach warning.
            $attachments = [];
        }

        foreach ($attachments as $attachment) {
            if (! is_array($attachment) || ($attachment['title'] ?? null) !== self::ATTACHMENT_TITLE) {
                continue;
            }

            $url = $attachment['url'] ?? null;
            if (! is_string($url) || $url === '') {
                throw new RuntimeException('BEIS attachment had empty URL');
            }

            return $url;
        }

        throw new RuntimeException(sprintf(
            'gov.uk content API did not return an attachment titled %s',
            self::ATTACHMENT_TITLE,
        ));
    }

    /**
     * Downloads the CSV and returns the raw response body.
     * HTTP client/server errors are raised via `throw()`.
     */
    private function downloadCsv(string $url): string
    {
        $response = Http::timeout(60)->get($url);
        $response->throw();

        return $response->body();
    }

    /**
     * Converts the raw CSV text into rows ready for the upsert.
     *
     * The first line is treated as a header and discarded. A data line is
     * skipped (not an error) when it is blank, has fewer than 7 columns,
     * its first column is not a real `d/m/Y` calendar date, or either pump
     * price (columns 1–2) is not numeric. Columns 1–4 are multiplied by 100
     * and rounded to integers; columns 5–6 are cast to int.
     *
     * @return array<int, array<string, int|string>>
     */
    private function parse(string $csv): array
    {
        $lines = preg_split('/\r\n|\r|\n/', $csv);
        if ($lines === false || count($lines) < 2) {
            return [];
        }

        // Skip header.
        array_shift($lines);

        $rows = [];
        foreach ($lines as $line) {
            $line = trim($line);
            if ($line === '') {
                continue;
            }

            $cols = str_getcsv($line, escape: '\\');
            if (count($cols) < 7) {
                continue;
            }

            $date = $this->parseDate(trim((string) $cols[0]));
            if ($date === null) {
                continue;
            }

            $ulsp = trim((string) $cols[1]);
            $ulsd = trim((string) $cols[2]);
            if (! is_numeric($ulsp) || ! is_numeric($ulsd)) {
                // A blank/garbled price would otherwise be cast to 0 and
                // stored as a real observation.
                continue;
            }

            $rows[] = [
                'date' => $date,
                'ulsp_pence' => (int) round(((float) $ulsp) * 100),
                'ulsd_pence' => (int) round(((float) $ulsd) * 100),
                'ulsp_duty_pence' => (int) round(((float) $cols[3]) * 100),
                'ulsd_duty_pence' => (int) round(((float) $cols[4]) * 100),
                'ulsp_vat_pct' => (int) $cols[5],
                'ulsd_vat_pct' => (int) $cols[6],
            ];
        }

        return $rows;
    }

    /**
     * Parses a `d/m/Y` date and returns it as `Y-m-d`, or null when the
     * text is not a real calendar date.
     *
     * `createFromFormat()` silently rolls impossible dates over (31/02
     * becomes 02/03) and only records a warning, so the parser's
     * warning/error counters are checked as well as the return value.
     */
    private function parseDate(string $raw): ?string
    {
        $date = DateTime::createFromFormat('d/m/Y', $raw);
        if ($date === false) {
            return null;
        }

        // PHP >= 8.2 returns false here when there were no warnings/errors.
        $errors = DateTime::getLastErrors();
        if (is_array($errors) && ($errors['warning_count'] > 0 || $errors['error_count'] > 0)) {
            return null;
        }

        return $date->format('Y-m-d');
    }
}
|