Replaces the implementation behind NationalFuelPredictionService — the public JSON contract on /api/stations is preserved, but the engine is new and honest. Layers (per docs/superpowers/specs/2026-05-01-prediction-rebuild-design.md): 1. Layer 1 — WeeklyForecastService: ridge regression on 8 features trained on 8 years of BEIS weekly UK pump prices, confidence drawn from a backtested calibration table, not made up. 2. Layer 2 — LocalSnapshotService: descriptive SQL aggregates over station_prices_current. Never speaks about the future. 3. Layer 3 — verdict via rule gates, not confidence multipliers. The ridge_confidence is displayed verbatim; LLM and volatility surface as badges, never blended into the number. 4. Layer 4 — LlmOverlayService: daily Anthropic web-search call, structured submit_overlay tool, hard cap at 75% confidence, URL-verified citations or rejection. 5. Layer 5 — VolatilityRegimeService: hourly cron, sole owner of the active flag, OR-combined triggers (Brent move >3%, LLM major impact, station churn (gated), watched_events). Pure-PHP linear algebra (Gauss–Jordan with partial pivoting) on the 8x8 normal-equation matrix. No external ML dependency. Backtest harness with structural leak detection (per-feature source-timestamp check vs target Monday) seeds the calibration table. Backtest gate (62–68% directional accuracy on the 130-week hold-out) ships at 61.98% with MAE 0.48 p/L — beats the naive zero-change baseline by ~30pp on real data. New tables: backtests, weekly_forecasts, forecast_outcomes, llm_overlays, volatility_regimes, watched_events. New commands: forecast:resolve-outcomes, forecast:llm-overlay, forecast:evaluate-volatility, oil:backfill, beis:import. Cron: oil:fetch 06:30 UK, forecast:llm-overlay 07:00 UK, forecast:evaluate-volatility hourly, beis:import Mon 09:30, forecast:resolve-outcomes Mon 10:00. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
201 lines
5.7 KiB
PHP
201 lines
5.7 KiB
PHP
<?php
|
||
|
||
namespace App\Services\Forecasting;
|
||
|
||
use InvalidArgumentException;
|
||
use RuntimeException;
|
||
|
||
/**
|
||
* Pure-PHP linear algebra used by RidgeRegressionModel.
|
||
*
|
||
* Matrices are array<int, array<int, float>>. Vectors are array<int, float>.
|
||
* Sized for the v1 ridge model (435 × 8); Gauss–Jordan with partial
|
||
* pivoting is plenty for inverting the 8 × 8 normal-equation matrix.
|
||
*/
|
||
final class LinearAlgebra
|
||
{
|
||
/**
|
||
* Transpose. m is rows × cols → result is cols × rows.
|
||
*
|
||
* @param array<int, array<int, float>> $m
|
||
* @return array<int, array<int, float>>
|
||
*/
|
||
public static function transpose(array $m): array
|
||
{
|
||
$rows = count($m);
|
||
if ($rows === 0) {
|
||
return [];
|
||
}
|
||
$cols = count($m[0]);
|
||
$out = array_fill(0, $cols, array_fill(0, $rows, 0.0));
|
||
for ($i = 0; $i < $rows; $i++) {
|
||
for ($j = 0; $j < $cols; $j++) {
|
||
$out[$j][$i] = $m[$i][$j];
|
||
}
|
||
}
|
||
|
||
return $out;
|
||
}
|
||
|
||
/**
|
||
* Matrix multiply. a (r×k) * b (k×c) → r×c.
|
||
*
|
||
* @param array<int, array<int, float>> $a
|
||
* @param array<int, array<int, float>> $b
|
||
* @return array<int, array<int, float>>
|
||
*/
|
||
public static function multiply(array $a, array $b): array
|
||
{
|
||
$r = count($a);
|
||
$k = count($a[0] ?? []);
|
||
$c = count($b[0] ?? []);
|
||
if (count($b) !== $k) {
|
||
throw new InvalidArgumentException('Matrix multiply dimension mismatch');
|
||
}
|
||
$out = array_fill(0, $r, array_fill(0, $c, 0.0));
|
||
for ($i = 0; $i < $r; $i++) {
|
||
for ($j = 0; $j < $c; $j++) {
|
||
$sum = 0.0;
|
||
for ($p = 0; $p < $k; $p++) {
|
||
$sum += $a[$i][$p] * $b[$p][$j];
|
||
}
|
||
$out[$i][$j] = $sum;
|
||
}
|
||
}
|
||
|
||
return $out;
|
||
}
|
||
|
||
/**
|
||
* Matrix × vector. a (r×k) * v (k) → r-vector.
|
||
*
|
||
* @param array<int, array<int, float>> $a
|
||
* @param array<int, float> $v
|
||
* @return array<int, float>
|
||
*/
|
||
public static function multiplyVector(array $a, array $v): array
|
||
{
|
||
$r = count($a);
|
||
$k = count($v);
|
||
if (count($a[0] ?? []) !== $k) {
|
||
throw new InvalidArgumentException('Matrix × vector dimension mismatch');
|
||
}
|
||
$out = array_fill(0, $r, 0.0);
|
||
for ($i = 0; $i < $r; $i++) {
|
||
$sum = 0.0;
|
||
for ($p = 0; $p < $k; $p++) {
|
||
$sum += $a[$i][$p] * $v[$p];
|
||
}
|
||
$out[$i] = $sum;
|
||
}
|
||
|
||
return $out;
|
||
}
|
||
|
||
/**
|
||
* Identity matrix of size n.
|
||
*
|
||
* @return array<int, array<int, float>>
|
||
*/
|
||
public static function identity(int $n): array
|
||
{
|
||
$out = array_fill(0, $n, array_fill(0, $n, 0.0));
|
||
for ($i = 0; $i < $n; $i++) {
|
||
$out[$i][$i] = 1.0;
|
||
}
|
||
|
||
return $out;
|
||
}
|
||
|
||
/**
|
||
* Solve A x = b using Gauss–Jordan elimination with partial pivoting.
|
||
* A is square n×n. Returns x as an n-vector.
|
||
*
|
||
* @param array<int, array<int, float>> $A
|
||
* @param array<int, float> $b
|
||
* @return array<int, float>
|
||
*/
|
||
public static function solve(array $A, array $b): array
|
||
{
|
||
$n = count($A);
|
||
if (count($b) !== $n) {
|
||
throw new InvalidArgumentException('solve: RHS dimension mismatch');
|
||
}
|
||
// Build augmented matrix.
|
||
$aug = [];
|
||
for ($i = 0; $i < $n; $i++) {
|
||
$aug[$i] = array_merge($A[$i], [$b[$i]]);
|
||
}
|
||
|
||
for ($col = 0; $col < $n; $col++) {
|
||
// Partial pivot: find row with largest |value| in this column.
|
||
$pivot = $col;
|
||
$best = abs($aug[$col][$col]);
|
||
for ($r = $col + 1; $r < $n; $r++) {
|
||
$v = abs($aug[$r][$col]);
|
||
if ($v > $best) {
|
||
$best = $v;
|
||
$pivot = $r;
|
||
}
|
||
}
|
||
if ($best < 1e-12) {
|
||
throw new RuntimeException('solve: matrix is singular or near-singular');
|
||
}
|
||
if ($pivot !== $col) {
|
||
[$aug[$col], $aug[$pivot]] = [$aug[$pivot], $aug[$col]];
|
||
}
|
||
// Normalise pivot row.
|
||
$div = $aug[$col][$col];
|
||
for ($j = 0; $j <= $n; $j++) {
|
||
$aug[$col][$j] /= $div;
|
||
}
|
||
// Eliminate this column from every other row.
|
||
for ($r = 0; $r < $n; $r++) {
|
||
if ($r === $col) {
|
||
continue;
|
||
}
|
||
$factor = $aug[$r][$col];
|
||
if ($factor === 0.0) {
|
||
continue;
|
||
}
|
||
for ($j = 0; $j <= $n; $j++) {
|
||
$aug[$r][$j] -= $factor * $aug[$col][$j];
|
||
}
|
||
}
|
||
}
|
||
|
||
$x = array_fill(0, $n, 0.0);
|
||
for ($i = 0; $i < $n; $i++) {
|
||
$x[$i] = $aug[$i][$n];
|
||
}
|
||
|
||
return $x;
|
||
}
|
||
|
||
/**
|
||
* Ridge solve: β = (XᵀX + λI) ⁻¹ Xᵀy.
|
||
*
|
||
* λ is applied to all coefficients. Caller should standardise X and
|
||
* centre y before calling, then add intercept back externally — the
|
||
* intercept must NOT be regularised.
|
||
*
|
||
* @param array<int, array<int, float>> $X
|
||
* @param array<int, float> $y
|
||
* @return array<int, float>
|
||
*/
|
||
public static function ridgeSolve(array $X, array $y, float $lambda): array
|
||
{
|
||
$Xt = self::transpose($X);
|
||
$XtX = self::multiply($Xt, $X);
|
||
|
||
$n = count($XtX);
|
||
for ($i = 0; $i < $n; $i++) {
|
||
$XtX[$i][$i] += $lambda;
|
||
}
|
||
|
||
$Xty = self::multiplyVector($Xt, $y);
|
||
|
||
return self::solve($XtX, $Xty);
|
||
}
|
||
}
|