Implements PredictionRequest (fuel_type validation with ValueError→ValidationException), PredictionController delegating to NationalFuelPredictionService, and 5 feature tests. Also fixes LEAST() MySQL-only function to a CASE WHEN expression for SQLite test compatibility. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
490 lines
19 KiB
PHP
490 lines
19 KiB
PHP
<?php
|
|
|
|
namespace App\Services;
|
|
|
|
use App\Enums\FuelType;
|
|
use App\Models\StationPriceCurrent;
|
|
use Illuminate\Support\Facades\DB;
|
|
|
|
class NationalFuelPredictionService
{
    /** Minimum R² a regression must reach before the trend signal accepts it. */
    private const float R_SQUARED_THRESHOLD = 0.5;

    /** Minimum absolute regression slope (per day) that counts as a rise or fall rather than flat. */
    private const float SLOPE_THRESHOLD_PENCE = 0.3;

    /** Number of days ahead that the predicted change is projected over. */
    private const int PREDICTION_HORIZON_DAYS = 7;

    /**
     * Build a price-direction prediction for one fuel type from several independent signals.
     *
     * Trend, day-of-week, brand-behaviour and stickiness signals are aggregated into a
     * direction and a 0-100 confidence score. Regional momentum is computed only when both
     * coordinates are supplied and is reported under `signals`, but it has no weight in the
     * aggregate. National momentum is always reported as disabled.
     *
     * NOTE(review): `current_avg` divides `price_pence` by 100, whereas the slope-based figures
     * (`predicted_change_pence`, the slope threshold) use the column's raw units — confirm
     * that both are really expressed in pence.
     *
     * @param  FuelType  $fuelType  Fuel type whose backing value filters every query.
     * @param  float|null  $lat  Latitude for the regional signal; ignored unless $lng is also given.
     * @param  float|null  $lng  Longitude for the regional signal; ignored unless $lat is also given.
     * @return array{
     *     fuel_type: string,
     *     current_avg: float,
     *     predicted_direction: string,
     *     predicted_change_pence: float,
     *     confidence_score: float,
     *     confidence_label: string,
     *     action: string,
     *     reasoning: string,
     *     prediction_horizon_days: int,
     *     region_key: string,
     *     methodology: string,
     *     signals: array
     * }
     */
    public function predict(FuelType $fuelType, ?float $lat = null, ?float $lng = null): array
    {
        $currentAvg = $this->getCurrentNationalAverage($fuelType);
        $trend = $this->computeTrendSignal($fuelType);
        $dayOfWeek = $this->computeDayOfWeekSignal($fuelType);
        $brandBehaviour = $this->computeBrandBehaviourSignal($fuelType);
        $stickiness = $this->computeStickinessSignal($fuelType);

        $nationalMomentum = $this->disabledSignal('National momentum disabled for national predictions');
        $regionalMomentum = $lat !== null && $lng !== null
            ? $this->computeRegionalMomentumSignal($fuelType, $lat, $lng)
            : $this->disabledSignal('No coordinates provided for regional momentum analysis');

        // Keys must match the weight table in aggregateSignals(); unweighted keys are ignored there.
        [$direction, $confidenceScore] = $this->aggregateSignals([
            'trend' => $trend,
            'dayOfWeek' => $dayOfWeek,
            'brandBehaviour' => $brandBehaviour,
            'nationalMomentum' => $nationalMomentum,
            'regionalMomentum' => $regionalMomentum,
            'stickiness' => $stickiness,
        ]);

        $slope = $trend['slope'] ?? 0.0;
        $predictedChangePence = round($slope * self::PREDICTION_HORIZON_DAYS, 1);

        $confidenceLabel = match (true) {
            $confidenceScore >= 70 => 'high',
            $confidenceScore >= 40 => 'medium',
            default => 'low',
        };

        $action = match ($direction) {
            'up' => 'fill_now',
            'down' => 'wait',
            default => 'no_signal',
        };

        return [
            'fuel_type' => $fuelType->value,
            'current_avg' => $currentAvg,
            'predicted_direction' => $direction,
            'predicted_change_pence' => $predictedChangePence,
            'confidence_score' => $confidenceScore,
            'confidence_label' => $confidenceLabel,
            'action' => $action,
            'reasoning' => $this->buildReasoning($direction, $slope, $trend, $brandBehaviour),
            'prediction_horizon_days' => self::PREDICTION_HORIZON_DAYS,
            'region_key' => 'national',
            'methodology' => 'multi_signal_live_fallback',
            'signals' => [
                'trend' => $trend,
                'day_of_week' => $dayOfWeek,
                'brand_behaviour' => $brandBehaviour,
                'national_momentum' => $nationalMomentum,
                'regional_momentum' => $regionalMomentum,
                'price_stickiness' => $stickiness,
            ],
        ];
    }

    /**
     * Average current price for the fuel type across all stations, divided by 100 and
     * rounded to one decimal place. Returns 0.0 when there are no current prices.
     */
    private function getCurrentNationalAverage(FuelType $fuelType): float
    {
        $avg = StationPriceCurrent::where('fuel_type', $fuelType->value)->avg('price_pence');

        return $avg !== null ? round((float) $avg / 100, 1) : 0.0;
    }

    /**
     * Linear regression on daily national average prices.
     *
     * Tries a 5-day lookback first and falls back to 14 days when the shorter window has
     * fewer than two daily points or its R² is below the threshold. If neither window
     * qualifies, a disabled signal (with zero slope and R²) is returned.
     *
     * The regression uses the position of each day in the result set as x, so days with no
     * rows are not represented as gaps.
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool, slope: float, r_squared: float}
     */
    private function computeTrendSignal(FuelType $fuelType): array
    {
        foreach ([5, 14] as $lookbackDays) {
            $rows = DB::table('station_prices')
                ->where('fuel_type', $fuelType->value)
                ->where('price_effective_at', '>=', now()->subDays($lookbackDays))
                ->selectRaw('DATE(price_effective_at) as day, AVG(price_pence) as avg_price')
                ->groupBy('day')
                ->orderBy('day')
                ->get();

            if ($rows->count() < 2) {
                continue;
            }

            $regression = $this->linearRegression(
                $rows->pluck('avg_price')->map(static fn ($v): float => (float) $v)->values()->all(),
            );

            if ($regression['r_squared'] < self::R_SQUARED_THRESHOLD) {
                continue;
            }

            $slope = $regression['slope'];
            $rSquared = $regression['r_squared'];

            $direction = match (true) {
                $slope >= self::SLOPE_THRESHOLD_PENCE => 'up',
                $slope <= -self::SLOPE_THRESHOLD_PENCE => 'down',
                default => 'stable',
            };

            // Score saturates at ±1.0 once the slope reaches 2 units per day.
            $score = $direction === 'stable'
                ? 0.0
                : min(1.0, abs($slope) / 2.0) * ($slope > 0 ? 1 : -1);

            // Project over the prediction horizon, which is the period the detail text quotes.
            $projected = round($slope * self::PREDICTION_HORIZON_DAYS, 1);

            $detail = $direction === 'stable'
                ? sprintf(
                    'Prices flat over %d days (slope: %sp/day, R²=%s)',
                    $lookbackDays,
                    round($slope, 2),
                    round($rSquared, 2),
                )
                : sprintf(
                    '%s at %sp/day over %d days (R²=%s, ~%s%sp in %dd)',
                    $slope > 0 ? 'Rising' : 'Falling',
                    abs(round($slope, 2)),
                    $lookbackDays,
                    round($rSquared, 2),
                    $projected > 0 ? '+' : '',
                    $projected,
                    self::PREDICTION_HORIZON_DAYS,
                );

            if ($lookbackDays === 5) {
                $detail .= ' [Adaptive lookback active]';
            }

            return [
                'score' => $score,
                'confidence' => min(1.0, $rSquared),
                'direction' => $direction,
                'detail' => $detail,
                'data_points' => $rows->count(),
                'enabled' => true,
                'slope' => round($slope, 3),
                'r_squared' => round($rSquared, 3),
            ];
        }

        return [
            'score' => 0.0,
            'confidence' => 0.0,
            'direction' => 'stable',
            'detail' => 'Insufficient price history or noisy data (R² below threshold)',
            'data_points' => 0,
            'enabled' => false,
            'slope' => 0.0,
            'r_squared' => 0.0,
        ];
    }

    /**
     * Compare the typical price for today's weekday against the average across weekdays,
     * using daily averages from the last 90 days.
     *
     * Disabled unless at least 56 distinct days have data. Weekdays are numbered 1 (Sunday)
     * to 7 (Saturday) on both the SQL and PHP side.
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool}
     */
    private function computeDayOfWeekSignal(FuelType $fuelType): array
    {
        // SQLite has no DAYOFWEEK(); strftime('%w') is 0-based, so shift it to match MySQL.
        $dowExpr = DB::connection()->getDriverName() === 'sqlite'
            ? "(CAST(strftime('%w', price_effective_at) AS INTEGER) + 1)"
            : 'DAYOFWEEK(price_effective_at)';

        $rows = DB::table('station_prices')
            ->where('fuel_type', $fuelType->value)
            ->where('price_effective_at', '>=', now()->subDays(90))
            ->selectRaw("{$dowExpr} as dow, DATE(price_effective_at) as day, AVG(price_pence) as avg_price")
            ->groupBy('dow', 'day')
            ->get();

        $uniqueDays = $rows->pluck('day')->unique()->count();

        if ($uniqueDays < 56) {
            return $this->disabledSignal("Insufficient history for day-of-week pattern ({$uniqueDays} days, need 56)");
        }

        $dowAverages = $rows->groupBy('dow')->map(fn ($g) => $g->avg('avg_price'));
        $weekAvg = $dowAverages->avg();

        $todayDow = (int) now()->format('w') + 1; // PHP 0=Sun → MySQL 1=Sun
        $tomorrowDow = ($todayDow % 7) + 1; // wraps Saturday (7) back to Sunday (1)

        // A weekday with no data falls back to the weekly average.
        $todayAvg = $dowAverages->get($todayDow, $weekAvg);
        $tomorrowAvg = $dowAverages->get($tomorrowDow, $weekAvg);

        $cheapestDow = $dowAverages->keys()->sortBy(fn ($k) => $dowAverages[$k])->first();
        $dayNames = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'];
        $cheapestDayName = $dayNames[($cheapestDow - 1) % 7] ?? 'Unknown';

        $weekRange = round(($dowAverages->max() - $dowAverages->min()) / 100, 1);

        // Positive delta means tomorrow is typically dearer than today, so word it accordingly.
        $tomorrowDelta = round(($tomorrowAvg - $todayAvg) / 100, 1);
        $tomorrowNote = match (true) {
            $tomorrowDelta > 0 => "Tomorrow typically {$tomorrowDelta}p more than today.",
            $tomorrowDelta < 0 => 'Tomorrow typically '.abs($tomorrowDelta).'p less than today.',
            default => 'Tomorrow typically the same as today.',
        };

        $direction = match (true) {
            ($todayAvg - $weekAvg) / 100 >= 1.5 => 'up',
            ($weekAvg - $todayAvg) / 100 >= 1.5 => 'down',
            default => 'stable',
        };

        $score = match ($direction) {
            'up' => 1.0,
            'down' => -1.0,
            default => 0.0,
        };

        return [
            'score' => $score,
            'confidence' => min(1.0, $uniqueDays / 90),
            'direction' => $direction,
            'detail' => "Cheapest day: {$cheapestDayName}. Weekly range: {$weekRange}p. {$tomorrowNote}",
            'data_points' => $uniqueDays,
            'enabled' => true,
        ];
    }

    /**
     * Compare supermarket vs non-supermarket 7-day price trend.
     *
     * Each group's daily averages are regressed separately. When the two 7-day projected
     * changes differ by at least 1.0, the group with the larger move is treated as the
     * leader and its direction becomes the signal; otherwise the groups are reported as
     * moving in sync. Disabled when either group has fewer than two daily points.
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool}
     */
    private function computeBrandBehaviourSignal(FuelType $fuelType): array
    {
        $rows = DB::table('station_prices')
            ->join('stations', 'station_prices.station_id', '=', 'stations.node_id')
            ->where('station_prices.fuel_type', $fuelType->value)
            ->where('station_prices.price_effective_at', '>=', now()->subDays(7))
            ->selectRaw('stations.is_supermarket, DATE(station_prices.price_effective_at) as day, AVG(station_prices.price_pence) as avg_price')
            ->groupBy('stations.is_supermarket', 'day')
            ->orderBy('day')
            ->get();

        $supermarket = $rows->where('is_supermarket', 1)->values();
        $major = $rows->where('is_supermarket', 0)->values();

        if ($supermarket->count() < 2 || $major->count() < 2) {
            return $this->disabledSignal('Insufficient brand data for comparison');
        }

        $toFloat = static fn ($v): float => (float) $v;
        $supermarketSlope = $this->linearRegression($supermarket->pluck('avg_price')->map($toFloat)->values()->all())['slope'];
        $majorSlope = $this->linearRegression($major->pluck('avg_price')->map($toFloat)->values()->all())['slope'];

        $divergence = round(abs($supermarketSlope - $majorSlope) * 7, 1);
        $supermarketChange = round($supermarketSlope * 7, 1);
        $majorChange = round($majorSlope * 7, 1);

        if ($divergence < 1.0) {
            return [
                'score' => 0.0,
                'confidence' => 0.5,
                'direction' => 'stable',
                'detail' => 'Supermarkets and majors moving in sync.',
                'data_points' => $rows->count(),
                'enabled' => true,
            ];
        }

        // The group with the larger absolute 7-day move leads; ties go to the majors.
        $supermarketsLead = abs($supermarketChange) > abs($majorChange);
        $leader = $supermarketsLead ? 'Supermarkets' : 'Majors';
        $follower = $supermarketsLead ? 'majors' : 'supermarkets';
        $leaderChange = $supermarketsLead ? $supermarketChange : $majorChange;
        $leaderAbs = abs($leaderChange);
        $followerChange = $supermarketsLead ? abs($majorChange) : abs($supermarketChange);
        $direction = $leaderChange > 0 ? 'up' : 'down';

        return [
            'score' => $direction === 'up' ? 1.0 : -1.0,
            'confidence' => min(1.0, $divergence / 5.0),
            'direction' => $direction,
            'detail' => "{$leader} ".($leaderChange > 0 ? 'rose' : 'fell')." {$leaderAbs}p vs {$follower} {$followerChange}p (divergence: {$divergence}p). Expect {$follower} to follow.",
            'data_points' => $rows->count(),
            'enabled' => true,
        ];
    }

    /**
     * Average hold duration (days between price rows) per station over the last 30 days.
     *
     * Only stations with more than one row and a non-zero day span are counted, and the
     * signal is disabled when fewer than 10 such stations exist. The score is -0.1 for an
     * average hold under 2 days, +0.1 for over 5 days, and 0.0 otherwise; the direction is
     * always 'stable'.
     *
     * NOTE(review): aggregateSignals() adds this score to the same weighted sum that decides
     * direction, so it acts as a small directional nudge rather than a pure confidence
     * modifier — confirm that is intended.
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool}
     */
    private function computeStickinessSignal(FuelType $fuelType): array
    {
        // SQLite has no DATEDIFF(); julianday() differences give the same whole-day span.
        $diffExpr = DB::connection()->getDriverName() === 'sqlite'
            ? 'CAST((julianday(MAX(price_effective_at)) - julianday(MIN(price_effective_at))) AS INTEGER)'
            : 'DATEDIFF(MAX(price_effective_at), MIN(price_effective_at))';

        $rows = DB::table('station_prices')
            ->where('fuel_type', $fuelType->value)
            ->where('price_effective_at', '>=', now()->subDays(30))
            ->selectRaw("station_id, COUNT(*) as changes, {$diffExpr} as span_days")
            ->groupBy('station_id')
            ->having('changes', '>', 1)
            ->having('span_days', '>', 0)
            ->get();

        $stationCount = $rows->count();

        if ($stationCount < 10) {
            return $this->disabledSignal('Insufficient stickiness data (need 10+ stations with price history)');
        }

        // N rows bound N-1 hold periods; the HAVING clause guarantees changes > 1.
        $avgHoldDays = round((float) $rows->avg(fn ($r) => $r->span_days / ($r->changes - 1)), 1);

        [$score, $detail] = match (true) {
            $avgHoldDays < 2 => [-0.1, "Volatile prices (avg hold: {$avgHoldDays} days) — harder to predict."],
            $avgHoldDays > 5 => [0.1, "Sticky prices (avg hold: {$avgHoldDays} days) — more predictable."],
            default => [0.0, "Normal hold period (avg: {$avgHoldDays} days)."],
        };

        return [
            'score' => $score,
            'confidence' => min(1.0, $stationCount / 200),
            'direction' => 'stable',
            'detail' => $detail,
            'data_points' => $stationCount,
            'enabled' => true,
        ];
    }

    /**
     * Regress the 14-day daily average price of stations within 50 km of the given point.
     *
     * Distance uses the spherical law of cosines with an Earth radius of 6371 km; the
     * cosine is clamped to 1.0 before acos(). Disabled when fewer than three daily points
     * are found.
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool}
     */
    private function computeRegionalMomentumSignal(FuelType $fuelType, float $lat, float $lng): array
    {
        // The cosine term appears twice (CASE test and ELSE branch), hence six bindings.
        $cosine = 'cos(radians(?)) * cos(radians(lat)) * cos(radians(lng) - radians(?)) + sin(radians(?)) * sin(radians(lat))';
        $withinRadius = "(6371 * acos(CASE WHEN ({$cosine}) > 1.0 THEN 1.0 ELSE ({$cosine}) END)) <= 50";

        $rows = DB::table('station_prices')
            ->join('stations', 'station_prices.station_id', '=', 'stations.node_id')
            ->where('station_prices.fuel_type', $fuelType->value)
            ->where('station_prices.price_effective_at', '>=', now()->subDays(14))
            ->whereRaw($withinRadius, [$lat, $lng, $lat, $lat, $lng, $lat])
            ->selectRaw('DATE(station_prices.price_effective_at) as day, AVG(station_prices.price_pence) as avg_price')
            ->groupBy('day')
            ->orderBy('day')
            ->get();

        if ($rows->count() < 3) {
            return $this->disabledSignal('Insufficient regional data');
        }

        $regionalRegression = $this->linearRegression(
            $rows->pluck('avg_price')->map(static fn ($v): float => (float) $v)->values()->all(),
        );
        $slope = $regionalRegression['slope'];
        $rSquared = $regionalRegression['r_squared'];

        $direction = match (true) {
            $slope >= self::SLOPE_THRESHOLD_PENCE => 'up',
            $slope <= -self::SLOPE_THRESHOLD_PENCE => 'down',
            default => 'stable',
        };

        $score = match ($direction) {
            'up' => 0.7,
            'down' => -0.7,
            default => 0.0,
        };

        return [
            'score' => $score,
            'confidence' => min(1.0, $rSquared),
            'direction' => $direction,
            'detail' => 'Regional trend: '.round($slope, 2).'p/day (R²='.round($rSquared, 2).')',
            'data_points' => $rows->count(),
            'enabled' => true,
        ];
    }

    /**
     * Neutral signal used when a signal cannot be computed; $detail explains why.
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool}
     */
    private function disabledSignal(string $detail): array
    {
        return [
            'score' => 0.0,
            'confidence' => 0.0,
            'direction' => 'stable',
            'detail' => $detail,
            'data_points' => 0,
            'enabled' => false,
        ];
    }

    /**
     * Weighted aggregate of enabled signals.
     *
     * Each enabled, weighted signal contributes score × confidence × weight; the sum is
     * normalised by the total weight of the enabled signals. Signals without an entry in
     * the weight table (the momentum signals) are ignored. Returns
     * [direction string, confidence score 0-100]; the direction is 'up' or 'down' once the
     * normalised value reaches ±0.1.
     *
     * @param  array<string, array{score: float, confidence: float, enabled: bool}>  $signals
     * @return array{0: string, 1: float}
     */
    private function aggregateSignals(array $signals): array
    {
        $weights = [
            'trend' => 0.45,
            'dayOfWeek' => 0.20,
            'brandBehaviour' => 0.25,
            'stickiness' => 0.10,
        ];

        $weightedSum = 0.0;
        $totalWeight = 0.0;

        foreach ($weights as $key => $weight) {
            $signal = $signals[$key] ?? null;

            if (! $signal || ! $signal['enabled']) {
                continue;
            }

            $weightedSum += $signal['score'] * $signal['confidence'] * $weight;
            $totalWeight += $weight;
        }

        // Nothing enabled: avoid dividing by zero and report no signal.
        if ($totalWeight < 0.01) {
            return ['stable', 0.0];
        }

        $normalised = $weightedSum / $totalWeight;
        $confidenceScore = round(min(100.0, abs($normalised) * 100), 1);

        $direction = match (true) {
            $normalised >= 0.1 => 'up',
            $normalised <= -0.1 => 'down',
            default => 'stable',
        };

        return [$direction, $confidenceScore];
    }

    /**
     * Least-squares linear regression.
     *
     * x is the position of each value in the series (0, 1, 2, …), y is the value. Keys are
     * discarded first so a non-list array is still regressed against consecutive positions.
     * Fewer than two values, or a series with no variance, yields a zero slope / zero R².
     *
     * @param  float[]  $values
     * @return array{slope: float, r_squared: float}
     */
    private function linearRegression(array $values): array
    {
        $values = array_values($values);
        $n = count($values);

        if ($n < 2) {
            return ['slope' => 0.0, 'r_squared' => 0.0];
        }

        $xMean = ($n - 1) / 2.0;
        $yMean = array_sum($values) / $n;

        $numerator = 0.0;
        $denominator = 0.0;

        foreach ($values as $i => $y) {
            $x = $i - $xMean;
            $numerator += $x * ($y - $yMean);
            $denominator += $x * $x;
        }

        $slope = $denominator > 0.0 ? $numerator / $denominator : 0.0;

        $ssRes = 0.0;
        $ssTot = 0.0;

        foreach ($values as $i => $y) {
            $predicted = $yMean + $slope * ($i - $xMean);
            $ssRes += ($y - $predicted) ** 2;
            $ssTot += ($y - $yMean) ** 2;
        }

        // Clamp at zero so rounding error can never produce a negative R².
        $rSquared = $ssTot > 0.0 ? max(0.0, 1.0 - ($ssRes / $ssTot)) : 0.0;

        return ['slope' => $slope, 'r_squared' => $rSquared];
    }

    /**
     * Join the detail text of the trend and brand-behaviour signals into a single sentence
     * string, or return a generic fallback when neither has anything to report.
     *
     * The trend detail is included only when the trend is enabled and its slope reaches the
     * slope threshold; the brand detail only when enabled and not 'stable'.
     * $direction is accepted for signature compatibility but is not currently read.
     */
    private function buildReasoning(string $direction, float $slope, array $trend, array $brandBehaviour): string
    {
        $parts = [];

        if ($trend['enabled'] && abs($slope) >= self::SLOPE_THRESHOLD_PENCE) {
            $parts[] = $trend['detail'];
        }

        if ($brandBehaviour['enabled'] && $brandBehaviour['direction'] !== 'stable') {
            $parts[] = $brandBehaviour['detail'];
        }

        if ($parts === []) {
            return 'No clear pattern — fill up at the cheapest station near you now.';
        }

        return implode(' ', $parts);
    }
}
|