feat: add NationalFuelPredictionService with trend, day-of-week, brand-behaviour, and stickiness signals

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Ovidiu U
2026-04-04 19:13:00 +01:00
parent a30dbdfbba
commit 1c548eae87
2 changed files with 596 additions and 0 deletions

View File

@@ -0,0 +1,489 @@
<?php
namespace App\Services;
use App\Enums\FuelType;
use App\Models\StationPriceCurrent;
use Illuminate\Support\Facades\DB;
/**
 * Builds a short-horizon fuel price outlook from several independent signals
 * (regression trend, day-of-week pattern, supermarket-vs-major divergence,
 * price stickiness and, optionally, a regional trend) and blends them into a
 * direction, a 0-100 confidence score and a suggested action.
 *
 * NOTE(review): getCurrentNationalAverage() and the day-of-week signal divide
 * `price_pence` by 100 before presenting it as pence, whereas the trend, brand
 * and regional slopes are compared against SLOPE_THRESHOLD_PENCE and reported
 * without that division. One of the two conventions is presumably wrong —
 * confirm the stored unit of `price_pence` before relying on absolute values.
 */
class NationalFuelPredictionService
{
    /** Minimum R² a regression fit needs before it is accepted as a trend. */
    private const float R_SQUARED_THRESHOLD = 0.5;

    /** Minimum absolute slope (per day) for a trend to count as up/down. */
    private const float SLOPE_THRESHOLD_PENCE = 0.3;

    /** Number of days ahead the prediction refers to. */
    private const int PREDICTION_HORIZON_DAYS = 7;

    /** Look-back windows (days) tried in order by the trend signal. */
    private const array TREND_LOOKBACK_DAYS = [5, 14];

    /**
     * Produce the prediction payload for one fuel type.
     *
     * The regional signal is only computed when both coordinates are given;
     * otherwise a disabled placeholder is reported in its place.
     *
     * @return array{
     *     fuel_type: string,
     *     current_avg: float,
     *     predicted_direction: string,
     *     predicted_change_pence: float,
     *     confidence_score: float,
     *     confidence_label: string,
     *     action: string,
     *     reasoning: string,
     *     prediction_horizon_days: int,
     *     region_key: string,
     *     methodology: string,
     *     signals: array
     * }
     */
    public function predict(FuelType $fuelType, ?float $lat = null, ?float $lng = null): array
    {
        $currentAvg = $this->getCurrentNationalAverage($fuelType);
        $trend = $this->computeTrendSignal($fuelType);
        $dayOfWeek = $this->computeDayOfWeekSignal($fuelType);
        $brandBehaviour = $this->computeBrandBehaviourSignal($fuelType);
        $stickiness = $this->computeStickinessSignal($fuelType);
        $nationalMomentum = $this->disabledSignal('National momentum disabled for national predictions');
        $regionalMomentum = ($lat === null || $lng === null)
            ? $this->disabledSignal('No coordinates provided for regional momentum analysis')
            : $this->computeRegionalMomentumSignal($fuelType, $lat, $lng);

        // Keys here are the ones aggregateSignals() looks up its weights by.
        [$direction, $confidenceScore] = $this->aggregateSignals([
            'trend' => $trend,
            'dayOfWeek' => $dayOfWeek,
            'brandBehaviour' => $brandBehaviour,
            'nationalMomentum' => $nationalMomentum,
            'regionalMomentum' => $regionalMomentum,
            'stickiness' => $stickiness,
        ]);

        // The magnitude always comes from the trend slope, independent of the
        // blended direction.
        $slope = $trend['slope'] ?? 0.0;
        $predictedChangePence = round($slope * self::PREDICTION_HORIZON_DAYS, 1);

        $confidenceLabel = match (true) {
            $confidenceScore >= 70 => 'high',
            $confidenceScore >= 40 => 'medium',
            default => 'low',
        };

        $action = match ($direction) {
            'up' => 'fill_now',
            'down' => 'wait',
            default => 'no_signal',
        };

        return [
            'fuel_type' => $fuelType->value,
            'current_avg' => $currentAvg,
            'predicted_direction' => $direction,
            'predicted_change_pence' => $predictedChangePence,
            'confidence_score' => $confidenceScore,
            'confidence_label' => $confidenceLabel,
            'action' => $action,
            'reasoning' => $this->buildReasoning($direction, $slope, $trend, $brandBehaviour),
            'prediction_horizon_days' => self::PREDICTION_HORIZON_DAYS,
            'region_key' => 'national',
            'methodology' => 'multi_signal_live_fallback',
            'signals' => [
                'trend' => $trend,
                'day_of_week' => $dayOfWeek,
                'brand_behaviour' => $brandBehaviour,
                'national_momentum' => $nationalMomentum,
                'regional_momentum' => $regionalMomentum,
                'price_stickiness' => $stickiness,
            ],
        ];
    }

    /**
     * Average of `price_pence` over all current prices for the fuel type,
     * divided by 100 and rounded to one decimal; 0.0 when there are no rows.
     */
    private function getCurrentNationalAverage(FuelType $fuelType): float
    {
        $avg = StationPriceCurrent::where('fuel_type', $fuelType->value)->avg('price_pence');

        if ($avg === null) {
            return 0.0;
        }

        return round((float) $avg / 100, 1);
    }

    /**
     * Linear regression on daily average prices.
     *
     * Tries each window in TREND_LOOKBACK_DAYS in order and returns the first
     * fit whose R² reaches R_SQUARED_THRESHOLD. When no window qualifies (fewer
     * than two daily points, or a noisy fit) a disabled signal is returned.
     *
     * NOTE(review): with exactly two daily points R² is 1.0 whenever the two
     * values differ, so the R² gate is ineffective for that case.
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool, slope: float, r_squared: float}
     */
    private function computeTrendSignal(FuelType $fuelType): array
    {
        foreach (self::TREND_LOOKBACK_DAYS as $lookbackDays) {
            $rows = DB::table('station_prices')
                ->where('fuel_type', $fuelType->value)
                ->where('price_effective_at', '>=', now()->subDays($lookbackDays))
                ->selectRaw('DATE(price_effective_at) as day, AVG(price_pence) as avg_price')
                ->groupBy('day')
                ->orderBy('day')
                ->get();

            $dataPoints = $rows->count();
            if ($dataPoints < 2) {
                continue;
            }

            $regression = $this->linearRegression($this->extractAveragePrices($rows));
            $rSquared = $regression['r_squared'];
            if ($rSquared < self::R_SQUARED_THRESHOLD) {
                continue;
            }

            $slope = $regression['slope'];
            $direction = $this->classifySlope($slope);

            // Score magnitude saturates at a slope of 2 per day.
            $score = $direction === 'stable'
                ? 0.0
                : min(1.0, abs($slope) / 2.0) * ($slope > 0 ? 1 : -1);

            return [
                'score' => $score,
                'confidence' => min(1.0, $rSquared),
                'direction' => $direction,
                'detail' => $this->formatTrendDetail($slope, $rSquared, $lookbackDays, $direction),
                'data_points' => $dataPoints,
                'enabled' => true,
                'slope' => round($slope, 3),
                'r_squared' => round($rSquared, 3),
            ];
        }

        return [
            'score' => 0.0,
            'confidence' => 0.0,
            'direction' => 'stable',
            'detail' => 'Insufficient price history or noisy data (R² below threshold)',
            'data_points' => 0,
            'enabled' => false,
            'slope' => 0.0,
            'r_squared' => 0.0,
        ];
    }

    /**
     * Human-readable summary of an accepted trend fit.
     *
     * The projected change is computed over PREDICTION_HORIZON_DAYS so that it
     * matches the "in Nd" label and the top-level `predicted_change_pence`
     * (previously it was computed over the look-back window while being
     * labelled with the horizon).
     */
    private function formatTrendDetail(float $slope, float $rSquared, int $lookbackDays, string $direction): string
    {
        $roundedRSquared = round($rSquared, 2);

        if ($direction === 'stable') {
            $detail = sprintf(
                'Prices flat over %d days (slope: %sp/day, R²=%s)',
                $lookbackDays,
                round($slope, 2),
                $roundedRSquared,
            );
        } else {
            $projected = round($slope * self::PREDICTION_HORIZON_DAYS, 1);
            $detail = sprintf(
                '%s at %sp/day over %d days (R²=%s, ~%s%sp in %dd)',
                $slope > 0 ? 'Rising' : 'Falling',
                abs(round($slope, 2)),
                $lookbackDays,
                $roundedRSquared,
                $projected > 0 ? '+' : '',
                $projected,
                self::PREDICTION_HORIZON_DAYS,
            );
        }

        // The marker is attached to fits obtained from the shortest window.
        if ($lookbackDays === self::TREND_LOOKBACK_DAYS[0]) {
            $detail .= ' [Adaptive lookback active]';
        }

        return $detail;
    }

    /**
     * Map a per-day slope onto 'up' / 'down' / 'stable' using
     * SLOPE_THRESHOLD_PENCE as the symmetric cut-off.
     */
    private function classifySlope(float $slope): string
    {
        return match (true) {
            $slope >= self::SLOPE_THRESHOLD_PENCE => 'up',
            $slope <= -self::SLOPE_THRESHOLD_PENCE => 'down',
            default => 'stable',
        };
    }

    /**
     * Pull the `avg_price` column out of a query result as a list of floats,
     * preserving row order.
     *
     * @return list<float>
     */
    private function extractAveragePrices(\Illuminate\Support\Collection $rows): array
    {
        return $rows
            ->pluck('avg_price')
            ->map(static fn ($value): float => (float) $value)
            ->values()
            ->all();
    }

    /**
     * Compare the historical average for today's weekday against the mean of
     * all weekday averages over the last 90 days.
     *
     * Disabled unless at least 56 distinct calendar days have data. Weekdays
     * are numbered 1 = Sunday … 7 = Saturday on both supported drivers
     * (MySQL DAYOFWEEK, SQLite strftime('%w') + 1).
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool}
     */
    private function computeDayOfWeekSignal(FuelType $fuelType): array
    {
        $dowExpr = DB::connection()->getDriverName() === 'sqlite'
            ? "(CAST(strftime('%w', price_effective_at) AS INTEGER) + 1)"
            : 'DAYOFWEEK(price_effective_at)';

        $rows = DB::table('station_prices')
            ->where('fuel_type', $fuelType->value)
            ->where('price_effective_at', '>=', now()->subDays(90))
            ->selectRaw("{$dowExpr} as dow, DATE(price_effective_at) as day, AVG(price_pence) as avg_price")
            ->groupBy('dow', 'day')
            ->get();

        $uniqueDays = $rows->pluck('day')->unique()->count();
        if ($uniqueDays < 56) {
            return $this->disabledSignal("Insufficient history for day-of-week pattern ({$uniqueDays} days, need 56)");
        }

        // Average of daily averages per weekday, then the mean across weekdays.
        $dowAverages = $rows->groupBy('dow')->map(fn ($group) => $group->avg('avg_price'));
        $weekAvg = $dowAverages->avg();

        $todayDow = (int) now()->format('w') + 1; // PHP 0=Sun → MySQL 1=Sun
        $tomorrowDow = ($todayDow % 7) + 1;       // wraps 7 (Sat) back to 1 (Sun)

        // A weekday with no data falls back to the overall weekday mean.
        $todayAvg = $dowAverages->get($todayDow, $weekAvg);
        $tomorrowAvg = $dowAverages->get($tomorrowDow, $weekAvg);

        $dayNames = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'];
        $cheapestDow = (int) $dowAverages->sort()->keys()->first();
        $cheapestDayName = $dayNames[($cheapestDow - 1) % 7] ?? 'Unknown';

        $weekRange = round(($dowAverages->max() - $dowAverages->min()) / 100, 1);
        $tomorrowDelta = round(($tomorrowAvg - $todayAvg) / 100, 1);

        $direction = match (true) {
            ($todayAvg - $weekAvg) / 100 >= 1.5 => 'up',
            ($weekAvg - $todayAvg) / 100 >= 1.5 => 'down',
            default => 'stable',
        };

        $score = match ($direction) {
            'up' => 1.0,
            'down' => -1.0,
            default => 0.0,
        };

        return [
            'score' => $score,
            'confidence' => min(1.0, $uniqueDays / 90),
            'direction' => $direction,
            'detail' => "Cheapest day: {$cheapestDayName}. Weekly range: {$weekRange}p. "
                .$this->describeTomorrowDelta($tomorrowDelta),
            'data_points' => $uniqueDays,
            'enabled' => true,
        ];
    }

    /**
     * Phrase the typical tomorrow-minus-today difference.
     *
     * The delta is signed (negative = tomorrow cheaper); the sentence picks
     * "less"/"more" from the sign and prints the magnitude, instead of always
     * saying "less" next to a signed number.
     */
    private function describeTomorrowDelta(float $deltaPence): string
    {
        if ($deltaPence < 0.0) {
            return sprintf('Tomorrow typically %sp less than today.', abs($deltaPence));
        }

        if ($deltaPence > 0.0) {
            return sprintf('Tomorrow typically %sp more than today.', $deltaPence);
        }

        return 'Tomorrow typically the same price as today.';
    }

    /**
     * Compare the 7-day price trend of supermarket stations against all other
     * stations and flag a divergence between the two groups.
     *
     * Each group needs at least two daily averages. When the 7-day divergence
     * is below 1.0 the signal is enabled but neutral. Otherwise the group with
     * the larger absolute change is treated as the leader and sets the
     * direction (ties go to the non-supermarket group).
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool}
     */
    private function computeBrandBehaviourSignal(FuelType $fuelType): array
    {
        $rows = DB::table('station_prices')
            ->join('stations', 'station_prices.station_id', '=', 'stations.node_id')
            ->where('station_prices.fuel_type', $fuelType->value)
            ->where('station_prices.price_effective_at', '>=', now()->subDays(7))
            ->selectRaw('stations.is_supermarket, DATE(station_prices.price_effective_at) as day, AVG(station_prices.price_pence) as avg_price')
            ->groupBy('stations.is_supermarket', 'day')
            ->orderBy('day')
            ->get();

        $supermarket = $rows->where('is_supermarket', 1)->values();
        $major = $rows->where('is_supermarket', 0)->values();

        if ($supermarket->count() < 2 || $major->count() < 2) {
            return $this->disabledSignal('Insufficient brand data for comparison');
        }

        $supermarketSlope = $this->linearRegression($this->extractAveragePrices($supermarket))['slope'];
        $majorSlope = $this->linearRegression($this->extractAveragePrices($major))['slope'];

        // Scale per-day slopes to a 7-day change.
        $divergence = round(abs($supermarketSlope - $majorSlope) * 7, 1);
        $supermarketChange = round($supermarketSlope * 7, 1);
        $majorChange = round($majorSlope * 7, 1);
        $dataPoints = $rows->count();

        if ($divergence < 1.0) {
            return [
                'score' => 0.0,
                'confidence' => 0.5,
                'direction' => 'stable',
                'detail' => 'Supermarkets and majors moving in sync.',
                'data_points' => $dataPoints,
                'enabled' => true,
            ];
        }

        if (abs($supermarketChange) > abs($majorChange)) {
            $leader = 'Supermarkets';
            $follower = 'majors';
            $leaderChange = $supermarketChange;
            $followerChange = abs($majorChange);
        } else {
            $leader = 'Majors';
            $follower = 'supermarkets';
            $leaderChange = $majorChange;
            $followerChange = abs($supermarketChange);
        }

        $rising = $leaderChange > 0;
        $leaderAbs = abs($leaderChange);
        $verb = $rising ? 'rose' : 'fell';

        return [
            'score' => $rising ? 1.0 : -1.0,
            'confidence' => min(1.0, $divergence / 5.0),
            'direction' => $rising ? 'up' : 'down',
            'detail' => "{$leader} {$verb} {$leaderAbs}p vs {$follower} {$followerChange}p (divergence: {$divergence}p). Expect {$follower} to follow.",
            'data_points' => $dataPoints,
            'enabled' => true,
        ];
    }

    /**
     * Average hold duration (days between price records) per station over the
     * last 30 days.
     *
     * Only stations with more than one record and a non-zero day span in that
     * window are counted, and at least 10 such stations are required. The
     * score is -0.1 (volatile, hold < 2 days), +0.1 (sticky, hold > 5 days) or
     * 0.0; the direction is always 'stable'.
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool}
     */
    private function computeStickinessSignal(FuelType $fuelType): array
    {
        $diffExpr = DB::connection()->getDriverName() === 'sqlite'
            ? 'CAST((julianday(MAX(price_effective_at)) - julianday(MIN(price_effective_at))) AS INTEGER)'
            : 'DATEDIFF(MAX(price_effective_at), MIN(price_effective_at))';

        $rows = DB::table('station_prices')
            ->where('fuel_type', $fuelType->value)
            ->where('price_effective_at', '>=', now()->subDays(30))
            ->selectRaw("station_id, COUNT(*) as changes, {$diffExpr} as span_days")
            ->groupBy('station_id')
            ->having('changes', '>', 1)
            ->having('span_days', '>', 0)
            ->get();

        $stationCount = $rows->count();
        if ($stationCount < 10) {
            return $this->disabledSignal('Insufficient stickiness data (need 10+ stations with price history)');
        }

        // N records span N-1 intervals; `changes > 1` above rules out a zero divisor.
        $avgHoldDays = round(
            (float) $rows->avg(fn ($row) => $row->span_days / ($row->changes - 1)),
            1,
        );

        [$score, $detail] = match (true) {
            $avgHoldDays < 2 => [-0.1, "Volatile prices (avg hold: {$avgHoldDays} days) — harder to predict."],
            $avgHoldDays > 5 => [0.1, "Sticky prices (avg hold: {$avgHoldDays} days) — more predictable."],
            default => [0.0, "Normal hold period (avg: {$avgHoldDays} days)."],
        };

        return [
            'score' => $score,
            'confidence' => min(1.0, $stationCount / 200),
            'direction' => 'stable',
            'detail' => $detail,
            'data_points' => $stationCount,
            'enabled' => true,
        ];
    }

    /**
     * 14-day regression trend of stations within 50 km of the given point
     * (great-circle distance computed in SQL).
     *
     * Requires at least three daily averages. Unlike the trend signal there is
     * no R² gate: R² is only reported as the confidence.
     *
     * NOTE(review): the distance filter relies on LEAST/acos/radians in raw
     * SQL and has no SQLite branch, unlike the day-of-week and stickiness
     * signals — confirm whether this path needs to run on SQLite.
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool}
     */
    private function computeRegionalMomentumSignal(FuelType $fuelType, float $lat, float $lng): array
    {
        $rows = DB::table('station_prices')
            ->join('stations', 'station_prices.station_id', '=', 'stations.node_id')
            ->where('station_prices.fuel_type', $fuelType->value)
            ->where('station_prices.price_effective_at', '>=', now()->subDays(14))
            ->whereRaw('(6371 * acos(LEAST(1.0, cos(radians(?)) * cos(radians(lat)) * cos(radians(lng) - radians(?)) + sin(radians(?)) * sin(radians(lat))))) <= 50', [$lat, $lng, $lat])
            ->selectRaw('DATE(station_prices.price_effective_at) as day, AVG(station_prices.price_pence) as avg_price')
            ->groupBy('day')
            ->orderBy('day')
            ->get();

        $dataPoints = $rows->count();
        if ($dataPoints < 3) {
            return $this->disabledSignal('Insufficient regional data');
        }

        $regression = $this->linearRegression($this->extractAveragePrices($rows));
        $slope = $regression['slope'];
        $rSquared = $regression['r_squared'];
        $direction = $this->classifySlope($slope);

        $score = match ($direction) {
            'up' => 0.7,
            'down' => -0.7,
            default => 0.0,
        };

        return [
            'score' => $score,
            'confidence' => min(1.0, $rSquared),
            'direction' => $direction,
            'detail' => 'Regional trend: '.round($slope, 2).'p/day (R²='.round($rSquared, 2).')',
            'data_points' => $dataPoints,
            'enabled' => true,
        ];
    }

    /**
     * Neutral, disabled signal carrying only an explanation.
     *
     * @return array{score: float, confidence: float, direction: string, detail: string, data_points: int, enabled: bool}
     */
    private function disabledSignal(string $detail): array
    {
        return [
            'score' => 0.0,
            'confidence' => 0.0,
            'direction' => 'stable',
            'detail' => $detail,
            'data_points' => 0,
            'enabled' => false,
        ];
    }

    /**
     * Weighted aggregate of enabled signals.
     *
     * Each enabled, weighted signal contributes score × confidence × weight;
     * the sum is normalised by the total weight of enabled signals (so enabled
     * but neutral signals dilute the result). Returns the direction and a
     * 0-100 confidence score.
     *
     * NOTE(review): only the four keys below carry a weight, so the
     * regional/national momentum signals never influence the result. The
     * stickiness score is summed like a directional vote here, although it is
     * described as a confidence modifier — confirm the intended treatment.
     *
     * @param array<string, array{score: float, confidence: float, enabled: bool}> $signals
     * @return array{0: string, 1: float}
     */
    private function aggregateSignals(array $signals): array
    {
        $weights = [
            'trend' => 0.45,
            'dayOfWeek' => 0.20,
            'brandBehaviour' => 0.25,
            'stickiness' => 0.10,
        ];

        $weightedSum = 0.0;
        $totalWeight = 0.0;

        foreach ($weights as $key => $weight) {
            $signal = $signals[$key] ?? null;
            if (! $signal || ! $signal['enabled']) {
                continue;
            }

            $weightedSum += $signal['score'] * $signal['confidence'] * $weight;
            $totalWeight += $weight;
        }

        // Nothing enabled: no basis for a call.
        if ($totalWeight < 0.01) {
            return ['stable', 0.0];
        }

        $normalised = $weightedSum / $totalWeight;
        $confidenceScore = round(min(100.0, abs($normalised) * 100), 1);

        $direction = match (true) {
            $normalised >= 0.1 => 'up',
            $normalised <= -0.1 => 'down',
            default => 'stable',
        };

        return [$direction, $confidenceScore];
    }

    /**
     * Least-squares linear regression of the values against their position.
     *
     * x is the 0-based position in the list, so callers passing one value per
     * day get a per-observation slope; gaps between days are not accounted
     * for. The input is re-indexed first so that arrays with non-sequential
     * keys are handled by position rather than by key.
     *
     * @param float[] $values
     * @return array{slope: float, r_squared: float} slope 0.0 / R² 0.0 for fewer than two values or a constant series
     */
    private function linearRegression(array $values): array
    {
        $values = array_values($values);
        $n = count($values);

        if ($n < 2) {
            return ['slope' => 0.0, 'r_squared' => 0.0];
        }

        $xMean = ($n - 1) / 2.0;
        $yMean = array_sum($values) / $n;

        $covariance = 0.0;
        $xVariance = 0.0;
        foreach ($values as $i => $y) {
            $dx = $i - $xMean;
            $covariance += $dx * ($y - $yMean);
            $xVariance += $dx * $dx;
        }

        $slope = $xVariance > 0.0 ? $covariance / $xVariance : 0.0;

        $ssRes = 0.0;
        $ssTot = 0.0;
        foreach ($values as $i => $y) {
            $fitted = $yMean + $slope * ($i - $xMean);
            $ssRes += ($y - $fitted) ** 2;
            $ssTot += ($y - $yMean) ** 2;
        }

        // A constant series has no variance to explain; report R² as 0.
        $rSquared = $ssTot > 0.0 ? max(0.0, 1.0 - ($ssRes / $ssTot)) : 0.0;

        return ['slope' => $slope, 'r_squared' => $rSquared];
    }

    /**
     * Join the detail texts of the trend signal (when enabled and its slope
     * reaches the threshold) and the brand signal (when enabled and not
     * stable); fall back to a generic hint when neither applies.
     *
     * $direction is accepted for interface stability but not used.
     */
    private function buildReasoning(string $direction, float $slope, array $trend, array $brandBehaviour): string
    {
        $parts = [];

        if ($trend['enabled'] && abs($slope) >= self::SLOPE_THRESHOLD_PENCE) {
            $parts[] = $trend['detail'];
        }

        if ($brandBehaviour['enabled'] && $brandBehaviour['direction'] !== 'stable') {
            $parts[] = $brandBehaviour['detail'];
        }

        return $parts === []
            ? 'No clear pattern — fill up at the cheapest station near you now.'
            : implode(' ', $parts);
    }
}