fuel-alert/tests/Unit/Services/Forecasting/BacktestRunnerTest.php

<?php

use App\Models\Backtest;
use App\Services\Forecasting\BacktestRunner;
use App\Services\Forecasting\Contracts\ForecastFeature;
use App\Services\Forecasting\Contracts\WeeklyForecastModel;
use App\Services\Forecasting\FeatureSpec;
use App\Services\Forecasting\LeakDetectorException;
use App\Services\Forecasting\WeeklyPrediction;
use Carbon\Carbon;
use Carbon\CarbonInterface;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Support\Facades\DB;

uses(RefreshDatabase::class);

/**
 * Creates a minimal ForecastFeature test double.
 *
 * The double always reports a value of 0.0 and declares exactly one source
 * date: the target Monday shifted by $offsetDays days. The default of -7
 * points one week before the target; an offset of 0 points at the target
 * week itself, which the leakage test in this file uses to simulate a leak.
 *
 * @param  string  $name  Value returned by the feature's name() method.
 * @param  int  $offsetDays  Day shift applied to the target Monday.
 */
function backtestFeature(string $name, int $offsetDays = -7): ForecastFeature
{
    return new class($name, $offsetDays) implements ForecastFeature
    {
        public function __construct(
            private readonly string $label,
            private readonly int $dayShift,
        ) {}

        public function name(): string
        {
            return $this->label;
        }

        public function valueFor(CarbonInterface $targetMonday): float
        {
            // Constant placeholder; the target date is deliberately ignored.
            return 0.0;
        }

        public function sourceDates(CarbonInterface $targetMonday): array
        {
            // Work on a copy so the caller's date instance is never shifted.
            $sourceDate = $targetMonday->copy()->addDays($this->dayShift);

            return [$sourceDate];
        }
    };
}

/**
 * Creates a WeeklyForecastModel test double with a constant prediction.
 *
 * Every call to predict() returns the same magnitude. The direction is
 * derived from that magnitude: above 0.2 is 'rising', below -0.2 is
 * 'falling', anything in between is 'flat'. Training is a no-op and the
 * model exposes no coefficients.
 *
 * @param  float  $alwaysPredictPence  Magnitude returned for every target week.
 * @param  string  $modelLabel  Label passed to the model's FeatureSpec.
 */
function stubModel(float $alwaysPredictPence, string $modelLabel = 'stub'): WeeklyForecastModel
{
    return new class($alwaysPredictPence, $modelLabel) implements WeeklyForecastModel
    {
        public function __construct(
            private readonly float $fixedMagnitude,
            private readonly string $label,
        ) {}

        public function featureSpec(): FeatureSpec
        {
            // Single feature using the default offset of backtestFeature().
            $features = [backtestFeature('lag_1w')];

            return new FeatureSpec(
                modelLabel: $this->label,
                features: $features,
            );
        }

        public function train(array $trainingMondays): void
        {
            // Nothing to fit: the prediction is fixed at construction time.
        }

        public function predict(CarbonInterface $targetMonday): WeeklyPrediction
        {
            if ($this->fixedMagnitude > 0.2) {
                $direction = 'rising';
            } elseif ($this->fixedMagnitude < -0.2) {
                $direction = 'falling';
            } else {
                $direction = 'flat';
            }

            return new WeeklyPrediction(
                targetMonday: $targetMonday,
                magnitudePence: $this->fixedMagnitude,
                direction: $direction,
            );
        }

        public function coefficients(): ?array
        {
            return null;
        }
    };
}

/**
 * Seeds the weekly_pump_prices table with eight consecutive weekly rows.
 *
 * Rows start on 2024-01-01 and are spaced one week apart, so the last row is
 * dated 2024-02-19. ulsp_pence grows by 100 per row and ulsd_pence by 80, so
 * both series rise every week; the duty and VAT columns are constant.
 *
 * All rows are written with a single multi-row insert instead of one query
 * per week, since this helper runs at the start of every test in the file.
 */
function seedWeeklyPumpPrices(): void
{
    $firstMonday = Carbon::parse('2024-01-01');

    $rows = array_map(
        static fn (int $week): array => [
            // copy() keeps the shared start date untouched between rows.
            'date' => $firstMonday->copy()->addWeeks($week)->toDateString(),
            'ulsp_pence' => 14000 + ($week * 100), // each week +1p
            'ulsd_pence' => 15000 + ($week * 80),
            'ulsp_duty_pence' => 5295,
            'ulsd_duty_pence' => 5295,
            'ulsp_vat_pct' => 20,
            'ulsd_vat_pct' => 20,
        ],
        range(0, 7),
    );

    DB::table('weekly_pump_prices')->insert($rows);
}

it('refuses to run when the spec has structural leakage', function () {
    seedWeeklyPumpPrices();

    // Model whose only feature declares the target Monday itself (offset 0)
    // as its source date, i.e. it reads the week it is trying to predict.
    $leakyModel = new class implements WeeklyForecastModel
    {
        public function featureSpec(): FeatureSpec
        {
            $features = [backtestFeature('reads_target_week', 0)];

            return new FeatureSpec(modelLabel: 'leaky', features: $features);
        }

        public function train(array $trainingMondays): void
        {
            // No training needed for this double.
        }

        public function predict(CarbonInterface $targetMonday): WeeklyPrediction
        {
            return new WeeklyPrediction(
                targetMonday: $targetMonday,
                magnitudePence: 0.0,
                direction: 'flat',
            );
        }

        public function coefficients(): ?array
        {
            return null;
        }
    };

    $runner = new BacktestRunner;

    // The run itself is the assertion: it must throw (see ->throws() below).
    $runner->run(
        $leakyModel,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );
})->throws(LeakDetectorException::class);

it('persists a backtest row with metrics for a clean run', function () {
    seedWeeklyPumpPrices();

    // Train on the first five seeded weeks, evaluate on the last three.
    $trainStart = Carbon::parse('2024-01-01');
    $trainEnd = Carbon::parse('2024-01-29');
    $evalStart = Carbon::parse('2024-02-05');
    $evalEnd = Carbon::parse('2024-02-19');

    $runner = new BacktestRunner;
    $backtest = $runner->run(
        stubModel(alwaysPredictPence: 100.0), // constant positive prediction
        trainStart: $trainStart,
        trainEnd: $trainEnd,
        evalStart: $evalStart,
        evalEnd: $evalEnd,
    );

    // The run returns a model instance and stores exactly one row.
    expect($backtest)->toBeInstanceOf(Backtest::class)
        ->and(Backtest::query()->count())->toBe(1);

    // The stored row carries the stub's label and the requested window.
    $stored = Backtest::query()->first();

    expect($stored->model_version)->toStartWith('stub-');
    expect($stored->train_start->toDateString())->toBe('2024-01-01');
    expect($stored->eval_end->toDateString())->toBe('2024-02-19');
    expect($stored->ran_at)->not->toBeNull();
});

it('computes 100% directional accuracy when stub always nails the direction', function () {
    seedWeeklyPumpPrices();

    // The seeded series increases every week, and a constant prediction of
    // 100.0 makes the stub report 'rising' for every target week, so each
    // predicted direction should match the actual one.
    $risingModel = stubModel(alwaysPredictPence: 100.0);

    $runner = new BacktestRunner;
    $backtest = $runner->run(
        $risingModel,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    $accuracy = (float) $backtest->directional_accuracy;

    expect($accuracy)->toBe(100.0);
});

it('computes 0% directional accuracy when stub always picks the wrong direction', function () {
    seedWeeklyPumpPrices();

    // The seeded series increases every week, while a constant prediction of
    // -100.0 makes the stub report 'falling', so no week should match.
    $fallingModel = stubModel(alwaysPredictPence: -100.0);

    $runner = new BacktestRunner;
    $backtest = $runner->run(
        $fallingModel,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    $accuracy = (float) $backtest->directional_accuracy;

    expect($accuracy)->toBe(0.0);
});

it('flags leak_suspected when directional accuracy exceeds 75%', function () {
    seedWeeklyPumpPrices();

    // A stub that reports 'rising' every week against an always-rising
    // series is right every time, which is above the 75% threshold.
    $alwaysRightModel = stubModel(alwaysPredictPence: 100.0);

    $runner = new BacktestRunner;
    $backtest = $runner->run(
        $alwaysRightModel,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    expect($backtest->leak_suspected)->toBeTrue();
});

it('does not flag leak_suspected for realistic accuracy', function () {
    seedWeeklyPumpPrices();

    // A constant prediction of 0.0 makes the stub report 'flat', while the
    // seeded series rises every week. The stub is therefore expected to miss
    // every week, keeping accuracy well below the 75% leak threshold.
    $flatModel = stubModel(alwaysPredictPence: 0.0);

    $runner = new BacktestRunner;
    $backtest = $runner->run(
        $flatModel,
        trainStart: Carbon::parse('2024-01-01'),
        trainEnd: Carbon::parse('2024-01-29'),
        evalStart: Carbon::parse('2024-02-05'),
        evalEnd: Carbon::parse('2024-02-19'),
    );

    expect($backtest->leak_suspected)->toBeFalse();
});