fix(forecasting): persist LLM overlay under Tier-1 ITPM via two-call architecture
The daily forecast:llm-overlay command was being skipped because the previous single-conversation flow consumed more than Tier-1's 50,000 input-tokens-per-minute Anthropic bucket. The web_search tool auto-caches its results (~55k tokens) and requires `encrypted_content` intact when those blocks are resent, so the prior retry-on-missing-citations path either 429'd or 400'd on the second call. LlmOverlayService now runs two independent API calls. Phase 1 invokes the web_search tool and we discard the transcript after harvesting the URLs + titles from the returned web_search_tool_result blocks. Phase 2 is a fresh conversation containing the forecast context and the harvested headlines as plain text, with a forced submit_overlay tool call. events_cited is now optional in the tool schema — Haiku's flaky compliance no longer matters because citations come from the search results, not the model's transcription. Model-tagged events (with directional impact) merge with harvested-only entries (impact: 'neutral'), deduped by URL. Between phases the service reads anthropic-ratelimit-input-tokens-remaining / …-reset from Phase 1's headers and sleeps proportionally — only long enough for the SUBMIT_TOKEN_BUDGET worth of refill, not for the full bucket reset, capped at 65 seconds. ApiLogger now captures usage.input_tokens, usage.output_tokens, cache_read_input_tokens, cache_creation_input_tokens, plus the rate-limit remaining/reset headers on every Anthropic response. New nullable columns on api_logs make rate-limit diagnostics directly queryable. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,21 @@ use Illuminate\Database\Eloquent\Attributes\Fillable;
|
||||
use Illuminate\Database\Eloquent\Factories\HasFactory;
|
||||
use Illuminate\Database\Eloquent\Model;
|
||||
|
||||
#[Fillable(['service', 'method', 'url', 'status_code', 'duration_ms', 'error', 'response_body'])]
|
||||
#[Fillable([
|
||||
'service',
|
||||
'method',
|
||||
'url',
|
||||
'status_code',
|
||||
'duration_ms',
|
||||
'error',
|
||||
'response_body',
|
||||
'input_tokens',
|
||||
'output_tokens',
|
||||
'cache_read_tokens',
|
||||
'cache_write_tokens',
|
||||
'ratelimit_remaining',
|
||||
'ratelimit_reset_at',
|
||||
])]
|
||||
class ApiLog extends Model
|
||||
{
|
||||
/** @use HasFactory<ApiLogFactory> */
|
||||
@@ -19,6 +33,7 @@ class ApiLog extends Model
|
||||
{
|
||||
return [
|
||||
'created_at' => 'datetime',
|
||||
'ratelimit_reset_at' => 'datetime',
|
||||
];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,10 +34,12 @@ class ApiLogger
|
||||
$statusCode = null;
|
||||
$error = null;
|
||||
$responseBody = null;
|
||||
$usage = [];
|
||||
|
||||
try {
|
||||
$response = $request();
|
||||
$statusCode = $response->status();
|
||||
$usage = $this->extractUsage($response);
|
||||
|
||||
if ($response->failed()) {
|
||||
$body = $response->body();
|
||||
@@ -53,6 +55,7 @@ class ApiLogger
|
||||
// doesn't. Pull the body when it's available.
|
||||
if ($e instanceof RequestException) {
|
||||
$responseBody = $this->truncate($e->response->body());
|
||||
$usage = $this->extractUsage($e->response);
|
||||
}
|
||||
|
||||
throw $e;
|
||||
@@ -65,6 +68,7 @@ class ApiLogger
|
||||
'duration_ms' => (int) round((microtime(true) - $start) * 1000),
|
||||
'error' => $error,
|
||||
'response_body' => $responseBody,
|
||||
...$usage,
|
||||
]);
|
||||
}
|
||||
}
|
||||
@@ -75,4 +79,39 @@ class ApiLogger
|
||||
? substr($body, 0, self::RESPONSE_BODY_CAP)
|
||||
: $body;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pull token-usage and rate-limit telemetry from a provider response.
|
||||
*
|
||||
* Today only Anthropic exposes both. Other providers return mostly
|
||||
* NULLs — callers don't need to know which is which.
|
||||
*
|
||||
* @return array<string, int|string|null>
|
||||
*/
|
||||
private function extractUsage(?Response $response): array
|
||||
{
|
||||
if ($response === null) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$usage = $response->json('usage');
|
||||
$tokens = is_array($usage) ? $usage : [];
|
||||
|
||||
$reset = $response->header('anthropic-ratelimit-input-tokens-reset');
|
||||
$remaining = $response->header('anthropic-ratelimit-input-tokens-remaining');
|
||||
|
||||
return [
|
||||
'input_tokens' => $this->intOrNull($tokens['input_tokens'] ?? null),
|
||||
'output_tokens' => $this->intOrNull($tokens['output_tokens'] ?? null),
|
||||
'cache_read_tokens' => $this->intOrNull($tokens['cache_read_input_tokens'] ?? null),
|
||||
'cache_write_tokens' => $this->intOrNull($tokens['cache_creation_input_tokens'] ?? null),
|
||||
'ratelimit_remaining' => $this->intOrNull($remaining !== '' ? $remaining : null),
|
||||
'ratelimit_reset_at' => $reset !== '' ? $reset : null,
|
||||
];
|
||||
}
|
||||
|
||||
private function intOrNull(mixed $value): ?int
|
||||
{
|
||||
return is_numeric($value) ? (int) $value : null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,9 @@ use App\Models\BrentPrice;
|
||||
use App\Models\LlmOverlay;
|
||||
use App\Models\VolatilityRegime;
|
||||
use App\Services\ApiLogger;
|
||||
use Carbon\CarbonImmutable;
|
||||
use Carbon\CarbonInterface;
|
||||
use Illuminate\Http\Client\Response;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use Illuminate\Support\Facades\Http;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
@@ -15,9 +17,21 @@ use Throwable;
|
||||
/**
|
||||
* Layer 4 — daily news-aware overlay on the calibrated ridge forecast.
|
||||
*
|
||||
* Calls Anthropic Haiku with the web_search tool, then forces a
|
||||
* submit_overlay tool call to get structured output. Cites events with
|
||||
* URLs; URLs are verified before storing. Empty citations → rejection.
|
||||
* Runs as two independent Anthropic API calls:
|
||||
* Phase 1 — web_search tool only; we capture the URLs/titles from
|
||||
* the returned web_search_tool_result blocks.
|
||||
* Phase 2 — fresh conversation containing those URLs+titles as plain
|
||||
* text plus a forced submit_overlay tool call.
|
||||
*
|
||||
* Phase 1's transcript is never sent back to Phase 2. Anthropic's
|
||||
* web_search auto-caches the encrypted page text (~55k tokens per
|
||||
* search) and requires it intact when web_search_tool_result blocks
|
||||
* are resent. Threading it through to Phase 2 either blows the Tier-1
|
||||
* 50k ITPM bucket or 400s if we try to strip it. Two clean calls keep
|
||||
* Phase 2 around 3k input tokens.
|
||||
*
|
||||
* Citations are harvested directly from Phase 1's web_search_tool_result
|
||||
* blocks — Haiku is unreliable about populating `events_cited` itself.
|
||||
*
|
||||
* Read-only with respect to the volatility flag — Layer 4 writes its
|
||||
* `llm_overlays` row; Layer 5's hourly cron picks it up and decides
|
||||
@@ -31,6 +45,15 @@ final class LlmOverlayService
|
||||
|
||||
private const int COOLDOWN_HOURS = 4;
|
||||
|
||||
private const int MAX_SEARCH_TURNS = 2;
|
||||
|
||||
/**
|
||||
* Approximate input-token cost of Phase 2 (system + tool schema +
|
||||
* forecast context + harvested URL list). If Phase 1 leaves
|
||||
* remaining ITPM below this, wait for the bucket to refill.
|
||||
*/
|
||||
private const int SUBMIT_TOKEN_BUDGET = 4_000;
|
||||
|
||||
public function __construct(
|
||||
private readonly ApiLogger $apiLogger,
|
||||
private readonly WeeklyForecastService $weeklyForecast,
|
||||
@@ -55,19 +78,24 @@ final class LlmOverlayService
|
||||
$forecast = $this->weeklyForecast->currentForecast();
|
||||
$context = $this->buildContext($forecast);
|
||||
|
||||
$rawResult = $this->callAnthropic($context);
|
||||
if ($rawResult === null) {
|
||||
$callResult = $this->callAnthropic($context);
|
||||
if ($callResult === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$verifiedEvents = $this->verifyCitedUrls($rawResult['events_cited'] ?? []);
|
||||
$rawResult = $callResult['raw'];
|
||||
$harvested = $callResult['harvested'];
|
||||
|
||||
$mergedEvents = $this->mergeEvents($rawResult['events_cited'] ?? [], $harvested);
|
||||
$verifiedEvents = $this->verifyCitedUrls($mergedEvents);
|
||||
|
||||
if ($verifiedEvents === []) {
|
||||
Log::warning('LlmOverlayService: no verified citations, rejecting overlay', [
|
||||
'events_cited_count' => count($rawResult['events_cited'] ?? []),
|
||||
'model_events' => $rawResult['events_cited'] ?? null,
|
||||
'harvested_urls' => array_column($harvested, 'url'),
|
||||
'direction' => $rawResult['direction'] ?? null,
|
||||
'confidence' => $rawResult['confidence'] ?? null,
|
||||
'reasoning_short' => $rawResult['reasoning_short'] ?? null,
|
||||
'raw_result' => $rawResult,
|
||||
]);
|
||||
|
||||
return null;
|
||||
@@ -131,70 +159,44 @@ final class LlmOverlayService
|
||||
];
|
||||
}
|
||||
|
||||
/** @return array<string, mixed>|null */
|
||||
/**
|
||||
* Two independent API calls:
|
||||
*
|
||||
* Phase 1 — runs the web_search tool, captures the assistant's
|
||||
* returned `web_search_tool_result` blocks, then
|
||||
* discards the transcript.
|
||||
*
|
||||
* Phase 2 — issues a brand-new conversation with the harvested
|
||||
* URLs/titles flattened into a plain-text user message
|
||||
* and forces a `submit_overlay` tool call.
|
||||
*
|
||||
* Why not one stitched conversation: Anthropic auto-caches web_search
|
||||
* results into ITPM (≈55k tokens for a 1-search call) and requires
|
||||
* `encrypted_content` intact when those blocks are sent back.
|
||||
* Resending the Phase 1 transcript to Phase 2 either rate-limits us
|
||||
* (29k+ tokens twice → exceeds the Tier-1 50k ITPM bucket) or 400s
|
||||
* if we strip the encrypted blob. A fresh Phase 2 sends ~3k tokens
|
||||
* total — small enough to fit in the recovered bucket after a
|
||||
* short adaptive sleep.
|
||||
*
|
||||
* @return array{raw: array<string, mixed>, harvested: array<int, array{url: string, title: string}>}|null
|
||||
*/
|
||||
private function callAnthropic(array $context): ?array
|
||||
{
|
||||
$messages = [['role' => 'user', 'content' => $this->prompt($context)]];
|
||||
|
||||
try {
|
||||
// Phase 1: web search loop. Append the assistant turn after every
|
||||
// successful response, then decide whether to keep looping —
|
||||
// this guarantees the messages array stays well-formed regardless
|
||||
// of whether we exit via `break` or by exhausting iterations.
|
||||
for ($i = 0, $response = null; $i < 5; $i++) {
|
||||
$response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(45)
|
||||
->withHeaders($this->headers())
|
||||
->post(self::URL, [
|
||||
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
|
||||
'max_tokens' => 1024,
|
||||
'tools' => [['type' => 'web_search_20250305', 'name' => 'web_search']],
|
||||
'messages' => $messages,
|
||||
]));
|
||||
|
||||
if (! $response->successful()) {
|
||||
Log::error('LlmOverlayService: search request failed', ['status' => $response->status()]);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
$messages[] = ['role' => 'assistant', 'content' => $response->json('content')];
|
||||
|
||||
if ($response->json('stop_reason') !== 'pause_turn') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
$messages[] = ['role' => 'user', 'content' => 'Now submit your overlay using the submit_overlay tool. Cite at least one event with a URL.'];
|
||||
|
||||
// Phase 2: forced structured output
|
||||
$submitResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
|
||||
->withHeaders($this->headers())
|
||||
->post(self::URL, [
|
||||
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
|
||||
'max_tokens' => 512,
|
||||
'tools' => [$this->submitOverlayTool()],
|
||||
'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
|
||||
'messages' => $messages,
|
||||
]));
|
||||
|
||||
if (! $submitResponse->successful()) {
|
||||
Log::error('LlmOverlayService: submit request failed', ['status' => $submitResponse->status()]);
|
||||
|
||||
$phase1 = $this->runWebSearch($context);
|
||||
if ($phase1 === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$submitContent = $submitResponse->json('content') ?? [];
|
||||
$rawResult = $this->extractToolInput($submitContent);
|
||||
$this->waitForRateLimitIfNeeded($phase1['response']);
|
||||
|
||||
// Haiku sometimes calls submit_overlay without `events_cited` even
|
||||
// though the schema marks it required. Confirmed in laravel.log on
|
||||
// 2026-05-12: tool_use input had only direction/confidence/reasoning.
|
||||
// Retry once with an explicit tool_result error.
|
||||
if ($this->citationsMissing($rawResult)) {
|
||||
$rawResult = $this->retrySubmitWithCitationError($messages, $submitContent) ?? $rawResult;
|
||||
$rawResult = $this->runSubmit($context, $phase1['harvested']);
|
||||
if ($rawResult === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return $rawResult;
|
||||
return ['raw' => $rawResult, 'harvested' => $phase1['harvested']];
|
||||
} catch (Throwable $e) {
|
||||
Log::error('LlmOverlayService: callAnthropic failed', ['error' => $e->getMessage()]);
|
||||
|
||||
@@ -202,6 +204,239 @@ final class LlmOverlayService
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Phase 1: ask the model to search for news and capture the
|
||||
* web_search_tool_result blocks. Returns the harvested citations
|
||||
* and the final response (whose rate-limit headers tell us when
|
||||
* the ITPM bucket will be replenished for Phase 2).
|
||||
*
|
||||
* @return array{harvested: array<int, array{url: string, title: string}>, response: Response}|null
|
||||
*/
|
||||
private function runWebSearch(array $context): ?array
|
||||
{
|
||||
$messages = [['role' => 'user', 'content' => $this->searchUserMessage($context)]];
|
||||
$response = null;
|
||||
|
||||
for ($i = 0; $i < self::MAX_SEARCH_TURNS; $i++) {
|
||||
$response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(45)
|
||||
->withHeaders($this->headers())
|
||||
->post(self::URL, [
|
||||
'model' => $this->model(),
|
||||
'max_tokens' => 1024,
|
||||
'system' => $this->searchSystem(),
|
||||
'tools' => [['type' => 'web_search_20250305', 'name' => 'web_search']],
|
||||
'messages' => $messages,
|
||||
]));
|
||||
|
||||
if (! $response->successful()) {
|
||||
Log::error('LlmOverlayService: search request failed', [
|
||||
'status' => $response->status(),
|
||||
'body' => substr($response->body(), 0, 500),
|
||||
]);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
$messages[] = ['role' => 'assistant', 'content' => $response->json('content')];
|
||||
|
||||
if ($response->json('stop_reason') !== 'pause_turn') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ($response === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return [
|
||||
'harvested' => $this->harvestSearchResults($messages),
|
||||
'response' => $response,
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* Phase 2: fresh API call — no Phase 1 transcript — with the
|
||||
* harvested citations as plain text and a forced submit_overlay
|
||||
* tool call.
|
||||
*
|
||||
* @param array<int, array{url: string, title: string}> $harvested
|
||||
* @return array<string, mixed>|null
|
||||
*/
|
||||
private function runSubmit(array $context, array $harvested): ?array
|
||||
{
|
||||
$response = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
|
||||
->withHeaders($this->headers())
|
||||
->post(self::URL, [
|
||||
'model' => $this->model(),
|
||||
'max_tokens' => 512,
|
||||
'system' => $this->submitSystem(),
|
||||
'tools' => [$this->submitOverlayTool()],
|
||||
'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
|
||||
'messages' => [['role' => 'user', 'content' => $this->submitUserMessage($context, $harvested)]],
|
||||
]));
|
||||
|
||||
if (! $response->successful()) {
|
||||
Log::error('LlmOverlayService: submit request failed', [
|
||||
'status' => $response->status(),
|
||||
'body' => substr($response->body(), 0, 500),
|
||||
]);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
$rawResult = $this->extractToolInput($response->json('content') ?? []);
|
||||
if ($rawResult === null) {
|
||||
Log::warning('LlmOverlayService: submit response missing tool_use block');
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return $rawResult;
|
||||
}
|
||||
|
||||
/**
|
||||
* Anthropic's web_search burns ≈55k input tokens (mostly auto-cached
|
||||
* search results) on Phase 1. At Tier 1's 50k ITPM the bucket can
|
||||
* be at zero immediately afterwards. Read the rate-limit headers
|
||||
* and sleep until the bucket has refilled enough for Phase 2.
|
||||
* Capped at 65s so the daily cron never hangs longer than a minute.
|
||||
*/
|
||||
private function waitForRateLimitIfNeeded(Response $response): void
|
||||
{
|
||||
$remaining = (int) $response->header('anthropic-ratelimit-input-tokens-remaining');
|
||||
if ($response->header('anthropic-ratelimit-input-tokens-remaining') === ''
|
||||
|| $remaining >= self::SUBMIT_TOKEN_BUDGET) {
|
||||
return;
|
||||
}
|
||||
|
||||
$resetAt = $response->header('anthropic-ratelimit-input-tokens-reset');
|
||||
$bucketSize = (int) $response->header('anthropic-ratelimit-input-tokens-limit');
|
||||
if ($resetAt === '' || $bucketSize <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
$secondsUntilFullReset = max(0, CarbonImmutable::parse($resetAt)->getTimestamp() - now()->getTimestamp());
|
||||
} catch (Throwable) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Anthropic's bucket refills linearly. We don't need to wait for
|
||||
// the full reset — only enough for SUBMIT_TOKEN_BUDGET tokens to
|
||||
// become available. Sleep proportionally + a small safety margin,
|
||||
// hard-capped at 65s.
|
||||
$tokensNeeded = self::SUBMIT_TOKEN_BUDGET - $remaining;
|
||||
$proportional = (int) ceil(($tokensNeeded / $bucketSize) * $secondsUntilFullReset);
|
||||
$waitSeconds = max(1, min(65, $proportional + 2));
|
||||
|
||||
Log::info('LlmOverlayService: waiting for ITPM bucket refill before submit', [
|
||||
'remaining' => $remaining,
|
||||
'wait_seconds' => $waitSeconds,
|
||||
'full_reset_in' => $secondsUntilFullReset,
|
||||
]);
|
||||
|
||||
sleep($waitSeconds);
|
||||
}
|
||||
|
||||
/**
|
||||
* Walk every assistant turn and extract `{url, title}` from each
|
||||
* `web_search_tool_result` block. Anthropic's web_search returns
|
||||
* these blocks directly — they are the authoritative citation
|
||||
* source, not anything the model transcribes back to us.
|
||||
*
|
||||
* @param array<int, array<string, mixed>> $messages
|
||||
* @return array<int, array{url: string, title: string}>
|
||||
*/
|
||||
private function harvestSearchResults(array $messages): array
|
||||
{
|
||||
$byUrl = [];
|
||||
foreach ($messages as $message) {
|
||||
if (($message['role'] ?? null) !== 'assistant') {
|
||||
continue;
|
||||
}
|
||||
$content = $message['content'] ?? [];
|
||||
if (! is_array($content)) {
|
||||
continue;
|
||||
}
|
||||
foreach ($content as $block) {
|
||||
if (! is_array($block) || ($block['type'] ?? null) !== 'web_search_tool_result') {
|
||||
continue;
|
||||
}
|
||||
$results = $block['content'] ?? [];
|
||||
if (! is_array($results)) {
|
||||
continue;
|
||||
}
|
||||
foreach ($results as $result) {
|
||||
if (! is_array($result) || ($result['type'] ?? null) !== 'web_search_result') {
|
||||
continue;
|
||||
}
|
||||
$url = (string) ($result['url'] ?? '');
|
||||
if ($url === '' || isset($byUrl[$url])) {
|
||||
continue;
|
||||
}
|
||||
$byUrl[$url] = ['url' => $url, 'title' => (string) ($result['title'] ?? '')];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return array_values($byUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge model-provided events_cited with citations harvested from
|
||||
* `web_search_tool_result`. Model entries (which include `impact`
|
||||
* tagging) take precedence on URL collision; harvested-only entries
|
||||
* default to `impact: 'neutral'`.
|
||||
*
|
||||
* @param array<int, mixed> $modelEvents
|
||||
* @param array<int, array{url: string, title: string}> $harvested
|
||||
* @return array<int, array<string, mixed>>
|
||||
*/
|
||||
private function mergeEvents(array $modelEvents, array $harvested): array
|
||||
{
|
||||
$byUrl = [];
|
||||
|
||||
foreach ($modelEvents as $event) {
|
||||
if (! is_array($event)) {
|
||||
continue;
|
||||
}
|
||||
$url = (string) ($event['url'] ?? '');
|
||||
if ($url === '') {
|
||||
continue;
|
||||
}
|
||||
$byUrl[$url] = [
|
||||
'headline' => (string) ($event['headline'] ?? ''),
|
||||
'source' => (string) ($event['source'] ?? ''),
|
||||
'url' => $url,
|
||||
'impact' => in_array($event['impact'] ?? null, ['rising', 'falling', 'neutral'], true)
|
||||
? $event['impact']
|
||||
: 'neutral',
|
||||
];
|
||||
}
|
||||
|
||||
foreach ($harvested as $result) {
|
||||
$url = $result['url'];
|
||||
if (isset($byUrl[$url])) {
|
||||
continue;
|
||||
}
|
||||
$byUrl[$url] = [
|
||||
'headline' => $result['title'],
|
||||
'source' => $this->domainOf($url),
|
||||
'url' => $url,
|
||||
'impact' => 'neutral',
|
||||
];
|
||||
}
|
||||
|
||||
return array_values($byUrl);
|
||||
}
|
||||
|
||||
private function domainOf(string $url): string
|
||||
{
|
||||
$host = parse_url($url, PHP_URL_HOST);
|
||||
|
||||
return is_string($host) ? preg_replace('/^www\./', '', $host) : '';
|
||||
}
|
||||
|
||||
private function verificationUserAgent(): string
|
||||
{
|
||||
$appUrl = rtrim((string) config('app.url'), '/');
|
||||
@@ -320,37 +555,61 @@ final class LlmOverlayService
|
||||
return config('services.anthropic.api_key');
|
||||
}
|
||||
|
||||
private function prompt(array $context): string
|
||||
private function model(): string
|
||||
{
|
||||
$json = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||
|
||||
return <<<PROMPT
|
||||
You are providing a daily news-aware overlay for a UK weekly pump-price forecast.
|
||||
|
||||
The calibrated ridge model has already produced a directional call from price history.
|
||||
Your job is to search recent oil/fuel news and decide whether to AGREE or DISAGREE
|
||||
— and most importantly, surface any major-impact event that the ridge model can't see
|
||||
from price history alone.
|
||||
return config('services.anthropic.model', 'claude-haiku-4-5-20251001');
|
||||
}
|
||||
|
||||
private function searchSystem(): string
|
||||
{
|
||||
return <<<'PROMPT'
|
||||
You are researching news that may affect this week's UK pump-price forecast.
|
||||
Search recent news (last 48 hours) for:
|
||||
- OPEC+ production decisions or unexpected announcements
|
||||
- Geopolitical events affecting oil supply (sanctions, conflict, shipping disruption)
|
||||
- Major refinery outages or pipeline incidents
|
||||
- US/EU inventory reports that materially moved Brent
|
||||
|
||||
Context for this week:
|
||||
$json
|
||||
|
||||
After searching, you will be asked to submit_overlay with direction, confidence
|
||||
(capped at $this->confidenceCap), short reasoning, cited events with URLs,
|
||||
agrees_with_ridge, and major_impact_event.
|
||||
|
||||
Citing events with REAL URLs is mandatory. An empty citation array will be
|
||||
rejected and the overlay discarded.
|
||||
Return only the search results — you will be asked to summarise separately.
|
||||
PROMPT;
|
||||
}
|
||||
|
||||
private string $confidenceCap = '75';
|
||||
private function searchUserMessage(array $context): string
|
||||
{
|
||||
$json = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||
|
||||
return "Use web_search to find oil/fuel news from the last 48 hours that could move UK pump prices this week.\n\nContext for this week:\n\n".$json;
|
||||
}
|
||||
|
||||
private function submitSystem(): string
|
||||
{
|
||||
$cap = self::CONFIDENCE_CAP;
|
||||
|
||||
return <<<PROMPT
|
||||
You are providing a news-aware directional overlay for a UK weekly pump-price forecast.
|
||||
Decide whether to AGREE or DISAGREE with the ridge model based on the news headlines
|
||||
provided in the user message. Cap confidence at $cap.
|
||||
Include events_cited (with impact tags) for any specific headline that drove your
|
||||
reasoning; you may leave events_cited empty if the news is unremarkable.
|
||||
PROMPT;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array<int, array{url: string, title: string}> $harvested
|
||||
*/
|
||||
private function submitUserMessage(array $context, array $harvested): string
|
||||
{
|
||||
$contextJson = json_encode($context, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||
|
||||
if ($harvested === []) {
|
||||
$headlines = '(none — no relevant news found)';
|
||||
} else {
|
||||
$headlines = collect($harvested)
|
||||
->map(fn (array $r): string => '- '.$r['title'].' — '.$r['url'])
|
||||
->implode("\n");
|
||||
}
|
||||
|
||||
return "Context for this week:\n\n".$contextJson."\n\nNews headlines found:\n".$headlines."\n\nNow call submit_overlay with your decision.";
|
||||
}
|
||||
|
||||
/** @return array<string, mixed> */
|
||||
private function submitOverlayTool(): array
|
||||
@@ -366,7 +625,7 @@ final class LlmOverlayService
|
||||
'reasoning_short' => ['type' => 'string', 'description' => '1–2 sentences.'],
|
||||
'events_cited' => [
|
||||
'type' => 'array',
|
||||
'minItems' => 1,
|
||||
'description' => 'Optional. Events that drove your reasoning, with directional impact. Citations are otherwise harvested from web_search_tool_result.',
|
||||
'items' => [
|
||||
'type' => 'object',
|
||||
'properties' => [
|
||||
@@ -381,7 +640,7 @@ final class LlmOverlayService
|
||||
'agrees_with_ridge' => ['type' => 'boolean'],
|
||||
'major_impact_event' => ['type' => 'boolean'],
|
||||
],
|
||||
'required' => ['direction', 'confidence', 'reasoning_short', 'events_cited', 'agrees_with_ridge', 'major_impact_event'],
|
||||
'required' => ['direction', 'confidence', 'reasoning_short', 'agrees_with_ridge', 'major_impact_event'],
|
||||
],
|
||||
];
|
||||
}
|
||||
@@ -396,57 +655,4 @@ final class LlmOverlayService
|
||||
|
||||
return $block['input'] ?? null;
|
||||
}
|
||||
|
||||
/** @param array<string, mixed>|null $rawResult */
|
||||
private function citationsMissing(?array $rawResult): bool
|
||||
{
|
||||
return $rawResult === null
|
||||
|| ! isset($rawResult['events_cited'])
|
||||
|| ! is_array($rawResult['events_cited'])
|
||||
|| $rawResult['events_cited'] === [];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param array<int, mixed> $messages
|
||||
* @param array<int, mixed> $failedSubmitContent
|
||||
* @return array<string, mixed>|null
|
||||
*/
|
||||
private function retrySubmitWithCitationError(array $messages, array $failedSubmitContent): ?array
|
||||
{
|
||||
$toolUseId = collect($failedSubmitContent)->firstWhere('type', 'tool_use')['id'] ?? null;
|
||||
|
||||
if ($toolUseId === null) {
|
||||
Log::warning('LlmOverlayService: cannot retry — no tool_use id in failed submit');
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
Log::info('LlmOverlayService: retrying submit with citation error', ['tool_use_id' => $toolUseId]);
|
||||
|
||||
$messages[] = ['role' => 'assistant', 'content' => $failedSubmitContent];
|
||||
$messages[] = ['role' => 'user', 'content' => [[
|
||||
'type' => 'tool_result',
|
||||
'tool_use_id' => $toolUseId,
|
||||
'content' => 'events_cited was missing or empty. Resubmit submit_overlay with at least one event from your earlier web search results, including its real URL, headline, source, and impact.',
|
||||
'is_error' => true,
|
||||
]]];
|
||||
|
||||
$retryResponse = $this->apiLogger->send('anthropic', 'POST', self::URL, fn () => Http::timeout(20)
|
||||
->withHeaders($this->headers())
|
||||
->post(self::URL, [
|
||||
'model' => config('services.anthropic.model', 'claude-haiku-4-5-20251001'),
|
||||
'max_tokens' => 512,
|
||||
'tools' => [$this->submitOverlayTool()],
|
||||
'tool_choice' => ['type' => 'tool', 'name' => 'submit_overlay'],
|
||||
'messages' => $messages,
|
||||
]));
|
||||
|
||||
if (! $retryResponse->successful()) {
|
||||
Log::error('LlmOverlayService: retry submit failed', ['status' => $retryResponse->status()]);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return $this->extractToolInput($retryResponse->json('content') ?? []);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
|
||||
{
|
||||
/**
|
||||
* Capture token usage and rate-limit headers from token-metering
|
||||
* providers (today: Anthropic). These columns let us see the
|
||||
* cumulative input-tokens-per-minute trajectory directly in
|
||||
* api_logs rather than inferring it from request counts.
|
||||
*/
|
||||
public function up(): void
|
||||
{
|
||||
Schema::table('api_logs', function (Blueprint $table) {
|
||||
$table->unsignedInteger('input_tokens')->nullable()->after('response_body')
|
||||
->comment('Input tokens billed (Anthropic usage.input_tokens). NULL for providers that do not report usage.');
|
||||
$table->unsignedInteger('output_tokens')->nullable()->after('input_tokens')
|
||||
->comment('Output tokens billed (Anthropic usage.output_tokens).');
|
||||
$table->unsignedInteger('cache_read_tokens')->nullable()->after('output_tokens')
|
||||
->comment('Cache-hit tokens (Anthropic usage.cache_read_input_tokens). Do not count toward ITPM on most models.');
|
||||
$table->unsignedInteger('cache_write_tokens')->nullable()->after('cache_read_tokens')
|
||||
->comment('Cache-write tokens (Anthropic usage.cache_creation_input_tokens). Count toward ITPM.');
|
||||
$table->unsignedInteger('ratelimit_remaining')->nullable()->after('cache_write_tokens')
|
||||
->comment('Provider-reported input-tokens remaining in the rolling window (anthropic-ratelimit-input-tokens-remaining).');
|
||||
$table->dateTime('ratelimit_reset_at')->nullable()->after('ratelimit_remaining')
|
||||
->comment('When the input-tokens bucket will be fully replenished (anthropic-ratelimit-input-tokens-reset, RFC 3339).');
|
||||
});
|
||||
}
|
||||
|
||||
public function down(): void
|
||||
{
|
||||
Schema::table('api_logs', function (Blueprint $table) {
|
||||
$table->dropColumn([
|
||||
'input_tokens',
|
||||
'output_tokens',
|
||||
'cache_read_tokens',
|
||||
'cache_write_tokens',
|
||||
'ratelimit_remaining',
|
||||
'ratelimit_reset_at',
|
||||
]);
|
||||
});
|
||||
}
|
||||
};
|
||||
@@ -119,3 +119,57 @@ it('captures response_body when an HTTP RequestException is thrown', function ()
|
||||
|
||||
expect(ApiLog::first()->response_body)->toBe('upstream details');
|
||||
});
|
||||
|
||||
it('captures Anthropic usage tokens from a successful response', function (): void {
|
||||
Http::fake(['https://api.anthropic.com/v1/messages' => Http::response([
|
||||
'content' => [],
|
||||
'usage' => [
|
||||
'input_tokens' => 1234,
|
||||
'output_tokens' => 56,
|
||||
'cache_creation_input_tokens' => 8000,
|
||||
'cache_read_input_tokens' => 12000,
|
||||
],
|
||||
])]);
|
||||
|
||||
$this->apiLogger->send('anthropic', 'POST', 'https://api.anthropic.com/v1/messages',
|
||||
fn () => Http::post('https://api.anthropic.com/v1/messages'));
|
||||
|
||||
$log = ApiLog::first();
|
||||
expect($log->input_tokens)->toBe(1234)
|
||||
->and($log->output_tokens)->toBe(56)
|
||||
->and($log->cache_write_tokens)->toBe(8000)
|
||||
->and($log->cache_read_tokens)->toBe(12000);
|
||||
});
|
||||
|
||||
it('captures rate-limit headers from any provider response', function (): void {
|
||||
Http::fake(['https://api.anthropic.com/v1/messages' => Http::response(
|
||||
['content' => [], 'usage' => ['input_tokens' => 100, 'output_tokens' => 10]],
|
||||
200,
|
||||
[
|
||||
'anthropic-ratelimit-input-tokens-remaining' => '38000',
|
||||
'anthropic-ratelimit-input-tokens-reset' => '2026-05-14T12:41:00Z',
|
||||
],
|
||||
)]);
|
||||
|
||||
$this->apiLogger->send('anthropic', 'POST', 'https://api.anthropic.com/v1/messages',
|
||||
fn () => Http::post('https://api.anthropic.com/v1/messages'));
|
||||
|
||||
$log = ApiLog::first();
|
||||
expect($log->ratelimit_remaining)->toBe(38000)
|
||||
->and($log->ratelimit_reset_at?->toIso8601String())->toBe('2026-05-14T12:41:00+00:00');
|
||||
});
|
||||
|
||||
it('leaves token columns null for services without usage data', function (): void {
|
||||
Http::fake(['https://example.com/x' => Http::response(['ok' => true])]);
|
||||
|
||||
$this->apiLogger->send('test_service', 'GET', 'https://example.com/x',
|
||||
fn () => Http::get('https://example.com/x'));
|
||||
|
||||
$log = ApiLog::first();
|
||||
expect($log->input_tokens)->toBeNull()
|
||||
->and($log->output_tokens)->toBeNull()
|
||||
->and($log->cache_read_tokens)->toBeNull()
|
||||
->and($log->cache_write_tokens)->toBeNull()
|
||||
->and($log->ratelimit_remaining)->toBeNull()
|
||||
->and($log->ratelimit_reset_at)->toBeNull();
|
||||
});
|
||||
|
||||
@@ -18,32 +18,63 @@ beforeEach(function (): void {
|
||||
Config::set('services.anthropic.api_key', 'test-key');
|
||||
});
|
||||
|
||||
function fakeAnthropicWithOverlay(string $direction, int $confidence, array $events, bool $major = false): void
|
||||
/**
|
||||
* Anthropic-shaped Phase 1 assistant turn that includes a real
|
||||
* web_search_tool_result block (the source of truth for harvested
|
||||
* citations).
|
||||
*
|
||||
* @param array<int, array{url: string, title: string}> $results
|
||||
* @return array<string, mixed>
|
||||
*/
|
||||
function fakeSearchResultsTurn(array $results): array
|
||||
{
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push([
|
||||
'stop_reason' => 'end_turn',
|
||||
'content' => [['type' => 'text', 'text' => 'Search summary.']],
|
||||
])
|
||||
->push([
|
||||
'stop_reason' => 'tool_use',
|
||||
'content' => [[
|
||||
'type' => 'tool_use',
|
||||
'name' => 'submit_overlay',
|
||||
'input' => [
|
||||
'direction' => $direction,
|
||||
'confidence' => $confidence,
|
||||
'reasoning_short' => 'Test reasoning.',
|
||||
'events_cited' => $events,
|
||||
'agrees_with_ridge' => true,
|
||||
'major_impact_event' => $major,
|
||||
],
|
||||
]],
|
||||
]),
|
||||
// URL HEAD verification probes — accept everything by default
|
||||
'*' => Http::response('', 200),
|
||||
]);
|
||||
$content = [['type' => 'text', 'text' => 'Searching...']];
|
||||
foreach ($results as $idx => $r) {
|
||||
$content[] = [
|
||||
'type' => 'server_tool_use',
|
||||
'id' => 'srvtoolu_'.$idx,
|
||||
'name' => 'web_search',
|
||||
'input' => ['query' => 'oil news'],
|
||||
];
|
||||
$content[] = [
|
||||
'type' => 'web_search_tool_result',
|
||||
'tool_use_id' => 'srvtoolu_'.$idx,
|
||||
'content' => [[
|
||||
'type' => 'web_search_result',
|
||||
'url' => $r['url'],
|
||||
'title' => $r['title'],
|
||||
'encrypted_content' => str_repeat('LONG_PAGE_TEXT_', 200),
|
||||
'page_age' => '1 day ago',
|
||||
]],
|
||||
];
|
||||
}
|
||||
|
||||
return ['stop_reason' => 'end_turn', 'content' => $content];
|
||||
}
|
||||
|
||||
/** @param array<int, array<string, mixed>> $events */
|
||||
function fakeSubmitTurn(string $direction, int $confidence, array $events, bool $major = false): array
|
||||
{
|
||||
$input = [
|
||||
'direction' => $direction,
|
||||
'confidence' => $confidence,
|
||||
'reasoning_short' => 'Test reasoning.',
|
||||
'agrees_with_ridge' => true,
|
||||
'major_impact_event' => $major,
|
||||
];
|
||||
if ($events !== []) {
|
||||
$input['events_cited'] = $events;
|
||||
}
|
||||
|
||||
return [
|
||||
'stop_reason' => 'tool_use',
|
||||
'content' => [[
|
||||
'type' => 'tool_use',
|
||||
'id' => 'toolu_submit',
|
||||
'name' => 'submit_overlay',
|
||||
'input' => $input,
|
||||
]],
|
||||
];
|
||||
}
|
||||
|
||||
it('skips when ANTHROPIC_API_KEY is not set', function (): void {
|
||||
@@ -54,8 +85,13 @@ it('skips when ANTHROPIC_API_KEY is not set', function (): void {
|
||||
expect($service->run())->toBeNull();
|
||||
});
|
||||
|
||||
it('rejects the overlay when no events are cited', function (): void {
|
||||
fakeAnthropicWithOverlay('rising', 60, []);
|
||||
it('rejects only when neither web search nor model cited anything', function (): void {
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'no results']]])
|
||||
->push(fakeSubmitTurn('rising', 60, [])),
|
||||
'*' => Http::response('', 200),
|
||||
]);
|
||||
|
||||
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
|
||||
|
||||
@@ -66,30 +102,13 @@ it('rejects the overlay when no events are cited', function (): void {
|
||||
it('verifies a URL via GET fallback when HEAD returns 405', function (): void {
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push([
|
||||
'stop_reason' => 'end_turn',
|
||||
'content' => [['type' => 'text', 'text' => 'ok']],
|
||||
])
|
||||
->push([
|
||||
'stop_reason' => 'tool_use',
|
||||
'content' => [[
|
||||
'type' => 'tool_use',
|
||||
'name' => 'submit_overlay',
|
||||
'input' => [
|
||||
'direction' => 'rising',
|
||||
'confidence' => 60,
|
||||
'reasoning_short' => 'Hostile-to-HEAD source.',
|
||||
'events_cited' => [
|
||||
['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/x', 'impact' => 'rising'],
|
||||
],
|
||||
'agrees_with_ridge' => true,
|
||||
'major_impact_event' => false,
|
||||
],
|
||||
]],
|
||||
]),
|
||||
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
|
||||
->push(fakeSubmitTurn('rising', 60, [
|
||||
['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/x', 'impact' => 'rising'],
|
||||
])),
|
||||
'reuters.com/*' => Http::sequence()
|
||||
->push('', 405) // HEAD → 405 Method Not Allowed
|
||||
->push('partial-body', 200), // GET fallback succeeds
|
||||
->push('', 405)
|
||||
->push('partial-body', 200),
|
||||
]);
|
||||
|
||||
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
|
||||
@@ -99,65 +118,13 @@ it('verifies a URL via GET fallback when HEAD returns 405', function (): void {
|
||||
->and($row->events_json)->toHaveCount(1);
|
||||
});
|
||||
|
||||
it('rejects the overlay when both HEAD and GET fail', function (): void {
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push([
|
||||
'stop_reason' => 'end_turn',
|
||||
'content' => [['type' => 'text', 'text' => 'ok']],
|
||||
])
|
||||
->push([
|
||||
'stop_reason' => 'tool_use',
|
||||
'content' => [[
|
||||
'type' => 'tool_use',
|
||||
'name' => 'submit_overlay',
|
||||
'input' => [
|
||||
'direction' => 'rising',
|
||||
'confidence' => 60,
|
||||
'reasoning_short' => 'Truly dead URL.',
|
||||
'events_cited' => [
|
||||
['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'],
|
||||
],
|
||||
'agrees_with_ridge' => true,
|
||||
'major_impact_event' => false,
|
||||
],
|
||||
]],
|
||||
]),
|
||||
'example.com/*' => Http::sequence()
|
||||
->push('', 404) // HEAD → 404
|
||||
->push('', 404), // GET → still 404
|
||||
]);
|
||||
|
||||
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
|
||||
|
||||
expect($service->run())->toBeNull()
|
||||
->and(LlmOverlay::query()->count())->toBe(0);
|
||||
});
|
||||
|
||||
it('rejects the overlay when every cited URL is unreachable', function (): void {
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push([
|
||||
'stop_reason' => 'end_turn',
|
||||
'content' => [['type' => 'text', 'text' => 'ok']],
|
||||
])
|
||||
->push([
|
||||
'stop_reason' => 'tool_use',
|
||||
'content' => [[
|
||||
'type' => 'tool_use',
|
||||
'name' => 'submit_overlay',
|
||||
'input' => [
|
||||
'direction' => 'rising',
|
||||
'confidence' => 60,
|
||||
'reasoning_short' => 'Test.',
|
||||
'events_cited' => [
|
||||
['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'],
|
||||
],
|
||||
'agrees_with_ridge' => true,
|
||||
'major_impact_event' => false,
|
||||
],
|
||||
]],
|
||||
]),
|
||||
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
|
||||
->push(fakeSubmitTurn('rising', 60, [
|
||||
['headline' => 'X', 'source' => 'Reuters', 'url' => 'https://example.com/dead', 'impact' => 'rising'],
|
||||
])),
|
||||
'example.com/*' => Http::response('', 404),
|
||||
]);
|
||||
|
||||
@@ -168,14 +135,14 @@ it('rejects the overlay when every cited URL is unreachable', function (): void
|
||||
});
|
||||
|
||||
it('persists an overlay row with verified citations and capped confidence', function (): void {
|
||||
fakeAnthropicWithOverlay(
|
||||
direction: 'rising',
|
||||
confidence: 95, // above cap → expect capped to 75
|
||||
events: [
|
||||
['headline' => 'OPEC cuts output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
|
||||
],
|
||||
major: true,
|
||||
);
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
|
||||
->push(fakeSubmitTurn('rising', 95, [
|
||||
['headline' => 'OPEC cuts output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
|
||||
], major: true)),
|
||||
'*' => Http::response('', 200),
|
||||
]);
|
||||
|
||||
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
|
||||
|
||||
@@ -183,51 +150,20 @@ it('persists an overlay row with verified citations and capped confidence', func
|
||||
|
||||
expect($row)->not->toBeNull()
|
||||
->and($row->direction)->toBe('rising')
|
||||
->and($row->confidence)->toBe(75) // capped
|
||||
->and($row->confidence)->toBe(75)
|
||||
->and($row->major_impact_event)->toBeTrue()
|
||||
->and($row->search_used)->toBeTrue()
|
||||
->and($row->events_json)->toHaveCount(1);
|
||||
});
|
||||
|
||||
it('retries the submit when the model omits events_cited', function (): void {
|
||||
it('harvests citations from web_search_tool_result when the model omits events_cited', function (): void {
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push([
|
||||
'stop_reason' => 'end_turn',
|
||||
'content' => [['type' => 'text', 'text' => 'Search done.']],
|
||||
])
|
||||
->push([
|
||||
'stop_reason' => 'tool_use',
|
||||
'content' => [[
|
||||
'type' => 'tool_use',
|
||||
'id' => 'toolu_first',
|
||||
'name' => 'submit_overlay',
|
||||
'input' => [
|
||||
'direction' => 'rising',
|
||||
'confidence' => 70,
|
||||
'reasoning_short' => 'Forgot citations.',
|
||||
// events_cited omitted entirely — the bug we are guarding against
|
||||
],
|
||||
]],
|
||||
])
|
||||
->push([
|
||||
'stop_reason' => 'tool_use',
|
||||
'content' => [[
|
||||
'type' => 'tool_use',
|
||||
'id' => 'toolu_retry',
|
||||
'name' => 'submit_overlay',
|
||||
'input' => [
|
||||
'direction' => 'rising',
|
||||
'confidence' => 70,
|
||||
'reasoning_short' => 'With citations now.',
|
||||
'events_cited' => [
|
||||
['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
|
||||
],
|
||||
'agrees_with_ridge' => true,
|
||||
'major_impact_event' => false,
|
||||
],
|
||||
]],
|
||||
]),
|
||||
->push(fakeSearchResultsTurn([
|
||||
['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
|
||||
['url' => 'https://bloomberg.com/iran', 'title' => 'Iran tensions'],
|
||||
]))
|
||||
->push(fakeSubmitTurn('rising', 70, [])),
|
||||
'*' => Http::response('', 200),
|
||||
]);
|
||||
|
||||
@@ -236,42 +172,79 @@ it('retries the submit when the model omits events_cited', function (): void {
|
||||
$row = $service->run();
|
||||
|
||||
expect($row)->not->toBeNull()
|
||||
->and($row->events_json)->toHaveCount(1)
|
||||
->and(LlmOverlay::query()->count())->toBe(1);
|
||||
->and($row->events_json)->toHaveCount(2)
|
||||
->and(collect($row->events_json)->pluck('url')->all())
|
||||
->toEqualCanonicalizing(['https://reuters.com/opec', 'https://bloomberg.com/iran'])
|
||||
->and(collect($row->events_json)->pluck('impact')->unique()->all())
|
||||
->toBe(['neutral']);
|
||||
});
|
||||
|
||||
it('rejects when the retry also omits events_cited', function (): void {
|
||||
it('merges model events_cited with harvested URLs deduped by URL', function (): void {
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push([
|
||||
'stop_reason' => 'end_turn',
|
||||
'content' => [['type' => 'text', 'text' => 'Search done.']],
|
||||
])
|
||||
->push([
|
||||
'stop_reason' => 'tool_use',
|
||||
'content' => [[
|
||||
'type' => 'tool_use',
|
||||
'id' => 'toolu_first',
|
||||
'name' => 'submit_overlay',
|
||||
'input' => ['direction' => 'rising', 'confidence' => 70, 'reasoning_short' => 'No cites.'],
|
||||
]],
|
||||
])
|
||||
->push([
|
||||
'stop_reason' => 'tool_use',
|
||||
'content' => [[
|
||||
'type' => 'tool_use',
|
||||
'id' => 'toolu_retry',
|
||||
'name' => 'submit_overlay',
|
||||
'input' => ['direction' => 'rising', 'confidence' => 70, 'reasoning_short' => 'Still none.'],
|
||||
]],
|
||||
]),
|
||||
->push(fakeSearchResultsTurn([
|
||||
['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
|
||||
['url' => 'https://bloomberg.com/iran', 'title' => 'Iran tensions'],
|
||||
]))
|
||||
->push(fakeSubmitTurn('rising', 70, [
|
||||
['headline' => 'OPEC slashes output', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
|
||||
['headline' => 'Refinery fire', 'source' => 'CNBC', 'url' => 'https://cnbc.com/refinery', 'impact' => 'rising'],
|
||||
])),
|
||||
'*' => Http::response('', 200),
|
||||
]);
|
||||
|
||||
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
|
||||
|
||||
expect($service->run())->toBeNull()
|
||||
->and(LlmOverlay::query()->count())->toBe(0);
|
||||
$row = $service->run();
|
||||
|
||||
expect($row)->not->toBeNull()
|
||||
->and(collect($row->events_json)->pluck('url')->all())
|
||||
->toEqualCanonicalizing([
|
||||
'https://reuters.com/opec',
|
||||
'https://bloomberg.com/iran',
|
||||
'https://cnbc.com/refinery',
|
||||
]);
|
||||
|
||||
$opec = collect($row->events_json)->firstWhere('url', 'https://reuters.com/opec');
|
||||
expect($opec['impact'])->toBe('rising')
|
||||
->and($opec['headline'])->toBe('OPEC slashes output');
|
||||
|
||||
$bloomberg = collect($row->events_json)->firstWhere('url', 'https://bloomberg.com/iran');
|
||||
expect($bloomberg['impact'])->toBe('neutral');
|
||||
});
|
||||
|
||||
it('does not resend Phase 1 web_search_tool_result blocks on the submit call', function (): void {
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push(fakeSearchResultsTurn([
|
||||
['url' => 'https://reuters.com/opec', 'title' => 'OPEC cuts output'],
|
||||
]))
|
||||
->push(fakeSubmitTurn('rising', 70, [
|
||||
['headline' => 'OPEC', 'source' => 'Reuters', 'url' => 'https://reuters.com/opec', 'impact' => 'rising'],
|
||||
])),
|
||||
'*' => Http::response('', 200),
|
||||
]);
|
||||
|
||||
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
|
||||
|
||||
$service->run();
|
||||
|
||||
$anthropicRequests = collect(Http::recorded())
|
||||
->filter(fn (array $pair): bool => str_contains($pair[0]->url(), 'api.anthropic.com'))
|
||||
->values();
|
||||
|
||||
expect($anthropicRequests)->toHaveCount(2);
|
||||
|
||||
$submitBody = $anthropicRequests[1][0]->data();
|
||||
$messagesJson = json_encode($submitBody['messages'], JSON_UNESCAPED_SLASHES);
|
||||
|
||||
expect($submitBody['messages'])->toHaveCount(1)
|
||||
->and($submitBody['messages'][0]['role'])->toBe('user');
|
||||
|
||||
expect($messagesJson)->not->toContain('web_search_tool_result')
|
||||
->and($messagesJson)->not->toContain('LONG_PAGE_TEXT_')
|
||||
->and($messagesJson)->not->toContain('server_tool_use')
|
||||
->and($messagesJson)->toContain('https://reuters.com/opec');
|
||||
});
|
||||
|
||||
it('honors the 4-hour cooldown for event-driven calls', function (): void {
|
||||
@@ -291,14 +264,19 @@ it('honors the 4-hour cooldown for event-driven calls', function (): void {
|
||||
'updated_at' => now(),
|
||||
]);
|
||||
|
||||
fakeAnthropicWithOverlay('falling', 40, [
|
||||
['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
|
||||
->push(fakeSubmitTurn('falling', 40, [
|
||||
['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
|
||||
])),
|
||||
'*' => Http::response('', 200),
|
||||
]);
|
||||
|
||||
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
|
||||
|
||||
expect($service->run(eventDriven: true))->toBeNull() // <4h since prior
|
||||
->and(LlmOverlay::query()->count())->toBe(1); // no new row inserted
|
||||
expect($service->run(eventDriven: true))->toBeNull()
|
||||
->and(LlmOverlay::query()->count())->toBe(1);
|
||||
|
||||
Carbon::setTestNow();
|
||||
});
|
||||
@@ -320,8 +298,13 @@ it('always runs (ignores cooldown) when not event-driven', function (): void {
|
||||
'updated_at' => now(),
|
||||
]);
|
||||
|
||||
fakeAnthropicWithOverlay('falling', 40, [
|
||||
['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
|
||||
Http::fake([
|
||||
'*api.anthropic.com/*' => Http::sequence()
|
||||
->push(['stop_reason' => 'end_turn', 'content' => [['type' => 'text', 'text' => 'ok']]])
|
||||
->push(fakeSubmitTurn('falling', 40, [
|
||||
['headline' => 'A', 'source' => 'X', 'url' => 'https://reuters.com/a', 'impact' => 'falling'],
|
||||
])),
|
||||
'*' => Http::response('', 200),
|
||||
]);
|
||||
|
||||
$service = new LlmOverlayService(new ApiLogger, app(WeeklyForecastService::class));
|
||||
|
||||
Reference in New Issue
Block a user