The daily forecast:llm-overlay command was being skipped because the previous single-conversation flow consumed more than Tier-1's 50,000 input-tokens-per-minute Anthropic bucket. The web_search tool auto-caches its results (~55k tokens) and requires `encrypted_content` intact when those blocks are resent, so the prior retry-on-missing-citations path either 429'd or 400'd on the second call. LlmOverlayService now runs two independent API calls. Phase 1 invokes the web_search tool and we discard the transcript after harvesting the URLs + titles from the returned web_search_tool_result blocks. Phase 2 is a fresh conversation containing the forecast context and the harvested headlines as plain text, with a forced submit_overlay tool call. events_cited is now optional in the tool schema — Haiku's flaky compliance no longer matters because citations come from the search results, not the model's transcription. Model-tagged events (with directional impact) merge with harvested-only entries (impact: 'neutral'), deduped by URL. Between phases the service reads anthropic-ratelimit-input-tokens-remaining / …-reset from Phase 1's headers and sleeps proportionally — only long enough for the SUBMIT_TOKEN_BUDGET worth of refill, not for the full bucket reset, capped at 65 seconds. ApiLogger now captures usage.input_tokens, usage.output_tokens, cache_read_input_tokens, cache_creation_input_tokens, plus the rate-limit remaining/reset headers on every Anthropic response. New nullable columns on api_logs make rate-limit diagnostics directly queryable. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
118 lines
3.8 KiB
PHP
118 lines
3.8 KiB
PHP
<?php
|
|
|
|
namespace App\Services;
|
|
|
|
use App\Models\ApiLog;
|
|
use Illuminate\Http\Client\RequestException;
|
|
use Illuminate\Http\Client\Response;
|
|
use Illuminate\Support\Str;
|
|
use Throwable;
|
|
|
|
class ApiLogger
{
    /**
     * Cap the stored response body. MEDIUMTEXT can hold ~16MB, but
     * persisting more than 64KB is rarely useful for debugging and
     * blows up the row size on busy services.
     */
    private const int RESPONSE_BODY_CAP = 65_536;

    /**
     * Execute an HTTP request and log it to api_logs.
     *
     * The callable must return an Illuminate\Http\Client\Response.
     * Exceptions are logged and re-thrown so the caller handles them.
     *
     * Persists the response body to `api_logs.response_body` ONLY when
     * the call failed (non-2xx) or threw. Truncates to RESPONSE_BODY_CAP.
     *
     * @param callable(): Response $request
     *
     * @throws Throwable re-thrown after the ApiLog row is written
     */
    public function send(string $service, string $method, string $url, callable $request): Response
    {
        $start = microtime(true);
        $statusCode = null;
        $error = null;
        $responseBody = null;
        $usage = [];

        try {
            $response = $request();
            $statusCode = $response->status();
            $usage = $this->extractUsage($response);

            if ($response->failed()) {
                $body = $response->body();
                $error = Str::limit($body, 1000);
                $responseBody = $this->truncate($body);
            }

            return $response;
        } catch (Throwable $e) {
            $error = $e->getMessage();

            // RequestException carries the response, ConnectionException
            // doesn't. Pull the status, body, and usage when available so
            // the log row reflects what the server actually returned —
            // without the status code here, thrown 429s/400s would be
            // logged with status_code NULL and be invisible to queries.
            if ($e instanceof RequestException) {
                $statusCode = $e->response->status();
                $responseBody = $this->truncate($e->response->body());
                $usage = $this->extractUsage($e->response);
            }

            throw $e;
        } finally {
            // Runs on both the return and the re-throw path, so every
            // attempt produces exactly one api_logs row.
            ApiLog::create([
                'service' => $service,
                'method' => strtoupper($method),
                'url' => $url,
                'status_code' => $statusCode,
                'duration_ms' => (int) round((microtime(true) - $start) * 1000),
                'error' => $error,
                'response_body' => $responseBody,
                ...$usage,
            ]);
        }
    }

    /**
     * Truncate a body to RESPONSE_BODY_CAP. Byte-wise (strlen/substr) on
     * purpose: the cap guards DB row size, not character semantics, and a
     * mid-character cut in a debug blob is acceptable.
     */
    private function truncate(string $body): string
    {
        return strlen($body) > self::RESPONSE_BODY_CAP
            ? substr($body, 0, self::RESPONSE_BODY_CAP)
            : $body;
    }

    /**
     * Pull token-usage and rate-limit telemetry from a provider response.
     *
     * Today only Anthropic exposes both. Other providers return mostly
     * NULLs — callers don't need to know which is which.
     *
     * @return array<string, int|string|null>
     */
    private function extractUsage(?Response $response): array
    {
        if ($response === null) {
            return [];
        }

        $usage = $response->json('usage');
        $tokens = is_array($usage) ? $usage : [];

        // Laravel's header() returns '' (not null) when the header is
        // absent, hence the explicit empty-string checks below.
        $reset = $response->header('anthropic-ratelimit-input-tokens-reset');
        $remaining = $response->header('anthropic-ratelimit-input-tokens-remaining');

        return [
            'input_tokens' => $this->intOrNull($tokens['input_tokens'] ?? null),
            'output_tokens' => $this->intOrNull($tokens['output_tokens'] ?? null),
            'cache_read_tokens' => $this->intOrNull($tokens['cache_read_input_tokens'] ?? null),
            'cache_write_tokens' => $this->intOrNull($tokens['cache_creation_input_tokens'] ?? null),
            'ratelimit_remaining' => $this->intOrNull($remaining !== '' ? $remaining : null),
            // Stored as the raw RFC 3339 string the API sends; cast happens
            // at the model/column level, not here.
            'ratelimit_reset_at' => $reset !== '' ? $reset : null,
        ];
    }

    /**
     * Coerce a numeric string/int/float to int, anything else to null.
     * Keeps malformed or missing provider fields out of integer columns.
     */
    private function intOrNull(mixed $value): ?int
    {
        return is_numeric($value) ? (int) $value : null;
    }
}
|