From 9705510e1944184798506e80114e46a00a5651eb Mon Sep 17 00:00:00 2001 From: Jean-Christophe Hoelt Date: Tue, 26 May 2026 15:45:30 +0300 Subject: [PATCH] feat(session): add model fallback chain with cooldown and error classification --- packages/core/src/session-message.ts | 3 + .../llm/src/protocols/anthropic-messages.ts | 101 +-- .../llm/src/protocols/openai-responses.ts | 373 +--------- packages/llm/src/protocols/shared.ts | 12 +- packages/llm/test/continuation-scenarios.ts | 104 --- ...ponses-gpt-5-5-reasoning-continuation.json | 58 -- .../test/provider/anthropic-messages.test.ts | 212 +----- .../llm/test/provider/golden.recorded.test.ts | 1 - .../test/provider/openai-responses.test.ts | 654 +----------------- packages/llm/test/recorded-golden.ts | 12 +- packages/llm/test/recorded-scenarios.ts | 440 +++++------- packages/opencode/src/agent/agent.ts | 9 + packages/opencode/src/cli/cmd/tui/app.tsx | 14 + .../tui/feature-plugins/system/session-v2.tsx | 10 +- .../src/cli/cmd/tui/routes/session/index.tsx | 16 +- packages/opencode/src/config/agent.ts | 4 + packages/opencode/src/config/config.ts | 6 + packages/opencode/src/session/fallback.ts | 388 +++++++++++ packages/opencode/src/session/llm-call.ts | 325 +++++++++ packages/opencode/src/session/llm.ts | 322 ++------- .../src/session/llm/native-request.ts | 13 +- packages/opencode/src/session/llm/request.ts | 8 +- packages/opencode/src/session/message-v2.ts | 1 + packages/opencode/src/session/processor.ts | 24 +- packages/opencode/src/session/prompt.ts | 18 +- packages/opencode/src/session/retry.ts | 11 +- .../opencode/test/session/fallback.test.ts | 200 ++++++ .../test/session/llm-native-recorded.test.ts | 2 + .../opencode/test/session/llm-native.test.ts | 36 - packages/opencode/test/session/llm.test.ts | 3 + .../test/session/processor-effect.test.ts | 107 +++ packages/sdk/js/src/v2/gen/types.gen.ts | 316 ++++----- 32 files changed, 1561 insertions(+), 2242 deletions(-) delete mode 100644 packages/llm/test/continuation-scenarios.ts delete mode 100644 packages/llm/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning-continuation.json create mode 100644 packages/opencode/src/session/fallback.ts create mode 100644 packages/opencode/src/session/llm-call.ts create mode 100644 packages/opencode/test/session/fallback.test.ts diff --git a/packages/core/src/session-message.ts b/packages/core/src/session-message.ts index 73b6dd7da2b9..6fe57543d975 100644 --- a/packages/core/src/session-message.ts +++ b/packages/core/src/session-message.ts @@ -113,6 +113,9 @@ export class AssistantTool extends Schema.Class("Session.Message. export class AssistantText extends Schema.Class("Session.Message.Assistant.Text")({ type: Schema.Literal("text"), text: Schema.String, + ignored: Schema.optional(Schema.Boolean), + synthetic: Schema.optional(Schema.Boolean), + fallbackNotice: Schema.optional(Schema.Union([Schema.Literal("using"), Schema.Literal("switch"), Schema.Literal("resume")])), }) {} export class AssistantReasoning extends Schema.Class("Session.Message.Assistant.Reasoning")({ diff --git a/packages/llm/src/protocols/anthropic-messages.ts b/packages/llm/src/protocols/anthropic-messages.ts index 234ccd5baf00..53c6886e5d9e 100644 --- a/packages/llm/src/protocols/anthropic-messages.ts +++ b/packages/llm/src/protocols/anthropic-messages.ts @@ -10,11 +10,9 @@ import { type CacheHint, type FinishReason, type LLMRequest, - type MediaPart, type ProviderMetadata, type ToolCallPart, type ToolDefinition, - type ToolResultContentPart, type ToolResultPart, } from "../schema" import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared" @@ -41,17 +39,6 @@ const AnthropicTextBlock = Schema.Struct({ }) type AnthropicTextBlock = Schema.Schema.Type -const AnthropicImageBlock = Schema.Struct({ - type: Schema.tag("image"), - source: Schema.Struct({ - type: Schema.tag("base64"), - media_type: Schema.String, - data: Schema.String, - }), - cache_control: Schema.optional(AnthropicCacheControl), -}) -type AnthropicImageBlock = Schema.Schema.Type - const AnthropicThinkingBlock = Schema.Struct({ type: Schema.tag("thinking"), thinking: Schema.String, @@ -97,24 +84,15 @@ const AnthropicServerToolResultBlock = Schema.Struct({ }) type AnthropicServerToolResultBlock = Schema.Schema.Type -// Anthropic accepts either a plain string or an ordered array of text/image -// blocks inside `tool_result.content`. The array form is required when a tool -// returns image bytes (screenshot, image search, etc.) so they can be passed -// to the model as proper image inputs instead of being JSON-stringified into -// the prompt — which silently inflates context by megabytes and can push the -// conversation over the model's token limit. -const AnthropicToolResultContent = Schema.Union([AnthropicTextBlock, AnthropicImageBlock]) - const AnthropicToolResultBlock = Schema.Struct({ type: Schema.tag("tool_result"), tool_use_id: Schema.String, - content: Schema.Union([Schema.String, Schema.Array(AnthropicToolResultContent)]), + content: Schema.String, is_error: Schema.optional(Schema.Boolean), cache_control: Schema.optional(AnthropicCacheControl), }) -const AnthropicUserBlock = Schema.Union([AnthropicTextBlock, AnthropicImageBlock, AnthropicToolResultBlock]) -type AnthropicUserBlock = Schema.Schema.Type +const AnthropicUserBlock = Schema.Union([AnthropicTextBlock, AnthropicToolResultBlock]) const AnthropicAssistantBlock = Schema.Union([ AnthropicTextBlock, AnthropicThinkingBlock, @@ -206,13 +184,7 @@ const AnthropicEvent = Schema.Struct({ content_block: Schema.optional(AnthropicStreamBlock), delta: Schema.optional(AnthropicStreamDelta), usage: Schema.optional(AnthropicUsage), - // `type` and `message` are both required per Anthropic's spec, but - // OpenAI-compatible proxies and gateway translations occasionally drop one - // or the other; mark them optional so a partial payload still parses and - // the parser can fall back to whichever field is populated. - error: Schema.optional( - Schema.Struct({ type: Schema.optional(Schema.String), message: Schema.optional(Schema.String) }), - ), + error: Schema.optional(Schema.Struct({ type: Schema.String, message: Schema.String })), }) type AnthropicEvent = Schema.Schema.Type @@ -300,46 +272,6 @@ const lowerServerToolResult = Effect.fn("AnthropicMessages.lowerServerToolResult return { type: wireType, tool_use_id: part.id, content: part.result.value } satisfies AnthropicServerToolResultBlock }) -const lowerImage = Effect.fn("AnthropicMessages.lowerImage")(function* (part: MediaPart) { - if (!part.mediaType.startsWith("image/")) - return yield* invalid(`Anthropic Messages user media content only supports images`) - return { - type: "image" as const, - source: { - type: "base64" as const, - media_type: part.mediaType, - data: ProviderShared.mediaBase64(part), - }, - } satisfies AnthropicImageBlock -}) - -// Tool results may carry structured text/images. Keep media as provider-native -// content instead of JSON-stringifying base64 into a prompt string. -const lowerToolResultContentItem = Effect.fn("AnthropicMessages.lowerToolResultContentItem")(function* ( - item: ToolResultContentPart, -) { - if (item.type === "text") return { type: "text" as const, text: item.text } satisfies AnthropicTextBlock - if (item.mediaType.startsWith("image/")) - return { - type: "image" as const, - source: { - type: "base64" as const, - media_type: item.mediaType, - data: ProviderShared.mediaBase64(item), - }, - } satisfies AnthropicImageBlock - return yield* invalid(`Anthropic Messages tool-result media content only supports images, got ${item.mediaType}`) -}) - -const lowerToolResultContent = Effect.fn("AnthropicMessages.lowerToolResultContent")(function* (part: ToolResultPart) { - // Text / json / error results stay as a string for backward compatibility - // with existing cassettes and provider expectations. - if (part.result.type !== "content") return ProviderShared.toolResultText(part) - // Preserve the narrowed array element type when compiled through a consumer package. - const content: ReadonlyArray = part.result.value - return yield* Effect.forEach(content, lowerToolResultContentItem) -}) - const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* ( request: LLMRequest, breakpoints: Cache.Breakpoints, @@ -348,17 +280,11 @@ const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* ( for (const message of request.messages) { if (message.role === "user") { - const content: AnthropicUserBlock[] = [] + const content: AnthropicTextBlock[] = [] for (const part of message.content) { - if (part.type === "text") { - content.push({ type: "text", text: part.text, cache_control: cacheControl(breakpoints, part.cache) }) - continue - } - if (part.type === "media") { - content.push(yield* lowerImage(part)) - continue - } - return yield* ProviderShared.unsupportedContent("Anthropic Messages", "user", ["text", "media"]) + if (!ProviderShared.supportsContent(part, ["text"])) + return yield* ProviderShared.unsupportedContent("Anthropic Messages", "user", ["text"]) + content.push({ type: "text", text: part.text, cache_control: cacheControl(breakpoints, part.cache) }) } messages.push({ role: "user", content }) continue @@ -402,7 +328,7 @@ const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* ( content.push({ type: "tool_result", tool_use_id: part.id, - content: yield* lowerToolResultContent(part), + content: ProviderShared.toolResultText(part), is_error: part.result.type === "error" ? true : undefined, cache_control: cacheControl(breakpoints, part.cache), }) @@ -709,18 +635,9 @@ const onMessageDelta = (state: ParserState, event: AnthropicEvent): StepResult = return [{ ...state, lifecycle, usage }, events] } -// Prefix `error.type` so overloads, rate limits, and quota errors are visible -// even when the provider message is generic or empty. -const providerErrorMessage = (event: AnthropicEvent): string => { - const type = event.error?.type - const message = event.error?.message - if (type && message) return `${type}: ${message}` - return message || type || "Anthropic Messages stream error" -} - const onError = (state: ParserState, event: AnthropicEvent): StepResult => [ state, - [LLMEvent.providerError({ message: providerErrorMessage(event) })], + [LLMEvent.providerError({ message: event.error?.message ?? "Anthropic Messages stream error" })], ] const step = (state: ParserState, event: AnthropicEvent) => { diff --git a/packages/llm/src/protocols/openai-responses.ts b/packages/llm/src/protocols/openai-responses.ts index ad673a263f4b..7d536e16ce17 100644 --- a/packages/llm/src/protocols/openai-responses.ts +++ b/packages/llm/src/protocols/openai-responses.ts @@ -6,16 +6,14 @@ import { HttpTransport, WebSocketTransport } from "../route/transport" import { Protocol } from "../route/protocol" import { LLMEvent, + type MediaPart, Usage, type FinishReason, type LLMRequest, type ProviderMetadata, - type ReasoningPart, type TextPart, type ToolCallPart, type ToolDefinition, - type ToolResultContentPart, - type ToolResultPart, } from "../schema" import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared" import { OpenAIOptions } from "./utils/openai-options" @@ -45,39 +43,10 @@ const OpenAIResponsesOutputText = Schema.Struct({ text: Schema.String, }) -const OpenAIResponsesReasoningSummaryText = Schema.Struct({ - type: Schema.tag("summary_text"), - text: Schema.String, -}) - -const OpenAIResponsesReasoningItem = Schema.Struct({ - type: Schema.tag("reasoning"), - id: Schema.String, - summary: Schema.Array(OpenAIResponsesReasoningSummaryText), - encrypted_content: optionalNull(Schema.String), -}) - -const OpenAIResponsesItemReference = Schema.Struct({ - type: Schema.tag("item_reference"), - id: Schema.String, -}) - -// `function_call_output.output` accepts either a plain string or an ordered -// array of content items so tools can return images in addition to text. -// https://platform.openai.com/docs/api-reference/responses/object -const OpenAIResponsesFunctionCallOutputContent = Schema.Union([OpenAIResponsesInputText, OpenAIResponsesInputImage]) - -const OpenAIResponsesFunctionCallOutput = Schema.Union([ - Schema.String, - Schema.Array(OpenAIResponsesFunctionCallOutputContent), -]) - const OpenAIResponsesInputItem = Schema.Union([ Schema.Struct({ role: Schema.tag("system"), content: Schema.String }), Schema.Struct({ role: Schema.tag("user"), content: Schema.Array(OpenAIResponsesInputContent) }), Schema.Struct({ role: Schema.tag("assistant"), content: Schema.Array(OpenAIResponsesOutputText) }), - OpenAIResponsesReasoningItem, - OpenAIResponsesItemReference, Schema.Struct({ type: Schema.tag("function_call"), call_id: Schema.String, @@ -87,20 +56,11 @@ const OpenAIResponsesInputItem = Schema.Union([ Schema.Struct({ type: Schema.tag("function_call_output"), call_id: Schema.String, - output: OpenAIResponsesFunctionCallOutput, + output: Schema.String, }), ]) type OpenAIResponsesInputItem = Schema.Schema.Type -// Mutable counterpart of the schema reasoning item so `lowerMessages` can fold -// multiple streamed summary parts into the same item before flushing. -type OpenAIResponsesReasoningInput = { - type: "reasoning" - id: string - summary: Array<{ type: "summary_text"; text: string }> - encrypted_content?: string | null -} - const OpenAIResponsesTool = Schema.Struct({ type: Schema.tag("function"), name: Schema.String, @@ -189,26 +149,13 @@ const OpenAIResponsesStreamItem = Schema.Struct({ server_label: Schema.optional(Schema.String), output: Schema.optional(Schema.Unknown), error: Schema.optional(Schema.Unknown), - encrypted_content: optionalNull(Schema.String), }) type OpenAIResponsesStreamItem = Schema.Schema.Type -// OpenAI Responses surfaces provider failures in two related shapes. The -// streaming `error` event carries the details at the top level -// (`{ type: "error", code, message, param, sequence_number }`), while -// `response.failed` carries them under `response.error`. We capture both so -// the parser can surface a useful provider-error message in either path. -const OpenAIResponsesErrorPayload = Schema.Struct({ - code: optionalNull(Schema.String), - message: optionalNull(Schema.String), - param: optionalNull(Schema.String), -}) - const OpenAIResponsesEvent = Schema.Struct({ type: Schema.String, delta: Schema.optional(Schema.String), item_id: Schema.optional(Schema.String), - summary_index: Schema.optional(Schema.Number), item: Schema.optional(OpenAIResponsesStreamItem), response: Schema.optional( Schema.StructWithRest( @@ -217,14 +164,12 @@ const OpenAIResponsesEvent = Schema.Struct({ service_tier: optionalNull(Schema.String), incomplete_details: optionalNull(Schema.Struct({ reason: Schema.String })), usage: optionalNull(OpenAIResponsesUsage), - error: optionalNull(OpenAIResponsesErrorPayload), }), [Schema.Record(Schema.String, Schema.Unknown)], ), ), code: Schema.optional(Schema.String), message: Schema.optional(Schema.String), - param: Schema.optional(Schema.String), }) type OpenAIResponsesEvent = Schema.Schema.Type @@ -232,18 +177,6 @@ interface ParserState { readonly tools: ToolStream.State readonly hasFunctionCall: boolean readonly lifecycle: Lifecycle.State - readonly reasoningItems: Readonly> - readonly store: boolean | undefined -} - -type ReasoningSummaryStatus = "active" | "can-conclude" | "concluded" - -interface ReasoningStreamItem { - readonly encryptedContent: string | null | undefined - // Keyed by OpenAI's numeric `summary_index`. JS object keys coerce to - // strings, but typing the map as `Record` documents intent - // and matches the wire field. - readonly summaryParts: Readonly> } const invalid = ProviderShared.invalidRequest @@ -273,61 +206,26 @@ const lowerToolCall = (part: ToolCallPart): OpenAIResponsesInputItem => ({ arguments: ProviderShared.encodeJson(part.input), }) -const lowerReasoning = (part: ReasoningPart): OpenAIResponsesReasoningInput | undefined => { - const openai = part.providerMetadata?.openai - if (!ProviderShared.isRecord(openai) || typeof openai.itemId !== "string" || openai.itemId.length === 0) - return undefined - const encryptedContent = - typeof openai.reasoningEncryptedContent === "string" - ? openai.reasoningEncryptedContent - : openai.reasoningEncryptedContent === null - ? null - : undefined - return { - type: "reasoning", - id: openai.itemId, - summary: part.text.length > 0 ? [{ type: "summary_text", text: part.text }] : [], - encrypted_content: encryptedContent, - } -} +const imageUrl = (part: MediaPart) => + typeof part.data === "string" && part.data.startsWith("data:") + ? part.data + : `data:${part.mediaType};base64,${ProviderShared.mediaBytes(part)}` const lowerUserContent = Effect.fn("OpenAIResponses.lowerUserContent")(function* ( part: LLMRequest["messages"][number]["content"][number], ) { if (part.type === "text") return { type: "input_text" as const, text: part.text } if (part.type === "media" && part.mediaType.startsWith("image/")) { - return { type: "input_image" as const, image_url: ProviderShared.mediaDataUrl(part) } + return { type: "input_image" as const, image_url: imageUrl(part) } } if (part.type === "media") return yield* invalid("OpenAI Responses user media content only supports images") return yield* ProviderShared.unsupportedContent("OpenAI Responses", "user", ["text", "media"]) }) -// Tool results may carry structured text/images. Keep media as provider-native -// content instead of JSON-stringifying base64 into a prompt string. -const lowerToolResultContentItem = Effect.fn("OpenAIResponses.lowerToolResultContentItem")(function* ( - item: ToolResultContentPart, -) { - if (item.type === "text") return { type: "input_text" as const, text: item.text } - if (item.mediaType.startsWith("image/")) - return { - type: "input_image" as const, - image_url: ProviderShared.mediaDataUrl(item), - } - return yield* invalid(`OpenAI Responses tool-result media content only supports images, got ${item.mediaType}`) -}) - -const lowerToolResultOutput = Effect.fn("OpenAIResponses.lowerToolResultOutput")(function* (part: ToolResultPart) { - // Text/json/error results are encoded as a plain string for backward - // compatibility with existing cassettes and provider expectations. - if (part.result.type !== "content") return ProviderShared.toolResultText(part) - return yield* Effect.forEach(part.result.value, lowerToolResultContentItem) -}) - const lowerMessages = Effect.fn("OpenAIResponses.lowerMessages")(function* (request: LLMRequest) { const system: OpenAIResponsesInputItem[] = request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }] const input: OpenAIResponsesInputItem[] = [...system] - const store = OpenAIOptions.store(request) for (const message of request.messages) { if (message.role === "user") { @@ -337,72 +235,31 @@ const lowerMessages = Effect.fn("OpenAIResponses.lowerMessages")(function* (requ if (message.role === "assistant") { const content: TextPart[] = [] - const reasoningItems: Record = {} - const reasoningReferences = new Set() - const flushText = () => { - if (content.length === 0) return - input.push({ role: "assistant", content: content.map((part) => ({ type: "output_text", text: part.text })) }) - content.splice(0, content.length) - } for (const part of message.content) { + if (!ProviderShared.supportsContent(part, ["text", "tool-call"])) + return yield* ProviderShared.unsupportedContent("OpenAI Responses", "assistant", ["text", "tool-call"]) if (part.type === "text") { content.push(part) continue } - if (part.type === "reasoning") { - flushText() - const reasoning = lowerReasoning(part) - if (!reasoning) continue - if (store !== false && reasoning.id) { - if (!reasoningReferences.has(reasoning.id)) input.push({ type: "item_reference", id: reasoning.id }) - reasoningReferences.add(reasoning.id) - continue - } - const existing = reasoningItems[reasoning.id] - if (existing) { - existing.summary.push(...reasoning.summary) - if (typeof reasoning.encrypted_content === "string") - existing.encrypted_content = reasoning.encrypted_content - continue - } - reasoningItems[reasoning.id] = reasoning - input.push(reasoning) - continue - } if (part.type === "tool-call") { - flushText() input.push(lowerToolCall(part)) continue } - return yield* ProviderShared.unsupportedContent("OpenAI Responses", "assistant", [ - "text", - "reasoning", - "tool-call", - ]) } - flushText() + if (content.length > 0) + input.push({ role: "assistant", content: content.map((part) => ({ type: "output_text", text: part.text })) }) continue } for (const part of message.content) { if (!ProviderShared.supportsContent(part, ["tool-result"])) return yield* ProviderShared.unsupportedContent("OpenAI Responses", "tool", ["tool-result"]) - input.push({ - type: "function_call_output", - call_id: part.id, - output: yield* lowerToolResultOutput(part), - }) + input.push({ type: "function_call_output", call_id: part.id, output: ProviderShared.toolResultText(part) }) } } - // With store:false, OpenAI only accepts previous reasoning items when the - // complete item has encrypted state. Summary blocks for one item may carry - // that state only on the last block, so filter after they have been joined. - return store === false - ? input.filter( - (item) => !("type" in item) || item.type !== "reasoning" || typeof item.encrypted_content === "string", - ) - : input + return input }) const lowerOptions = Effect.fn("OpenAIResponses.lowerOptions")(function* (request: LLMRequest) { @@ -510,11 +367,6 @@ const isHostedToolItem = ( ): item is OpenAIResponsesStreamItem & { type: HostedToolType; id: string } => item.type in HOSTED_TOOLS && typeof item.id === "string" && item.id.length > 0 -const isReasoningItem = ( - item: OpenAIResponsesStreamItem, -): item is OpenAIResponsesStreamItem & { type: "reasoning"; id: string } => - item.type === "reasoning" && typeof item.id === "string" && item.id.length > 0 - // Round-trip the full item as the structured result so consumers can extract // outputs / sources / status without re-decoding. const hostedToolResult = (item: OpenAIResponsesStreamItem) => { @@ -567,51 +419,28 @@ const onOutputTextDelta = (state: ParserState, event: OpenAIResponsesEvent): Ste const onReasoningDelta = (state: ParserState, event: OpenAIResponsesEvent): StepResult => { if (!event.delta) return [state, NO_EVENTS] const events: LLMEvent[] = [] - const itemID = event.item_id ?? "reasoning-0" - const id = - event.summary_index !== undefined || state.reasoningItems[itemID] ? `${itemID}:${event.summary_index ?? 0}` : itemID return [ { ...state, - lifecycle: Lifecycle.reasoningDelta(state.lifecycle, events, id, event.delta), + lifecycle: Lifecycle.reasoningDelta(state.lifecycle, events, event.item_id ?? "reasoning-0", event.delta), + }, + events, + ] +} + +const onReasoningDone = (state: ParserState, event: OpenAIResponsesEvent): StepResult => { + const events: LLMEvent[] = [] + return [ + { + ...state, + lifecycle: Lifecycle.reasoningEnd(state.lifecycle, events, event.item_id ?? "reasoning-0"), }, events, ] } -const onReasoningDone = (state: ParserState, _event: OpenAIResponsesEvent): StepResult => [state, NO_EVENTS] - -const reasoningMetadata = (item: OpenAIResponsesStreamItem & { id: string }) => - openaiMetadata({ itemId: item.id, reasoningEncryptedContent: item.encrypted_content ?? null }) - -// OpenAI Responses streams reasoning items in a stable order: -// `output_item.added` (reasoning) → -// `reasoning_summary_part.added` (index=0) → -// `reasoning_summary_text.delta` → -// `reasoning_summary_part.done` (index=0) → -// (repeat for index>0) → -// `output_item.done` (reasoning). -// The handlers below rely on this ordering: `onOutputItemAdded` seeds the -// per-item entry, `onReasoningSummaryPartAdded` for `summary_index === 0` -// short-circuits when the entry already exists, and higher-index handlers -// fold against the same entry. Behaviour for out-of-order events is -// best-effort, not guaranteed. const onOutputItemAdded = (state: ParserState, event: OpenAIResponsesEvent): StepResult => { const item = event.item - if (item && isReasoningItem(item)) { - const events: LLMEvent[] = [] - return [ - { - ...state, - lifecycle: Lifecycle.reasoningStart(state.lifecycle, events, `${item.id}:0`, reasoningMetadata(item)), - reasoningItems: { - ...state.reasoningItems, - [item.id]: { encryptedContent: item.encrypted_content, summaryParts: { 0: "active" } }, - }, - }, - events, - ] - } if (item?.type !== "function_call" || !item.id) return [state, NO_EVENTS] const providerMetadata = openaiMetadata({ itemId: item.id }) const events: LLMEvent[] = [] @@ -632,103 +461,6 @@ const onOutputItemAdded = (state: ParserState, event: OpenAIResponsesEvent): Ste ] } -const onReasoningSummaryPartAdded = (state: ParserState, event: OpenAIResponsesEvent): StepResult => { - if (!event.item_id || event.summary_index === undefined) return [state, NO_EVENTS] - const item = state.reasoningItems[event.item_id] ?? { encryptedContent: undefined, summaryParts: {} } - if (event.summary_index === 0) { - if (state.reasoningItems[event.item_id]) return [state, NO_EVENTS] - const events: LLMEvent[] = [] - return [ - { - ...state, - lifecycle: Lifecycle.reasoningStart( - state.lifecycle, - events, - `${event.item_id}:0`, - openaiMetadata({ itemId: event.item_id, reasoningEncryptedContent: null }), - ), - reasoningItems: { - ...state.reasoningItems, - [event.item_id]: { ...item, summaryParts: { 0: "active" } }, - }, - }, - events, - ] - } - - const events: LLMEvent[] = [] - const closed = Object.entries(item.summaryParts) - .filter((entry) => entry[1] === "can-conclude") - .reduce( - (lifecycle, entry) => - Lifecycle.reasoningEnd( - lifecycle, - events, - `${event.item_id}:${entry[0]}`, - openaiMetadata({ itemId: event.item_id }), - ), - state.lifecycle, - ) - return [ - { - ...state, - lifecycle: Lifecycle.reasoningStart( - closed, - events, - `${event.item_id}:${event.summary_index}`, - openaiMetadata({ itemId: event.item_id, reasoningEncryptedContent: item.encryptedContent ?? null }), - ), - reasoningItems: { - ...state.reasoningItems, - [event.item_id]: { - ...item, - summaryParts: { - ...Object.fromEntries( - Object.entries(item.summaryParts).map((entry) => - entry[1] === "can-conclude" ? [entry[0], "concluded" as const] : entry, - ), - ), - [event.summary_index]: "active", - }, - }, - }, - }, - events, - ] -} - -const onReasoningSummaryPartDone = (state: ParserState, event: OpenAIResponsesEvent): StepResult => { - if (!event.item_id || event.summary_index === undefined) return [state, NO_EVENTS] - const item = state.reasoningItems[event.item_id] - if (!item) return [state, NO_EVENTS] - const events: LLMEvent[] = [] - return [ - { - ...state, - lifecycle: - state.store !== false - ? Lifecycle.reasoningEnd( - state.lifecycle, - events, - `${event.item_id}:${event.summary_index}`, - openaiMetadata({ itemId: event.item_id }), - ) - : state.lifecycle, - reasoningItems: { - ...state.reasoningItems, - [event.item_id]: { - ...item, - summaryParts: { - ...item.summaryParts, - [event.summary_index]: state.store !== false ? "concluded" : "can-conclude", - }, - }, - }, - }, - events, - ] -} - const onFunctionCallArgumentsDelta = Effect.fn("OpenAIResponses.onFunctionCallArgumentsDelta")(function* ( state: ParserState, event: OpenAIResponsesEvent, @@ -786,32 +518,6 @@ const onOutputItemDone = Effect.fn("OpenAIResponses.onOutputItemDone")(function* return [{ ...state, lifecycle }, events] satisfies StepResult } - if (isReasoningItem(item)) { - const events: LLMEvent[] = [] - const providerMetadata = reasoningMetadata(item) - const reasoningItem = state.reasoningItems[item.id] - if (reasoningItem) { - const lifecycle = Object.entries(reasoningItem.summaryParts) - .filter((entry) => entry[1] === "active" || entry[1] === "can-conclude") - .reduce( - (lifecycle, entry) => Lifecycle.reasoningEnd(lifecycle, events, `${item.id}:${entry[0]}`, providerMetadata), - state.lifecycle, - ) - const { [item.id]: _removed, ...reasoningItems } = state.reasoningItems - return [{ ...state, lifecycle, reasoningItems }, events] satisfies StepResult - } - if (!state.lifecycle.reasoning.has(item.id)) { - const lifecycle = Lifecycle.stepStart(state.lifecycle, events) - events.push(LLMEvent.reasoningStart({ id: item.id, providerMetadata })) - events.push(LLMEvent.reasoningEnd({ id: item.id, providerMetadata })) - return [{ ...state, lifecycle }, events] satisfies StepResult - } - return [ - { ...state, lifecycle: Lifecycle.reasoningEnd(state.lifecycle, events, item.id, providerMetadata) }, - events, - ] satisfies StepResult - } - return [state, NO_EVENTS] satisfies StepResult }) @@ -831,27 +537,14 @@ const onResponseFinish = (state: ParserState, event: OpenAIResponsesEvent): Step return [{ ...state, lifecycle }, events] } -// Build a single human-readable message from whatever the provider supplied. -// When both code and message are present, prefix the code so consumers see -// the failure mode (e.g. `rate_limit_exceeded: Slow down`) instead of just -// the bare message — production rate limits and context-length failures used -// to be indistinguishable from generic stream drops. -const providerErrorMessage = (event: OpenAIResponsesEvent, fallback: string): string => { - const nested = event.response?.error ?? undefined - const message = event.message || nested?.message || undefined - const code = event.code || nested?.code || undefined - if (message && code) return `${code}: ${message}` - return message || code || fallback -} - const onResponseFailed = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [ state, - [LLMEvent.providerError({ message: providerErrorMessage(event, "OpenAI Responses response failed") })], + [LLMEvent.providerError({ message: event.message ?? event.code ?? "OpenAI Responses response failed" })], ] const onError = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [ state, - [LLMEvent.providerError({ message: providerErrorMessage(event, "OpenAI Responses stream error") })], + [LLMEvent.providerError({ message: event.message ?? event.code ?? "OpenAI Responses stream error" })], ] const step = (state: ParserState, event: OpenAIResponsesEvent) => { @@ -868,10 +561,6 @@ const step = (state: ParserState, event: OpenAIResponsesEvent) => { event.type === "response.reasoning_summary_text.done" ) return Effect.succeed(onReasoningDone(state, event)) - if (event.type === "response.reasoning_summary_part.added") - return Effect.succeed(onReasoningSummaryPartAdded(state, event)) - if (event.type === "response.reasoning_summary_part.done") - return Effect.succeed(onReasoningSummaryPartDone(state, event)) if (event.type === "response.output_item.added") return Effect.succeed(onOutputItemAdded(state, event)) if (event.type === "response.function_call_arguments.delta") return onFunctionCallArgumentsDelta(state, event) if (event.type === "response.output_item.done") return onOutputItemDone(state, event) @@ -898,13 +587,7 @@ export const protocol = Protocol.make({ }, stream: { event: Protocol.jsonEvent(OpenAIResponsesEvent), - initial: (request) => ({ - hasFunctionCall: false, - tools: ToolStream.empty(), - lifecycle: Lifecycle.initial(), - reasoningItems: {}, - store: OpenAIOptions.store(request), - }), + initial: () => ({ hasFunctionCall: false, tools: ToolStream.empty(), lifecycle: Lifecycle.initial() }), step, terminal: (event) => TERMINAL_TYPES.has(event.type), }, diff --git a/packages/llm/src/protocols/shared.ts b/packages/llm/src/protocols/shared.ts index aa37c62e4367..a5d5e04df717 100644 --- a/packages/llm/src/protocols/shared.ts +++ b/packages/llm/src/protocols/shared.ts @@ -80,7 +80,7 @@ export const subtractTokens = (total: number | undefined, subtrahend: number | u */ export const sumTokens = (...values: ReadonlyArray): number | undefined => { if (values.every((value) => value === undefined)) return undefined - return values.reduce((acc: number, value) => acc + (value ?? 0), 0) + return values.reduce((acc, value) => acc + (value ?? 0), 0) } export const eventError = (route: string, message: string, raw?: string) => @@ -122,16 +122,6 @@ export const parseToolInput = (route: string, name: string, raw: string) => export const mediaBytes = (part: MediaPart) => typeof part.data === "string" ? part.data : Buffer.from(part.data).toString("base64") -export const mediaBase64 = (part: MediaPart) => { - if (typeof part.data !== "string" || !part.data.startsWith("data:")) return mediaBytes(part) - return part.data.slice(part.data.indexOf(",") + 1) -} - -export const mediaDataUrl = (part: MediaPart) => - typeof part.data === "string" && part.data.startsWith("data:") - ? part.data - : `data:${part.mediaType};base64,${mediaBytes(part)}` - export const trimBaseUrl = (value: string) => value.replace(/\/+$/, "") export const toolResultText = (part: ToolResultPart) => { diff --git a/packages/llm/test/continuation-scenarios.ts b/packages/llm/test/continuation-scenarios.ts deleted file mode 100644 index 1bb1848b557e..000000000000 --- a/packages/llm/test/continuation-scenarios.ts +++ /dev/null @@ -1,104 +0,0 @@ -import { LLM, Message, ToolCallPart, ToolDefinition, ToolResultPart, type ContentPart, type Model } from "../src" - -export const basicContinuation = ["system", "user-text", "assistant-text", "user-follow-up"] as const -export const toolContinuation = ["tool-call", "tool-result"] as const -export const reasoningContinuation = ["assistant-reasoning", "encrypted-reasoning"] as const -export const mediaContinuation = ["user-image"] as const -export const maximalContinuation = [ - ...basicContinuation, - ...toolContinuation, - ...reasoningContinuation, - ...mediaContinuation, -] as const - -export type ContinuationFeature = (typeof maximalContinuation)[number] - -export const nativeOpenAIResponsesContinuation = [ - ...basicContinuation, - ...toolContinuation, - "encrypted-reasoning", - ...mediaContinuation, -] as const satisfies ReadonlyArray - -export const nativeAnthropicMessagesContinuation = [ - ...basicContinuation, - ...toolContinuation, - "assistant-reasoning", - ...mediaContinuation, -] as const satisfies ReadonlyArray - -export const continuationTool = ToolDefinition.make({ - name: "get_weather", - description: "Get current weather for a city.", - inputSchema: { - type: "object", - properties: { city: { type: "string" } }, - required: ["city"], - additionalProperties: false, - }, -}) - -export function continuationRequest(input: { - readonly id: string - readonly model: Model - readonly features: ReadonlyArray - readonly image?: string -}) { - const features = new Set(input.features) - const messages = [] - const firstUser: ContentPart[] = [] - const firstAssistant: ContentPart[] = [] - - if (features.has("user-text")) firstUser.push({ type: "text", text: "What is shown here?" }) - if (features.has("user-image")) - firstUser.push({ type: "media", mediaType: "image/png", data: input.image ?? "AAECAw==" }) - if (firstUser.length > 0) messages.push(Message.user(firstUser)) - - if (features.has("assistant-reasoning")) - firstAssistant.push({ - type: "reasoning", - text: "I inspected the previous turn.", - providerMetadata: { anthropic: { signature: "sig_continuation_1" } }, - }) - if (features.has("encrypted-reasoning")) - firstAssistant.push({ - type: "reasoning", - text: "I inspected the previous turn.", - providerMetadata: { - openai: { - itemId: "rs_continuation_1", - reasoningEncryptedContent: "encrypted-continuation-state", - }, - }, - }) - if (features.has("assistant-text")) firstAssistant.push({ type: "text", text: "It shows a small test image." }) - if (firstAssistant.length > 0) messages.push(Message.assistant(firstAssistant)) - - if (features.has("tool-call")) { - messages.push(Message.user("Check the weather in Paris before continuing.")) - messages.push( - Message.assistant([ToolCallPart.make({ id: "call_weather_1", name: "get_weather", input: { city: "Paris" } })]), - ) - } - if (features.has("tool-result")) { - messages.push( - Message.tool(ToolResultPart.make({ id: "call_weather_1", name: "get_weather", result: { temperature: 22 } })), - ) - if (features.has("assistant-text")) messages.push(Message.assistant("Paris is 22 degrees.")) - } - if (features.has("user-follow-up")) - messages.push(Message.user("Continue from this conversation in one short sentence.")) - - return LLM.request({ - id: input.id, - model: input.model, - system: features.has("system") ? "You are concise. Continue from the provided history." : undefined, - messages, - tools: features.has("tool-call") ? [continuationTool] : [], - cache: "none", - providerOptions: features.has("encrypted-reasoning") - ? { openai: { store: false, include: ["reasoning.encrypted_content"], reasoningSummary: "auto" } } - : undefined, - generation: { maxTokens: 80, temperature: 0 }, - }) -} diff --git a/packages/llm/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning-continuation.json b/packages/llm/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning-continuation.json deleted file mode 100644 index 850e381cafbd..000000000000 --- a/packages/llm/test/fixtures/recordings/openai-responses/openai-responses-gpt-5-5-reasoning-continuation.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "version": 1, - "metadata": { - "name": "openai-responses/openai-responses-gpt-5-5-reasoning-continuation", - "recordedAt": "2026-05-23T23:19:06.776Z", - "provider": "openai", - "route": "openai-responses", - "transport": "http", - "model": "gpt-5.5", - "tags": [ - "prefix:openai-responses", - "provider:openai", - "flagship", - "reasoning", - "continuation", - "encrypted-reasoning", - "golden" - ] - }, - "interactions": [ - { - "transport": "http", - "request": { - "method": "POST", - "url": "https://api.openai.com/v1/responses", - "headers": { - "content-type": "application/json" - }, - "body": "{\"model\":\"gpt-5.5\",\"input\":[{\"role\":\"system\",\"content\":\"Show concise reasoning when the provider supports visible reasoning summaries.\"},{\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"Think briefly, then reply exactly with: Hello!\"}]}],\"store\":false,\"include\":[\"reasoning.encrypted_content\"],\"reasoning\":{\"effort\":\"low\",\"summary\":\"auto\"},\"text\":{\"verbosity\":\"low\"},\"max_output_tokens\":120,\"stream\":true}" - }, - "response": { - "status": 200, - "headers": { - "content-type": "text/event-stream; charset=utf-8" - }, - "body": "event: response.created\ndata: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_0a0794dab3b8ec7d016a1235e74e148195beb46e1925d20292\",\"object\":\"response\",\"created_at\":1779578343,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":120,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"context\":\"current_turn\",\"effort\":\"low\",\"summary\":\"detailed\"},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":false,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"low\"},\"tool_choice\":\"auto\",\"tools\":[],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":0}\n\nevent: response.in_progress\ndata: {\"type\":\"response.in_progress\",\"response\":{\"id\":\"resp_0a0794dab3b8ec7d016a1235e74e148195beb46e1925d20292\",\"object\":\"response\",\"created_at\":1779578343,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":120,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"context\":\"current_turn\",\"effort\":\"low\",\"summary\":\"detailed\"},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":false,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"low\"},\"tool_choice\":\"auto\",\"tools\":[],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":1}\n\nevent: response.output_item.added\ndata: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"rs_0a0794dab3b8ec7d016a1235e7ce3881958a5eca32a36a14c5\",\"type\":\"reasoning\",\"encrypted_content\":\"gAAAAABqEjXnglldg7hhpTBATVqj7sThK5ATieOVR8sZGYPDW2zYopwpKxA3RyRccK_FPjRvvlzrvL-FitOxmdMGBaKa5jncrT9hHo5IMhsFsCEHkQ1x5tlrKPqtfwJ_LFexR0h_IpPogu8wlVAkHRoWQoq61o9vBxjMOEsq6dtXu09959gXnAvJA3jN_mqNkRZ7Yp6LaJJtLDAAtt_dhX8veoEFXZ412lCY4zcaMvC5o0yq6MPvLIN4NhHmfPKkVAy-j8wGlgA42KR4wd5-VeFXUdeSn32dlNLZZxBFa9w6iTgCQ9aF-3C7RB4OXeSY782QUD1dRyFybd7vJtjlptwXBntSHZ9wugoKSDEj0KnvQKG_WiCWuJvkGiOVno4MAs5QnCmKBnpak5OV1wOhPwX2ez6OmAYT4mMKIogdfivVvUxMrmdVJzgE85WoZEAU2ZporxVXkI7_8p0L6dxxwk_IKiKSCz-bZgsCtOP5Jsr5GeI831nVv272kZ3DugV-hcjGHAE5T9KhebzpFjsdxnJcfxuGY8SyRaLlUAHM_37H4veHsOzyhCoaG8mMaT3gIb4tAvM7ezd1xzLsFae89P5xCv_fNeoV7qmf2IWDWUi1vitIib5w9jsclWRqYaLVZR0GK6dYyNJ1DXDOOcWRdH7UJakv1m2koUbcYWBuxao7sc-af_9ySKAloWhb6QjiVElJHYtwraJBtX-CLBVHEYqAXmZgMUWVbz8NNRA6JS1TrOys7_LiQtXXubLWas_66LyaqmB-628LCUitUISYYc2wmq1uUm7gjPA53Wm4F7VU6g-PO7bt1O0Nd-jasisPXINTX3Z4hgC1APPEq29iEHwmEPnicO_Nu6U3JLfq4DD6r1oLK-RnIp3Ratw0P-Gwog86RBLGUWIEIKdFu6m9d1TI8rIBbAVaBA==\",\"summary\":[]},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.output_item.done\ndata: {\"type\":\"response.output_item.done\",\"item\":{\"id\":\"rs_0a0794dab3b8ec7d016a1235e7ce3881958a5eca32a36a14c5\",\"type\":\"reasoning\",\"encrypted_content\":\"gAAAAABqEjXoGMCw3WDXpoD9151PEr2Lt8raW7KBKefQhZJGWx5f8jy152bApO6oE-Mr1BhUtfZNq3OPBVfSL4ioQ9bHREfujIBXgk9LUDBAz2Sle7KjOr9HaUV16A4HBiaFIRFjsHPS9G8yEySp1m6F1CD_WR6apyUGgugRh_y39EcOJmxPOzmiac5DVM6fraA1VpcGbqrZ1x2ANHFDOfnYTycPtPNTgzE7LjkYjDDWbT03uN1YxfP4pqjDVRzY14pA8bSZ8ys-pDv5kUFCAsw-OlU4jYKUXp-M8_6KTaRQP71LPwppt__zG_NJPfy-qUil4pOU8_NoxtxerHgLLXbfExZdzfpoGinoEjn7nj7BJDEtl-LNeNEb5c-1ZymNfVMp-Cs3fLEPkAV8rtHFtZ0MhE_07GKbGo7hTrOmkM4DydxmHsdWGNbXAG35cprslEA5P7p3GHFKnRs5hGs2eq-XcZ3yki64ZBOU_Tv6UR7nUH09gF1rdrJo3dpre6M00COwwdZ02zUP5KxCuI8FKu2jsZu9zgMVXDALsdtM5orTCVLXsn4rddWd111zE-vMjNmMMmktW2cHMjH7j1ooA-9P083koNVYiLi4UhMA64gTqgyl8MxkZekl7eFSMa7qk295NaHOKtFxzYYcZ9jdioCwSPSZ0ZZWLoNgrK7SWfRh0uaTHNcMZ3wq8ae6CguktIeVTCPTQAqJLQqd7AU0oOCKCJ7BWnC-L8UC6m7Pm9ZS958uUVeWBhgKHzMAGq9UeQB7IEeAcbMn3EDgOSfd8qCb8iwU9iG9dcu9axQwWU7pd7kd-T-He61W7z5wWgpx1KehWCxrN6kuKSo6p-uUfwVnJukreOn8BJNAzADQgz68bhmN9VGih7YcKVnLgwDwKditrjSd6-tfE0Baarj3jWENvT6ohY17R9FDrKS-2v8IIX6tGjoKJw8SRhaWLNv4vWlmxRgR0gdac3qumd0GKqsWSveNz01naA==\",\"summary\":[]},\"output_index\":0,\"sequence_number\":3}\n\nevent: response.output_item.added\ndata: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"msg_0a0794dab3b8ec7d016a1235e8d64c81959a41f8db3ea7b66c\",\"type\":\"message\",\"status\":\"in_progress\",\"content\":[],\"phase\":\"final_answer\",\"role\":\"assistant\"},\"output_index\":1,\"sequence_number\":4}\n\nevent: response.content_part.added\ndata: {\"type\":\"response.content_part.added\",\"content_index\":0,\"item_id\":\"msg_0a0794dab3b8ec7d016a1235e8d64c81959a41f8db3ea7b66c\",\"output_index\":1,\"part\":{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"\"},\"sequence_number\":5}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\"Hello\",\"item_id\":\"msg_0a0794dab3b8ec7d016a1235e8d64c81959a41f8db3ea7b66c\",\"logprobs\":[],\"obfuscation\":\"3nRhhCWA1H8\",\"output_index\":1,\"sequence_number\":6}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\"!\",\"item_id\":\"msg_0a0794dab3b8ec7d016a1235e8d64c81959a41f8db3ea7b66c\",\"logprobs\":[],\"obfuscation\":\"60NqChSEyXHKsoy\",\"output_index\":1,\"sequence_number\":7}\n\nevent: response.output_text.done\ndata: {\"type\":\"response.output_text.done\",\"content_index\":0,\"item_id\":\"msg_0a0794dab3b8ec7d016a1235e8d64c81959a41f8db3ea7b66c\",\"logprobs\":[],\"output_index\":1,\"sequence_number\":8,\"text\":\"Hello!\"}\n\nevent: response.content_part.done\ndata: {\"type\":\"response.content_part.done\",\"content_index\":0,\"item_id\":\"msg_0a0794dab3b8ec7d016a1235e8d64c81959a41f8db3ea7b66c\",\"output_index\":1,\"part\":{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"Hello!\"},\"sequence_number\":9}\n\nevent: response.output_item.done\ndata: {\"type\":\"response.output_item.done\",\"item\":{\"id\":\"msg_0a0794dab3b8ec7d016a1235e8d64c81959a41f8db3ea7b66c\",\"type\":\"message\",\"status\":\"completed\",\"content\":[{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"Hello!\"}],\"phase\":\"final_answer\",\"role\":\"assistant\"},\"output_index\":1,\"sequence_number\":10}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_0a0794dab3b8ec7d016a1235e74e148195beb46e1925d20292\",\"object\":\"response\",\"created_at\":1779578343,\"status\":\"completed\",\"background\":false,\"completed_at\":1779578344,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":120,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[{\"id\":\"rs_0a0794dab3b8ec7d016a1235e7ce3881958a5eca32a36a14c5\",\"type\":\"reasoning\",\"encrypted_content\":\"gAAAAABqEjXoMO9Ci_Q06HKQ0YDBarUvbp9ulkR9W2RXWPbx7XKokNCrKUZX-pPPGpUg6r-vTXe8iEX-oED6TmjxZV_nyo838x6pmQJlDqz5JECs2axIrUbCjv9xBt3ob8eAyOizhKFjp3dJNu4i01c38MPZ5QYpD24uCKf69jzjUfydKIEjbo0VhP3K6SDG0V9ZUtua-e6WMqzIg-W5Zs3u64DxGw974ntmvNsx8lsuLR-bk9S5ZZ7zPlCG2Emwfph8UE5HJmIfmMxYlrY5qmXSWKDhse9hovQj-TrvbllP-0vLNQWEPLc3aUfVrWWR9i3NZZ-nxJZiIJPCF3xxIIyKaLh9a6Lh9J6Z-brsvVfbVJWXIGZhsu-uKk6Gwoqo56KqHdNaPF7lkPo5GAWfMrweCnJZ4o_j-oWm8BwTkXxrLib4XYKDO2JNqrNdbmy8rZ7UGgW_DVTiNyZi6LoRfSuvK45MWV2uzB_OJ9LBcqgscY4HyPvKrhGG4Peh4iXuBUCyQQ2IudM5GbeeMOAF3dnEzZff68SwE1H56CO6PtKhVQ6cFJMf7LwI5LFFio0qJnEDx-MejvU7PxmYW7R3MEbgjbsuEFU5KnRVYsgug3_Bq1vXdmP2qhebufFZwz26SwaFqyn3xjwCP8-GR7lWCZ2EvUvWtfxJ5_zgkZg06UsF4Eo_CWKFdp0ao43nemNJxOlMzFa6tPuCgplmD0oYoQ316f-bWK02-eJk56S7G4bZSk8cQfExfIZMjW2f-qrxvfxEpFiXsZF80BQwgRUOeKjsqidg2ihdldRkXGn3vX8p15mf1UgstU8y3DNd2_qJe1f_pEl6rWNXoxFdSRCTG7wTAqbCCmuDgCKGhNQY9tfJNsFqgWBIGkqKy88DN_HiWywJjJ-5u9aoe68yDK-E0TMDqs7ZrTely1wvmkl2yF0XQttaB30taxkIcRR-n0PRO-CRNA_9nJkw9ZsBb8oBjyqWH_mwSijT5g==\",\"summary\":[]},{\"id\":\"msg_0a0794dab3b8ec7d016a1235e8d64c81959a41f8db3ea7b66c\",\"type\":\"message\",\"status\":\"completed\",\"content\":[{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"Hello!\"}],\"phase\":\"final_answer\",\"role\":\"assistant\"}],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"context\":\"current_turn\",\"effort\":\"low\",\"summary\":\"detailed\"},\"safety_identifier\":null,\"service_tier\":\"default\",\"store\":false,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"low\"},\"tool_choice\":\"auto\",\"tools\":[],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":{\"input_tokens\":31,\"input_tokens_details\":{\"cached_tokens\":0},\"output_tokens\":20,\"output_tokens_details\":{\"reasoning_tokens\":12},\"total_tokens\":51},\"user\":null,\"metadata\":{}},\"sequence_number\":11}\n\n" - } - }, - { - "transport": "http", - "request": { - "method": "POST", - "url": "https://api.openai.com/v1/responses", - "headers": { - "content-type": "application/json" - }, - "body": "{\"model\":\"gpt-5.5\",\"input\":[{\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"Think briefly, then reply exactly with: Hello!\"}]},{\"type\":\"reasoning\",\"id\":\"rs_0a0794dab3b8ec7d016a1235e7ce3881958a5eca32a36a14c5\",\"summary\":[],\"encrypted_content\":\"gAAAAABqEjXoGMCw3WDXpoD9151PEr2Lt8raW7KBKefQhZJGWx5f8jy152bApO6oE-Mr1BhUtfZNq3OPBVfSL4ioQ9bHREfujIBXgk9LUDBAz2Sle7KjOr9HaUV16A4HBiaFIRFjsHPS9G8yEySp1m6F1CD_WR6apyUGgugRh_y39EcOJmxPOzmiac5DVM6fraA1VpcGbqrZ1x2ANHFDOfnYTycPtPNTgzE7LjkYjDDWbT03uN1YxfP4pqjDVRzY14pA8bSZ8ys-pDv5kUFCAsw-OlU4jYKUXp-M8_6KTaRQP71LPwppt__zG_NJPfy-qUil4pOU8_NoxtxerHgLLXbfExZdzfpoGinoEjn7nj7BJDEtl-LNeNEb5c-1ZymNfVMp-Cs3fLEPkAV8rtHFtZ0MhE_07GKbGo7hTrOmkM4DydxmHsdWGNbXAG35cprslEA5P7p3GHFKnRs5hGs2eq-XcZ3yki64ZBOU_Tv6UR7nUH09gF1rdrJo3dpre6M00COwwdZ02zUP5KxCuI8FKu2jsZu9zgMVXDALsdtM5orTCVLXsn4rddWd111zE-vMjNmMMmktW2cHMjH7j1ooA-9P083koNVYiLi4UhMA64gTqgyl8MxkZekl7eFSMa7qk295NaHOKtFxzYYcZ9jdioCwSPSZ0ZZWLoNgrK7SWfRh0uaTHNcMZ3wq8ae6CguktIeVTCPTQAqJLQqd7AU0oOCKCJ7BWnC-L8UC6m7Pm9ZS958uUVeWBhgKHzMAGq9UeQB7IEeAcbMn3EDgOSfd8qCb8iwU9iG9dcu9axQwWU7pd7kd-T-He61W7z5wWgpx1KehWCxrN6kuKSo6p-uUfwVnJukreOn8BJNAzADQgz68bhmN9VGih7YcKVnLgwDwKditrjSd6-tfE0Baarj3jWENvT6ohY17R9FDrKS-2v8IIX6tGjoKJw8SRhaWLNv4vWlmxRgR0gdac3qumd0GKqsWSveNz01naA==\"},{\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello!\"}]},{\"role\":\"user\",\"content\":[{\"type\":\"input_text\",\"text\":\"Now reply exactly with: Done.\"}]}],\"store\":false,\"include\":[\"reasoning.encrypted_content\"],\"reasoning\":{\"effort\":\"low\",\"summary\":\"auto\"},\"text\":{\"verbosity\":\"low\"},\"max_output_tokens\":40,\"stream\":true}" - }, - "response": { - "status": 200, - "headers": { - "content-type": "text/event-stream; charset=utf-8" - }, - "body": "event: response.created\ndata: {\"type\":\"response.created\",\"response\":{\"id\":\"resp_0a0794dab3b8ec7d016a1235e991c88195a4d2f9766babd985\",\"object\":\"response\",\"created_at\":1779578345,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":40,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"context\":\"current_turn\",\"effort\":\"low\",\"summary\":\"detailed\"},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":false,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"low\"},\"tool_choice\":\"auto\",\"tools\":[],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":0}\n\nevent: response.in_progress\ndata: {\"type\":\"response.in_progress\",\"response\":{\"id\":\"resp_0a0794dab3b8ec7d016a1235e991c88195a4d2f9766babd985\",\"object\":\"response\",\"created_at\":1779578345,\"status\":\"in_progress\",\"background\":false,\"completed_at\":null,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":40,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"context\":\"current_turn\",\"effort\":\"low\",\"summary\":\"detailed\"},\"safety_identifier\":null,\"service_tier\":\"auto\",\"store\":false,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"low\"},\"tool_choice\":\"auto\",\"tools\":[],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":null,\"user\":null,\"metadata\":{}},\"sequence_number\":1}\n\nevent: response.output_item.added\ndata: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"rs_0a0794dab3b8ec7d016a1235ea4dd88195a32179255ed6c532\",\"type\":\"reasoning\",\"encrypted_content\":\"gAAAAABqEjXqB-kOX_0QAeoEksgNjwSbtGmVEQuMj5ODcFV6b7Kp3E8RoHRRmSXtRH0rtNbZRbhKz5jM48DUpDI1WTeO2HqCd_A3fsSgFxp5ACGVFjPWjfvP2JMdDkpoOo5gu2zy7WsWY0fseocQQ5q_jfG6SWw0fyaeeqfdQ9HkcHyg6gVEl5skb4L8_2lD5nClmLlNVVh5JCuXRH9eYysrfO19NOZ29A2MVUX-XgB6mmK5uSb1jE43GhrEPPYrMbB5JyzM6B-yeB8rE4H2wx530hQqtxwSZREa8G03rzTJ49_KAPWl0djGDDtufUX-t4EpBHo6loA3PMuiZ3VsJTkkPpEqkm6QQyAVkQ_8AdRu12CqHbFdu73I-BnArzr33yW6reNUjnZjFV5bWDyxIMh6ljy3O_2nGk-qdTLt6bGJbEjTdPj1hi7icYZTVPqofPU4pjlo9BnIBheo-4u9pA26V9G9vDAtM4myDdMEe4pnieUztBUYPUOVMaG2U9gqtNs6iPehKo3BeKy4lhYPorL2OPmf1lVUQOCW1MBbwT5xt1kjOVw7LggnyjrBsXVvDBWg0AFcvm14r3ZQezPgLetQfSx56mVEJpui9BuVSUg2Xvqb5tCCip6TipUVvzZJKKkxN43o8N6UVXLIn6wgstAn9727JgBEsjMxzvuOWaaI-qM3dWMcFzFSGvKb6gTiF37AhSOzosf31hnsGx8AnGmLmbuW3IMhZXZZMHgVUHx4p8pNRPeoCE-Bv833KZbRfVxe6tbmkLadBjXYaCQqPXDHBR7qbpfu4_M9UAy0kpic-joepxQKsCT2t6vsNaThRYaN3PtIgjs5xxAfa9yKeIYak8yiP96CUlME9Y7zaOIydPWYBhWLf3phQsWMax9eKdLDh19f0Y0iAJPpk--1xd2OVTuDxKIMpA==\",\"summary\":[]},\"output_index\":0,\"sequence_number\":2}\n\nevent: response.output_item.done\ndata: {\"type\":\"response.output_item.done\",\"item\":{\"id\":\"rs_0a0794dab3b8ec7d016a1235ea4dd88195a32179255ed6c532\",\"type\":\"reasoning\",\"encrypted_content\":\"gAAAAABqEjXqW5MInCBPKulZ1lizyFtOaKUpKgHldAXVjTTs4XFE45gtxC1NbJoOi2tHoQhpfq-JGxtjSQEDTHnMCiLLhyqvQ4GlWVF4n51xcFVC_WgymkZqDxG6xPw8ITAsRI5vb8HiPO6EmmKt6xGIVXOjrrxRNAY3xtrByeYSvnCa6FDUHEkMeXmwllBalCeQPNDPl0Ub2ehuchNG0loMVLJoOjT-2KgDrXlOa6rCn3nUf4U1W5JA_kHytlgrD0IPbs7nY8wemdynJRXBoNSOT_U3nQSB6j-i4KIJAdLiUs9LVWMYleqmFQNs8S4dC3i5DfpHXWUMZ5Ai1d3gbvMP8bH7fsUyfIhyiDUvlgr6PZ9rfh8JqkjOpiQ7NFtSDuHQGdx__W3qi23WPDp3iQjKxVl1oUXfbMzsPE4bmNN9dnJ9qTQ43kvw8GyrGrSqRS8jCKuk9bxqeR_ibj4KoDdxvVbUeGMg3WKANfCRsNXlxwYtMpu3I4HxKm5EuMNKDg_e9RFH2wFEDm9wCKMZrC_5LShgKhSfhsk3yJ47Mit0zYdX27kyHGlZvpzLPYKdtHW1O15KNT4gFKBrIguCtjXz2Lb42ENM6Jo8BTY7BZbf0hXZ4A5mFNl_gyLVHWEHpR1GcYiJbs9RtQ-7qX_PTeZ11iFY3a7_jM7TK3WEN2IuG0OKbZVHvOkVcvyBEgIbzSzCzhtC-j584knI4WmYiqnltuwRcR2N3sxYY3vMcYGA2_AU5kYlZcJcztapTTW-aKbyGxPcw5D_dqb5mGpDqJgquye-qOufDt4Fd7cSc-g8awqR8QPsLz-ZDPLMB9JKQ3VqLQlNCKDUoDodGOAL3h-7EQG66osALfhpdsWcNmuVqlb0lNAXklrsZJtRKBU4pJ1UGCyVDwde7nv6I9PW19VumaRrJhc2cC52qUoyihvUo8xJsElaFp7-EHn5ymS4znZhRfyA_4UDL4rwj-3DqbMwNeDJrgBc3w==\",\"summary\":[]},\"output_index\":0,\"sequence_number\":3}\n\nevent: response.output_item.added\ndata: {\"type\":\"response.output_item.added\",\"item\":{\"id\":\"msg_0a0794dab3b8ec7d016a1235eaae648195ab7ad5385b641107\",\"type\":\"message\",\"status\":\"in_progress\",\"content\":[],\"phase\":\"final_answer\",\"role\":\"assistant\"},\"output_index\":1,\"sequence_number\":4}\n\nevent: response.content_part.added\ndata: {\"type\":\"response.content_part.added\",\"content_index\":0,\"item_id\":\"msg_0a0794dab3b8ec7d016a1235eaae648195ab7ad5385b641107\",\"output_index\":1,\"part\":{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"\"},\"sequence_number\":5}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\"Done\",\"item_id\":\"msg_0a0794dab3b8ec7d016a1235eaae648195ab7ad5385b641107\",\"logprobs\":[],\"obfuscation\":\"7tyE7hMvNOTM\",\"output_index\":1,\"sequence_number\":6}\n\nevent: response.output_text.delta\ndata: {\"type\":\"response.output_text.delta\",\"content_index\":0,\"delta\":\".\",\"item_id\":\"msg_0a0794dab3b8ec7d016a1235eaae648195ab7ad5385b641107\",\"logprobs\":[],\"obfuscation\":\"RGXvuTTSJS3AT5E\",\"output_index\":1,\"sequence_number\":7}\n\nevent: response.output_text.done\ndata: {\"type\":\"response.output_text.done\",\"content_index\":0,\"item_id\":\"msg_0a0794dab3b8ec7d016a1235eaae648195ab7ad5385b641107\",\"logprobs\":[],\"output_index\":1,\"sequence_number\":8,\"text\":\"Done.\"}\n\nevent: response.content_part.done\ndata: {\"type\":\"response.content_part.done\",\"content_index\":0,\"item_id\":\"msg_0a0794dab3b8ec7d016a1235eaae648195ab7ad5385b641107\",\"output_index\":1,\"part\":{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"Done.\"},\"sequence_number\":9}\n\nevent: response.output_item.done\ndata: {\"type\":\"response.output_item.done\",\"item\":{\"id\":\"msg_0a0794dab3b8ec7d016a1235eaae648195ab7ad5385b641107\",\"type\":\"message\",\"status\":\"completed\",\"content\":[{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"Done.\"}],\"phase\":\"final_answer\",\"role\":\"assistant\"},\"output_index\":1,\"sequence_number\":10}\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"response\":{\"id\":\"resp_0a0794dab3b8ec7d016a1235e991c88195a4d2f9766babd985\",\"object\":\"response\",\"created_at\":1779578345,\"status\":\"completed\",\"background\":false,\"completed_at\":1779578346,\"error\":null,\"frequency_penalty\":0.0,\"incomplete_details\":null,\"instructions\":null,\"max_output_tokens\":40,\"max_tool_calls\":null,\"model\":\"gpt-5.5-2026-04-23\",\"moderation\":null,\"output\":[{\"id\":\"rs_0a0794dab3b8ec7d016a1235ea4dd88195a32179255ed6c532\",\"type\":\"reasoning\",\"encrypted_content\":\"gAAAAABqEjXq8oliF2VeqiOUi-jUdi49emjffD6wtbmxlwQWbJ6tSxXIjyXvCeclOqKx83G83GyDOJqvR4L_D8V_ebJtgG87ahWB8Rr9LEQDoLT24n4Vz279xtHMxEGgv7f0NmaXu2dGFeFY_s2RhH-DqNE7V4nEkS7odJOTkhxTKgEcxtz3dDlEnGU7IgN2sD1lh9y90BD3ysvARegy4Cs0DhUjLOvkx11G9lk5dQ3yo1ek8JhTHpnVSYrLDYIudCh6pfu1yP1tx8xbxDHUcwlNclU9Hp_9ils5FhZNWC_tiLDscXXvRPBgMF77jdOicCV6cyUV0Snsu1_KSRbm4rLtgXLXVMqFyYpxdyicsD577e4yZ0VVXT4Oo_af0eDh3I3ZPIWui38EmYuoRhvQuYZkqjhGd_xOkvjQF4_Tp6cyNO0XdAMGMoYG-5npHC0gcPpv56qYGX8ffj0P8ZyR9shn3H7kcQqE2YXXBa42VKK0poPbC996xSqFNW7ygePel41h493XlJ70wnP50vFY5s0raNFf9eLP3YYmLxiPks9gshayGwUQXNNwrSimoQv3OeJzRzihbzZNWTfhR4xKs53nlXMjwnnXwHRH5D07vJg_1zU7BQzJ-QRLZnsnhIOq3psHt1yuoCtsSTKBN6HPiR81F-snIttJiUAiYsgv_ajwPxxnKP0FnFXQfBuaUAtAOD5G_3MC1yECjzq-YI4MDOXj4dsIGnHkdzXo-DV2lXMl2WnPqytoUkugp14SWbJso-eDsN5QivqspnYc1VsdNAaOOgjBiHmi-bACI1CykrkuiYJm1nOHAH4L4IQjpd0pcNm-Dk7z9LGIE5lwKI07hLXp_ByhVXRT8xWuugl43pzoM1jgYD4LjTjScC3ymauqqvKjjoHfnt0Zma0eVDeQrnVT6W9RQ9wDt5KVebrrwJTqlaNV0HywZJo3gwFy-Qq5MfwAwwC-GdjMsER1TgXO_E5kFZZD4sNVgw==\",\"summary\":[]},{\"id\":\"msg_0a0794dab3b8ec7d016a1235eaae648195ab7ad5385b641107\",\"type\":\"message\",\"status\":\"completed\",\"content\":[{\"type\":\"output_text\",\"annotations\":[],\"logprobs\":[],\"text\":\"Done.\"}],\"phase\":\"final_answer\",\"role\":\"assistant\"}],\"parallel_tool_calls\":true,\"presence_penalty\":0.0,\"previous_response_id\":null,\"prompt_cache_key\":null,\"prompt_cache_retention\":\"24h\",\"reasoning\":{\"context\":\"current_turn\",\"effort\":\"low\",\"summary\":\"detailed\"},\"safety_identifier\":null,\"service_tier\":\"default\",\"store\":false,\"temperature\":1.0,\"text\":{\"format\":{\"type\":\"text\"},\"verbosity\":\"low\"},\"tool_choice\":\"auto\",\"tools\":[],\"top_logprobs\":0,\"top_p\":0.98,\"truncation\":\"disabled\",\"usage\":{\"input_tokens\":35,\"input_tokens_details\":{\"cached_tokens\":0},\"output_tokens\":20,\"output_tokens_details\":{\"reasoning_tokens\":12},\"total_tokens\":55},\"user\":null,\"metadata\":{}},\"sequence_number\":11}\n\n" - } - } - ] -} diff --git a/packages/llm/test/provider/anthropic-messages.test.ts b/packages/llm/test/provider/anthropic-messages.test.ts index 5198af9ab768..e9b03c621f07 100644 --- a/packages/llm/test/provider/anthropic-messages.test.ts +++ b/packages/llm/test/provider/anthropic-messages.test.ts @@ -1,12 +1,10 @@ import { describe, expect } from "bun:test" import { Effect } from "effect" -import { HttpClientRequest } from "effect/unstable/http" import { CacheHint, LLM, LLMError, Message, ToolCallPart, Usage } from "../../src" import { Auth, LLMClient } from "../../src/route" import * as AnthropicMessages from "../../src/protocols/anthropic-messages" -import { continuationRequest, nativeAnthropicMessagesContinuation } from "../continuation-scenarios" import { it } from "../lib/effect" -import { dynamicResponse, fixedResponse } from "../lib/http" +import { fixedResponse } from "../lib/http" import { sseEvents } from "../lib/sse" const model = AnthropicMessages.route @@ -24,19 +22,6 @@ const request = LLM.request({ generation: { maxTokens: 20, temperature: 0 }, }) -type AnthropicToolResult = Extract< - AnthropicMessages.AnthropicMessagesBody["messages"][number]["content"][number], - { readonly type: "tool_result" } -> - -const expectToolResult = (body: AnthropicMessages.AnthropicMessagesBody): AnthropicToolResult => { - const result = body.messages - .flatMap((message) => (message.role === "user" ? message.content : [])) - .find((block): block is AnthropicToolResult => block.type === "tool_result") - expect(result).toBeDefined() - return result! -} - describe("Anthropic Messages route", () => { it.effect("prepares Anthropic Messages target", () => Effect.gen(function* () { @@ -55,7 +40,7 @@ describe("Anthropic Messages route", () => { it.effect("prepares tool call and tool result messages", () => Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( + const prepared = yield* LLMClient.prepare( LLM.request({ id: "req_tool_result", model, @@ -84,131 +69,6 @@ describe("Anthropic Messages route", () => { }), ) - // Regression: screenshot/read tool results must stay structured so base64 - // image data is not JSON-stringified into `tool_result.content`. - it.effect("lowers image tool-result content as structured image blocks", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - id: "req_tool_result_image", - model, - messages: [ - Message.user("Show me the screenshot."), - Message.assistant([ToolCallPart.make({ id: "call_1", name: "read", input: { filePath: "shot.png" } })]), - Message.tool({ - id: "call_1", - name: "read", - resultType: "content", - result: [ - { type: "text", text: "Image read successfully" }, - { type: "media", mediaType: "image/png", data: "AAECAw==" }, - ], - }), - ], - cache: "none", - }), - ) - - expect(expectToolResult(prepared.body).content).toEqual([ - { type: "text", text: "Image read successfully" }, - { type: "image", source: { type: "base64", media_type: "image/png", data: "AAECAw==" } }, - ]) - }), - ) - - it.effect("lowers single-image tool-result content as a structured image block", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - id: "req_tool_result_image_only", - model, - messages: [ - Message.assistant([ToolCallPart.make({ id: "call_1", name: "screenshot", input: {} })]), - Message.tool({ - id: "call_1", - name: "screenshot", - resultType: "content", - result: [{ type: "media", mediaType: "image/jpeg", data: "/9j/AA==" }], - }), - ], - cache: "none", - }), - ) - - expect(expectToolResult(prepared.body).content).toEqual([ - { type: "image", source: { type: "base64", media_type: "image/jpeg", data: "/9j/AA==" } }, - ]) - }), - ) - - it.effect("rejects non-image media in tool-result content with a clear error", () => - Effect.gen(function* () { - const error = yield* LLMClient.prepare( - LLM.request({ - id: "req_tool_result_unsupported_media", - model, - messages: [ - Message.assistant([ToolCallPart.make({ id: "call_1", name: "fetch", input: {} })]), - Message.tool({ - id: "call_1", - name: "fetch", - resultType: "content", - result: [{ type: "media", mediaType: "audio/mpeg", data: "AAECAw==" }], - }), - ], - cache: "none", - }), - ).pipe(Effect.flip) - - expect(error.message).toContain("Anthropic Messages") - expect(error.message).toContain("audio/mpeg") - }), - ) - - it.effect("prepares the composed native continuation request", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - continuationRequest({ - id: "req_native_continuation_anthropic", - model, - features: nativeAnthropicMessagesContinuation, - }), - ) - - expect(prepared.body).toMatchObject({ - system: [{ type: "text", text: "You are concise. Continue from the provided history." }], - messages: [ - { - role: "user", - content: [ - { type: "text", text: "What is shown here?" }, - { type: "image", source: { type: "base64", media_type: "image/png", data: "AAECAw==" } }, - ], - }, - { - role: "assistant", - content: [ - { type: "thinking", thinking: "I inspected the previous turn.", signature: "sig_continuation_1" }, - { type: "text", text: "It shows a small test image." }, - ], - }, - { role: "user", content: [{ type: "text", text: "Check the weather in Paris before continuing." }] }, - { - role: "assistant", - content: [{ type: "tool_use", id: "call_weather_1", name: "get_weather", input: { city: "Paris" } }], - }, - { - role: "user", - content: [{ type: "tool_result", tool_use_id: "call_weather_1", content: '{"temperature":22}' }], - }, - { role: "assistant", content: [{ type: "text", text: "Paris is 22 degrees." }] }, - { role: "user", content: [{ type: "text", text: "Continue from this conversation in one short sentence." }] }, - ], - }) - expect(prepared.body.tools).toEqual([expect.objectContaining({ name: "get_weather" })]) - }), - ) - it.effect("lowers preserved Anthropic reasoning signature metadata", () => Effect.gen(function* () { const prepared = yield* LLMClient.prepare( @@ -337,29 +197,7 @@ describe("Anthropic Messages route", () => { ), ) - // Prefix the error type so consumers can distinguish overloads, rate - // limits, and quota errors without parsing the message string. - expect(response.events).toEqual([{ type: "provider-error", message: "overloaded_error: Overloaded" }]) - }), - ) - - it.effect("falls back to error type when no message is present", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate(request).pipe( - Effect.provide(fixedResponse(sseEvents({ type: "error", error: { type: "overloaded_error", message: "" } }))), - ) - - expect(response.events).toEqual([{ type: "provider-error", message: "overloaded_error" }]) - }), - ) - - it.effect("falls back to a stable default when error payload is absent", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate(request).pipe( - Effect.provide(fixedResponse(sseEvents({ type: "error" }))), - ) - - expect(response.events).toEqual([{ type: "provider-error", message: "Anthropic Messages stream error" }]) + expect(response.events).toEqual([{ type: "provider-error", message: "Overloaded" }]) }), ) @@ -554,51 +392,17 @@ describe("Anthropic Messages route", () => { }), ) - it.effect("continues a conversation with user image content", () => + it.effect("rejects unsupported user media content", () => Effect.gen(function* () { - const response = yield* LLMClient.generate( + const error = yield* LLMClient.prepare( LLM.request({ id: "req_media", model, - messages: [ - Message.user([ - { type: "text", text: "What is in this image?" }, - { type: "media", mediaType: "image/png", data: "AAECAw==" }, - ]), - ], + messages: [Message.user({ type: "media", mediaType: "image/png", data: "AAECAw==" })], }), - ).pipe( - Effect.provide( - dynamicResponse((input) => - Effect.gen(function* () { - const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) - expect(yield* Effect.promise(() => web.json())).toMatchObject({ - messages: [ - { - role: "user", - content: [ - { type: "text", text: "What is in this image?" }, - { type: "image", source: { type: "base64", media_type: "image/png", data: "AAECAw==" } }, - ], - }, - ], - }) - return input.respond( - sseEvents( - { type: "content_block_start", index: 0, content_block: { type: "text", text: "" } }, - { type: "content_block_delta", index: 0, delta: { type: "text_delta", text: "An image." } }, - { type: "content_block_stop", index: 0 }, - { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 3 } }, - { type: "message_stop" }, - ), - { headers: { "content-type": "text/event-stream" } }, - ) - }), - ), - ), - ) + ).pipe(Effect.flip) - expect(response.text).toBe("An image.") + expect(error.message).toContain("Anthropic Messages user messages only support text content for now") }), ) diff --git a/packages/llm/test/provider/golden.recorded.test.ts b/packages/llm/test/provider/golden.recorded.test.ts index 79d8589cf8c9..0575322ae731 100644 --- a/packages/llm/test/provider/golden.recorded.test.ts +++ b/packages/llm/test/provider/golden.recorded.test.ts @@ -84,7 +84,6 @@ describeRecordedGoldenScenarios([ scenarios: [ { id: "text", temperature: false }, { id: "reasoning", temperature: false }, - { id: "reasoning-continuation", temperature: false }, { id: "tool-call", temperature: false }, { id: "tool-loop", temperature: false }, { id: "image-tool-result", temperature: false, maxTokens: 40 }, diff --git a/packages/llm/test/provider/openai-responses.test.ts b/packages/llm/test/provider/openai-responses.test.ts index dd57d00a9177..1b7ae038c66e 100644 --- a/packages/llm/test/provider/openai-responses.test.ts +++ b/packages/llm/test/provider/openai-responses.test.ts @@ -7,7 +7,6 @@ import * as Azure from "../../src/providers/azure" import * as OpenAI from "../../src/providers/openai" import * as OpenAIResponses from "../../src/protocols/openai-responses" import * as ProviderShared from "../../src/protocols/shared" -import { continuationRequest, nativeOpenAIResponsesContinuation } from "../continuation-scenarios" import { it } from "../lib/effect" import { dynamicResponse, fixedResponse } from "../lib/http" import { sseEvents } from "../lib/sse" @@ -26,19 +25,6 @@ const request = LLM.request({ const configEnv = (env: Record) => Effect.provide(ConfigProvider.layer(ConfigProvider.fromEnv({ env }))) -type OpenAIToolOutput = Extract< - OpenAIResponses.OpenAIResponsesBody["input"][number], - { readonly type: "function_call_output" } -> - -const expectToolOutput = (body: OpenAIResponses.OpenAIResponsesBody): OpenAIToolOutput => { - const output = body.input.find( - (item): item is OpenAIToolOutput => "type" in item && item.type === "function_call_output", - ) - expect(output).toBeDefined() - return output! -} - describe("OpenAI Responses route", () => { it.effect("prepares OpenAI Responses target", () => Effect.gen(function* () { @@ -261,127 +247,6 @@ describe("OpenAI Responses route", () => { }), ) - // Regression: screenshot/read tool results must stay structured so base64 - // image data is not JSON-stringified into `function_call_output.output`. - it.effect("lowers image tool-result content as structured input_image items", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - id: "req_tool_result_image", - model, - messages: [ - Message.user("Show me the screenshot."), - Message.assistant([ToolCallPart.make({ id: "call_1", name: "read", input: { filePath: "shot.png" } })]), - Message.tool({ - id: "call_1", - name: "read", - resultType: "content", - result: [ - { type: "text", text: "Image read successfully" }, - { type: "media", mediaType: "image/png", data: "AAECAw==" }, - ], - }), - ], - }), - ) - - expect(expectToolOutput(prepared.body).output).toEqual([ - { type: "input_text", text: "Image read successfully" }, - { type: "input_image", image_url: "data:image/png;base64,AAECAw==" }, - ]) - }), - ) - - it.effect("lowers single-image tool-result content as structured input_image array", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - id: "req_tool_result_image_only", - model, - messages: [ - Message.assistant([ToolCallPart.make({ id: "call_1", name: "screenshot", input: {} })]), - Message.tool({ - id: "call_1", - name: "screenshot", - resultType: "content", - result: [{ type: "media", mediaType: "image/png", data: "AAECAw==" }], - }), - ], - }), - ) - - expect(expectToolOutput(prepared.body).output).toEqual([ - { type: "input_image", image_url: "data:image/png;base64,AAECAw==" }, - ]) - }), - ) - - it.effect("rejects non-image media in tool-result content with a clear error", () => - Effect.gen(function* () { - const error = yield* LLMClient.prepare( - LLM.request({ - id: "req_tool_result_unsupported_media", - model, - messages: [ - Message.assistant([ToolCallPart.make({ id: "call_1", name: "fetch", input: {} })]), - Message.tool({ - id: "call_1", - name: "fetch", - resultType: "content", - result: [{ type: "media", mediaType: "audio/mpeg", data: "AAECAw==" }], - }), - ], - }), - ).pipe(Effect.flip) - - expect(error.message).toContain("OpenAI Responses") - expect(error.message).toContain("audio/mpeg") - }), - ) - - it.effect("prepares the composed native continuation request", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - continuationRequest({ - id: "req_native_continuation_openai", - model, - features: nativeOpenAIResponsesContinuation, - }), - ) - - expect(prepared.body).toMatchObject({ - input: [ - { role: "system", content: "You are concise. Continue from the provided history." }, - { - role: "user", - content: [ - { type: "input_text", text: "What is shown here?" }, - { type: "input_image", image_url: "data:image/png;base64,AAECAw==" }, - ], - }, - { - type: "reasoning", - id: "rs_continuation_1", - encrypted_content: "encrypted-continuation-state", - summary: [{ type: "summary_text", text: "I inspected the previous turn." }], - }, - { role: "assistant", content: [{ type: "output_text", text: "It shows a small test image." }] }, - { role: "user", content: [{ type: "input_text", text: "Check the weather in Paris before continuing." }] }, - { type: "function_call", call_id: "call_weather_1", name: "get_weather", arguments: '{"city":"Paris"}' }, - { type: "function_call_output", call_id: "call_weather_1", output: '{"temperature":22}' }, - { role: "assistant", content: [{ type: "output_text", text: "Paris is 22 degrees." }] }, - { - role: "user", - content: [{ type: "input_text", text: "Continue from this conversation in one short sentence." }], - }, - ], - include: ["reasoning.encrypted_content"], - store: false, - }) - expect(prepared.body.tools).toEqual([expect.objectContaining({ type: "function", name: "get_weather" })]) - }), - ) - it.effect("maps OpenAI provider options to Responses options", () => Effect.gen(function* () { const prepared = yield* LLMClient.prepare( @@ -393,7 +258,7 @@ describe("OpenAI Responses route", () => { promptCacheKey: "session_123", reasoningEffort: "high", reasoningSummary: "auto", - include: ["reasoning.encrypted_content"], + includeEncryptedReasoning: true, }, }, }), @@ -407,108 +272,6 @@ describe("OpenAI Responses route", () => { }), ) - it.effect("accepts the full ResponseIncludable union", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - model, - prompt: "hi", - providerOptions: { - openai: { - include: ["reasoning.encrypted_content", "code_interpreter_call.outputs", "web_search_call.results"], - }, - }, - }), - ) - - expect(prepared.body.include).toEqual([ - "reasoning.encrypted_content", - "code_interpreter_call.outputs", - "web_search_call.results", - ]) - }), - ) - - it.effect("filters unknown includable values out of the include array", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - model, - prompt: "hi", - // The user passed one invalid entry alongside a valid one. Keep the - // valid one so the request still succeeds rather than failing on a - // typo from upstream config. - providerOptions: { openai: { include: ["reasoning.encrypted_content", "bogus.thing"] } }, - }), - ) - - expect(prepared.body.include).toEqual(["reasoning.encrypted_content"]) - }), - ) - - it.effect("treats an explicit empty include as no include at all", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ model, prompt: "hi", providerOptions: { openai: { include: [] } } }), - ) - - expect(prepared.body.include).toBeUndefined() - }), - ) - - it.effect("treats an all-invalid include as no include at all", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ model, prompt: "hi", providerOptions: { openai: { include: ["bogus.thing"] } } }), - ) - - expect(prepared.body.include).toBeUndefined() - }), - ) - - it.effect("omits include when no include is set", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ model, prompt: "hi", providerOptions: { openai: { store: false } } }), - ) - - expect(prepared.body.include).toBeUndefined() - }), - ) - - it.effect("requests encrypted reasoning by default for GPT-5 reasoning models", () => - Effect.gen(function* () { - // The native OpenAI facade configures GPT-5 stateless (store: false) with - // reasoningSummary: "auto" by default. Without `include`, a follow-up - // turn cannot replay reasoning state, so the facade also opts into - // `reasoning.encrypted_content` automatically. - const prepared = yield* LLMClient.prepare( - LLM.request({ - model: OpenAI.configure({ baseURL: "https://api.openai.test/v1/", apiKey: "test" }).responses("gpt-5.2"), - prompt: "hi", - }), - ) - - expect(prepared.body.store).toBe(false) - expect(prepared.body.include).toEqual(["reasoning.encrypted_content"]) - expect(prepared.body.reasoning).toEqual({ effort: "medium", summary: "auto" }) - }), - ) - - it.effect("lets callers opt out of the GPT-5 default include", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - model: OpenAI.configure({ baseURL: "https://api.openai.test/v1/", apiKey: "test" }).responses("gpt-5.2"), - prompt: "hi", - providerOptions: { openai: { include: [] } }, - }), - ) - - expect(prepared.body.include).toBeUndefined() - }), - ) - it.effect("request OpenAI provider options override route defaults", () => Effect.gen(function* () { const prepared = yield* LLMClient.prepare( @@ -617,322 +380,6 @@ describe("OpenAI Responses route", () => { }), ) - it.effect("preserves encrypted reasoning metadata for continuation", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate(request).pipe( - Effect.provide( - fixedResponse( - sseEvents( - { type: "response.reasoning_summary_text.delta", item_id: "rs_1", delta: "thinking" }, - { - type: "response.output_item.done", - item: { - type: "reasoning", - id: "rs_1", - encrypted_content: "encrypted-state", - summary: [{ type: "summary_text", text: "thinking" }], - }, - }, - { type: "response.completed", response: { id: "resp_1" } }, - ), - ), - ), - ) - - expect(response.events).toContainEqual( - expect.objectContaining({ - type: "reasoning-end", - id: "rs_1", - providerMetadata: { openai: { itemId: "rs_1", reasoningEncryptedContent: "encrypted-state" } }, - }), - ) - }), - ) - - it.effect("streams each reasoning summary part as a separate block", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate( - LLM.updateRequest(request, { providerOptions: { openai: { store: false } } }), - ).pipe( - Effect.provide( - fixedResponse( - sseEvents( - { - type: "response.output_item.added", - item: { type: "reasoning", id: "rs_1", encrypted_content: null }, - }, - { type: "response.reasoning_summary_part.added", item_id: "rs_1", summary_index: 0 }, - { type: "response.reasoning_summary_text.delta", item_id: "rs_1", summary_index: 0, delta: "First" }, - { type: "response.reasoning_summary_part.done", item_id: "rs_1", summary_index: 0 }, - { type: "response.reasoning_summary_part.added", item_id: "rs_1", summary_index: 1 }, - { type: "response.reasoning_summary_text.delta", item_id: "rs_1", summary_index: 1, delta: "Second" }, - { type: "response.reasoning_summary_part.done", item_id: "rs_1", summary_index: 1 }, - { - type: "response.output_item.done", - item: { type: "reasoning", id: "rs_1", encrypted_content: "encrypted-state" }, - }, - { type: "response.completed", response: { id: "resp_1" } }, - ), - ), - ), - ) - - expect(response.reasoning).toBe("FirstSecond") - expect(response.events).toMatchObject([ - { type: "step-start", index: 0 }, - { - type: "reasoning-start", - id: "rs_1:0", - providerMetadata: { openai: { itemId: "rs_1", reasoningEncryptedContent: null } }, - }, - { type: "reasoning-delta", id: "rs_1:0", text: "First" }, - { type: "reasoning-end", id: "rs_1:0", providerMetadata: { openai: { itemId: "rs_1" } } }, - { - type: "reasoning-start", - id: "rs_1:1", - providerMetadata: { openai: { itemId: "rs_1", reasoningEncryptedContent: null } }, - }, - { type: "reasoning-delta", id: "rs_1:1", text: "Second" }, - { - type: "reasoning-end", - id: "rs_1:1", - providerMetadata: { openai: { itemId: "rs_1", reasoningEncryptedContent: "encrypted-state" } }, - }, - { type: "step-finish", index: 0, reason: "stop" }, - { type: "finish", reason: "stop" }, - ]) - }), - ) - - it.effect("closes reasoning summary parts when storage is not disabled", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate(request).pipe( - Effect.provide( - fixedResponse( - sseEvents( - { - type: "response.output_item.added", - item: { type: "reasoning", id: "rs_1", encrypted_content: null }, - }, - { type: "response.reasoning_summary_part.added", item_id: "rs_1", summary_index: 0 }, - { type: "response.reasoning_summary_text.delta", item_id: "rs_1", summary_index: 0, delta: "First" }, - { type: "response.reasoning_summary_part.done", item_id: "rs_1", summary_index: 0 }, - { type: "response.reasoning_summary_part.added", item_id: "rs_1", summary_index: 1 }, - { type: "response.reasoning_summary_text.delta", item_id: "rs_1", summary_index: 1, delta: "Second" }, - { type: "response.reasoning_summary_part.done", item_id: "rs_1", summary_index: 1 }, - { - type: "response.output_item.done", - item: { type: "reasoning", id: "rs_1", encrypted_content: null }, - }, - { type: "response.completed", response: { id: "resp_1" } }, - ), - ), - ), - ) - - expect(response.events.filter((event) => event.type === "reasoning-end")).toEqual([ - { type: "reasoning-end", id: "rs_1:0", providerMetadata: { openai: { itemId: "rs_1" } } }, - { type: "reasoning-end", id: "rs_1:1", providerMetadata: { openai: { itemId: "rs_1" } } }, - ]) - }), - ) - - it.effect("continues a stateless reasoning conversation", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate( - LLM.request({ - id: "req_reasoning_continue", - model, - messages: [ - Message.user("What changed?"), - Message.assistant([ - { - type: "reasoning", - text: "Checked the previous diff.", - providerMetadata: { - openai: { - itemId: "rs_1", - reasoningEncryptedContent: "encrypted-state", - }, - }, - }, - { type: "text", text: "The parser changed." }, - ]), - Message.user("Summarize it."), - ], - providerOptions: { openai: { store: false } }, - }), - ).pipe( - Effect.provide( - dynamicResponse((input) => - Effect.gen(function* () { - const web = yield* HttpClientRequest.toWeb(input.request).pipe(Effect.orDie) - expect(yield* Effect.promise(() => web.json())).toMatchObject({ - input: [ - { role: "user", content: [{ type: "input_text", text: "What changed?" }] }, - { - type: "reasoning", - id: "rs_1", - encrypted_content: "encrypted-state", - summary: [{ type: "summary_text", text: "Checked the previous diff." }], - }, - { role: "assistant", content: [{ type: "output_text", text: "The parser changed." }] }, - { role: "user", content: [{ type: "input_text", text: "Summarize it." }] }, - ], - }) - return input.respond( - sseEvents( - { type: "response.output_text.delta", item_id: "msg_1", delta: "Parser now round-trips reasoning." }, - { type: "response.completed", response: { id: "resp_1" } }, - ), - { headers: { "content-type": "text/event-stream" } }, - ) - }), - ), - ), - ) - - expect(response.text).toBe("Parser now round-trips reasoning.") - }), - ) - - it.effect("preserves assistant content order around reasoning items", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - id: "req_reasoning_order", - model, - messages: [ - Message.assistant([ - { type: "text", text: "Before." }, - { - type: "reasoning", - text: "Checked order.", - providerMetadata: { - openai: { - itemId: "rs_1", - reasoningEncryptedContent: "encrypted-state", - }, - }, - }, - { type: "text", text: "After." }, - ]), - ], - providerOptions: { openai: { store: false } }, - }), - ) - - expect(prepared.body.input).toEqual([ - { role: "assistant", content: [{ type: "output_text", text: "Before." }] }, - { - type: "reasoning", - id: "rs_1", - encrypted_content: "encrypted-state", - summary: [{ type: "summary_text", text: "Checked order." }], - }, - { role: "assistant", content: [{ type: "output_text", text: "After." }] }, - ]) - }), - ) - - it.effect("references stored reasoning items by id", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - model, - messages: [ - Message.assistant([ - { - type: "reasoning", - text: "Checked the previous diff.", - providerMetadata: { openai: { itemId: "rs_1" } }, - }, - ]), - ], - providerOptions: { openai: { store: true } }, - }), - ) - - expect(prepared.body.input).toEqual([{ type: "item_reference", id: "rs_1" }]) - }), - ) - - it.effect("joins streamed summary blocks into one continuation reasoning item", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - id: "req_multi_summary_continuation", - model, - messages: [ - Message.assistant([ - { - type: "reasoning", - text: "First", - providerMetadata: { openai: { itemId: "rs_1" } }, - }, - { - type: "reasoning", - text: "Second", - providerMetadata: { openai: { itemId: "rs_1", reasoningEncryptedContent: "encrypted-state" } }, - }, - ]), - ], - providerOptions: { openai: { store: false } }, - }), - ) - - expect(prepared.body.input).toEqual([ - { - type: "reasoning", - id: "rs_1", - encrypted_content: "encrypted-state", - summary: [ - { type: "summary_text", text: "First" }, - { type: "summary_text", text: "Second" }, - ], - }, - ]) - }), - ) - - it.effect("skips non-persisted reasoning ids without encrypted state", () => - Effect.gen(function* () { - const prepared = yield* LLMClient.prepare( - LLM.request({ - id: "req_reasoning_without_encrypted_state", - model, - messages: [ - Message.user("What changed?"), - Message.assistant([ - { - type: "reasoning", - text: "Checked the previous diff.", - providerMetadata: { - openai: { - itemId: "rs_1", - reasoningEncryptedContent: null, - }, - }, - }, - { type: "text", text: "The parser changed." }, - ]), - Message.user("Summarize it."), - ], - providerOptions: { openai: { store: false } }, - }), - ) - - expect(prepared.body).toMatchObject({ - input: [ - { role: "user", content: [{ type: "input_text", text: "What changed?" }] }, - { role: "assistant", content: [{ type: "output_text", text: "The parser changed." }] }, - { role: "user", content: [{ type: "input_text", text: "Summarize it." }] }, - ], - store: false, - }) - }), - ) - it.effect("assembles streamed function call input", () => Effect.gen(function* () { const body = sseEvents( @@ -1129,11 +576,7 @@ describe("OpenAI Responses route", () => { Effect.provide(fixedResponse(sseEvents({ type: "error", code: "rate_limit_exceeded", message: "Slow down" }))), ) - // Prefix the code so consumers see the failure mode, not just the - // sometimes-generic provider message. The bare message alone meant - // production errors like rate limits were indistinguishable from - // unrelated stream failures. - expect(response.events).toEqual([{ type: "provider-error", message: "rate_limit_exceeded: Slow down" }]) + expect(response.events).toEqual([{ type: "provider-error", message: "Slow down" }]) }), ) @@ -1147,99 +590,6 @@ describe("OpenAI Responses route", () => { }), ) - it.effect("falls back to error code when message is empty", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate(request).pipe( - Effect.provide(fixedResponse(sseEvents({ type: "error", code: "internal_error", message: "" }))), - ) - - expect(response.events).toEqual([{ type: "provider-error", message: "internal_error" }]) - }), - ) - - // Regression: `response.failed` carries the failure details under - // `response.error`, not at the top level. The previous handler only - // checked top-level `message`/`code` and so always emitted the bare - // "OpenAI Responses response failed" string, hiding the real cause. - it.effect("surfaces response.failed details from response.error", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate(request).pipe( - Effect.provide( - fixedResponse( - sseEvents({ - type: "response.failed", - response: { - id: "resp_failed_1", - error: { code: "server_error", message: "Upstream model unavailable" }, - }, - }), - ), - ), - ) - - expect(response.events).toEqual([{ type: "provider-error", message: "server_error: Upstream model unavailable" }]) - }), - ) - - it.effect("surfaces response.failed code when no nested message is present", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate(request).pipe( - Effect.provide( - fixedResponse( - sseEvents({ - type: "response.failed", - response: { id: "resp_failed_2", error: { code: "invalid_prompt" } }, - }), - ), - ), - ) - - expect(response.events).toEqual([{ type: "provider-error", message: "invalid_prompt" }]) - }), - ) - - it.effect("surfaces error event details even when they arrive nested under response.error", () => - Effect.gen(function* () { - // Some OpenAI-compatible proxies and older SDK versions wrap the - // top-level error fields into a nested `response.error` payload - // when they bubble up an HTTP error as an SSE `error` event. Honour - // both shapes so the user still sees the underlying cause instead - // of the catch-all string. - const response = yield* LLMClient.generate(request).pipe( - Effect.provide( - fixedResponse( - sseEvents({ - type: "error", - response: { error: { code: "context_length_exceeded", message: "prompt too long" } }, - }), - ), - ), - ) - - expect(response.events).toEqual([{ type: "provider-error", message: "context_length_exceeded: prompt too long" }]) - }), - ) - - it.effect("falls back to a stable default when both error and response are absent", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate(request).pipe( - Effect.provide(fixedResponse(sseEvents({ type: "error" }))), - ) - - expect(response.events).toEqual([{ type: "provider-error", message: "OpenAI Responses stream error" }]) - }), - ) - - it.effect("falls back to a stable default when response.failed has no error payload", () => - Effect.gen(function* () { - const response = yield* LLMClient.generate(request).pipe( - Effect.provide(fixedResponse(sseEvents({ type: "response.failed", response: { id: "resp_failed_3" } }))), - ) - - expect(response.events).toEqual([{ type: "provider-error", message: "OpenAI Responses response failed" }]) - }), - ) - it.effect("fails HTTP provider errors before stream parsing", () => Effect.gen(function* () { const error = yield* LLMClient.generate(request).pipe( diff --git a/packages/llm/test/recorded-golden.ts b/packages/llm/test/recorded-golden.ts index 76568ca6714f..eb126136740f 100644 --- a/packages/llm/test/recorded-golden.ts +++ b/packages/llm/test/recorded-golden.ts @@ -2,7 +2,7 @@ import type { HttpRecorder } from "@opencode-ai/http-recorder" import { describe } from "bun:test" import { Effect } from "effect" import type { Model } from "../src" -import { goldenScenarioTags, goldenScenarioTitle, runGoldenScenario, type GoldenScenarioID } from "./recorded-scenarios" +import { goldenScenarioTags, runGoldenScenario, type GoldenScenarioID } from "./recorded-scenarios" import { recordedTests } from "./recorded-test" import { kebab } from "./recorded-utils" @@ -35,6 +35,14 @@ type TargetInput = { const scenarioInput = (input: ScenarioInput) => (typeof input === "string" ? { id: input } : input) +const scenarioTitle = (id: GoldenScenarioID) => { + if (id === "text") return "streams text" + if (id === "tool-call") return "streams tool call" + if (id === "reasoning") return "uses reasoning" + if (id === "image") return "reads image text" + return "drives a tool loop" +} + const defaultPrefix = (target: TargetInput) => { if (target.prefix) return target.prefix const transport = target.transport === "websocket" ? "-websocket" : "" @@ -69,7 +77,7 @@ const runTarget = (target: TargetInput) => { describe(`${target.name} recorded`, () => { target.scenarios.forEach((raw) => { const input = scenarioInput(raw) - const name = input.name ?? goldenScenarioTitle(input.id) + const name = input.name ?? scenarioTitle(input.id) recorded.effect.with( name, { diff --git a/packages/llm/test/recorded-scenarios.ts b/packages/llm/test/recorded-scenarios.ts index db28ec4493f2..3bad88186538 100644 --- a/packages/llm/test/recorded-scenarios.ts +++ b/packages/llm/test/recorded-scenarios.ts @@ -1,17 +1,6 @@ import { expect } from "bun:test" import { Effect, Schema, Stream } from "effect" -import { - LLM, - LLMEvent, - LLMResponse, - Message, - ToolChoice, - ToolDefinition, - type ContentPart, - type FinishReason, - type LLMRequest, - type Model, -} from "../src" +import { LLM, LLMEvent, LLMResponse, Message, ToolChoice, ToolDefinition, type LLMRequest, type Model } from "../src" import { LLMClient } from "../src/route" import { tool } from "../src/tool" @@ -50,6 +39,47 @@ export const weatherRuntimeTool = tool({ ), }) +export const textRequest = (input: { + readonly id: string + readonly model: Model + readonly prompt?: string + readonly maxTokens?: number + readonly temperature?: number | false +}) => + LLM.request({ + id: input.id, + model: input.model, + system: "You are concise.", + prompt: input.prompt ?? "Reply with exactly: Hello!", + cache: "none", + providerOptions: + input.model.route.id === "gemini" ? { gemini: { thinkingConfig: { thinkingBudget: 0 } } } : undefined, + generation: + input.temperature === false + ? { maxTokens: input.maxTokens ?? 80 } + : { maxTokens: input.maxTokens ?? 80, temperature: input.temperature ?? 0 }, + }) + +export const weatherToolRequest = (input: { + readonly id: string + readonly model: Model + readonly maxTokens?: number + readonly temperature?: number | false +}) => + LLM.request({ + id: input.id, + model: input.model, + system: "Call tools exactly as requested.", + prompt: "Call get_weather with city exactly Paris.", + tools: [weatherTool], + toolChoice: ToolChoice.make(weatherTool), + cache: "none", + generation: + input.temperature === false + ? { maxTokens: input.maxTokens ?? 80 } + : { maxTokens: input.maxTokens ?? 80, temperature: input.temperature ?? 0 }, + }) + export const weatherToolLoopRequest = (input: { readonly id: string readonly model: Model @@ -86,6 +116,52 @@ const restroomImage = () => Effect.map((bytes) => Buffer.from(bytes).toString("base64")), ) +export const imageRequest = (input: { + readonly id: string + readonly model: Model + readonly image: string + readonly maxTokens?: number + readonly temperature?: number | false +}) => + LLM.request({ + id: input.id, + model: input.model, + system: "Read images carefully. Reply only with the visible text.", + messages: [ + Message.user([ + { + type: "text", + text: "The image contains exactly three lowercase English words. Read them left to right and reply with only those words.", + }, + { type: "media", mediaType: "image/png", data: input.image }, + ]), + ], + cache: "none", + generation: + input.temperature === false + ? { maxTokens: input.maxTokens ?? 20 } + : { maxTokens: input.maxTokens ?? 20, temperature: input.temperature ?? 0 }, + }) + +export const reasoningRequest = (input: { + readonly id: string + readonly model: Model + readonly maxTokens?: number + readonly temperature?: number | false +}) => + LLM.request({ + id: input.id, + model: input.model, + system: "Show concise reasoning when the provider supports visible reasoning summaries.", + prompt: "Think briefly, then reply exactly with: Hello!", + cache: "none", + providerOptions: { openai: { reasoningEffort: "low", reasoningSummary: "auto" } }, + generation: + input.temperature === false + ? { maxTokens: input.maxTokens ?? 120 } + : { maxTokens: input.maxTokens ?? 120, temperature: input.temperature ?? 0 }, + }) + export const runWeatherToolLoop = (request: LLMRequest) => LLMClient.stream({ request, @@ -136,6 +212,8 @@ export const expectGoldenWeatherToolLoop = (events: ReadonlyArray) => expect(LLMResponse.text({ events }).trim()).toMatch(/^Paris is sunny\.?$/) } +export type GoldenScenarioID = "text" | "tool-call" | "tool-loop" | "image" | "image-tool-result" | "reasoning" + export interface GoldenScenarioContext { readonly id: string readonly model: Model @@ -145,9 +223,6 @@ export interface GoldenScenarioContext { const generate = (request: LLMRequest) => LLMClient.generate(request) -const generation = (context: GoldenScenarioContext, maxTokens: number) => - context.temperature === false ? { maxTokens } : { maxTokens, temperature: context.temperature ?? 0 } - const normalizeImageText = (value: string) => value .toLowerCase() @@ -155,234 +230,117 @@ const normalizeImageText = (value: string) => .replace(/\s+/g, " ") .trim() -const encryptedReasoningOptions = { - openai: { - store: false, - include: ["reasoning.encrypted_content"], - reasoningEffort: "low", - reasoningSummary: "auto", - }, -} as const - -type AssistantTextExpectation = string | RegExp - -type UserStep = { readonly type: "user"; readonly content: Message.ContentInput } -type AssistantStep = { - readonly type: "assistant" - readonly text?: AssistantTextExpectation - readonly toolCall?: { readonly name: string; readonly input: unknown } - readonly reasoning?: "openai-encrypted" - readonly id?: string - readonly system?: string - readonly maxTokens?: number - readonly finish?: FinishReason - readonly tools?: LLM.RequestInput["tools"] - readonly toolChoice?: LLM.RequestInput["toolChoice"] - readonly providerOptions?: LLMRequest["providerOptions"] - readonly assert?: (response: LLMResponse) => void -} -type ConversationStep = UserStep | AssistantStep - -const user = (content: Message.ContentInput): ConversationStep => ({ type: "user", content }) - -const assistant = { - expectText: ( - text: AssistantTextExpectation, - options?: Omit, - ): ConversationStep => ({ type: "assistant", text, ...options }), - expectToolCall: ( - name: string, - input: unknown, - options?: Omit, - ): ConversationStep => ({ type: "assistant", toolCall: { name, input }, finish: "tool-calls", ...options }), - expectEncryptedReasoningText: ( - text: AssistantTextExpectation, - options?: Omit, - ): ConversationStep => ({ - type: "assistant", - text, - reasoning: "openai-encrypted", - providerOptions: encryptedReasoningOptions, - ...options, - }), -} - -const assertAssistantText = (actual: string, expected: AssistantTextExpectation) => { - if (typeof expected === "string") { - expect(actual.trim()).toBe(expected) - return - } - expect(actual.trim()).toMatch(expected) -} - -const assertAssistantToolCall = (response: LLMResponse, expected: NonNullable) => { - expect(response.toolCalls).toMatchObject([ - { type: "tool-call", id: expect.any(String), name: expected.name, input: expected.input }, - ]) -} - -// The generated golden scenarios only model one assistant shape at a time: -// encrypted reasoning + text, text, or tool call. Keep mixed interleavings in -// focused protocol tests where event order can be asserted directly. -const assistantMessageFromResponse = (response: LLMResponse, step: AssistantStep) => { - const content: ContentPart[] = [] - if (step.reasoning === "openai-encrypted") { - const reasoning = response.events.find( - (event): event is Extract => - LLMEvent.is.reasoningEnd(event) && typeof event.providerMetadata?.openai?.itemId === "string", - ) - if (!reasoning) throw new Error("OpenAI Responses did not return reasoning metadata") - expect(reasoning.providerMetadata?.openai?.reasoningEncryptedContent).toEqual(expect.any(String)) - content.push({ type: "reasoning", text: response.reasoning, providerMetadata: reasoning.providerMetadata }) - } - - if (response.text.length > 0) content.push({ type: "text", text: response.text }) - content.push(...response.toolCalls) - return Message.assistant(content) +export const goldenScenarioTags = (id: GoldenScenarioID) => { + if (id === "text") return ["text", "golden"] + if (id === "tool-call") return ["tool", "tool-call", "golden"] + if (id === "image") return ["media", "image", "vision", "golden"] + if (id === "image-tool-result") return ["media", "image", "vision", "tool", "tool-result", "golden"] + if (id === "reasoning") return ["reasoning", "golden"] + return ["tool", "tool-loop", "golden"] } -const runGeneratedConversation = (context: GoldenScenarioContext, steps: ReadonlyArray) => +export const runGoldenScenario = (id: GoldenScenarioID, context: GoldenScenarioContext) => Effect.gen(function* () { - const messages: Message[] = [] - let generated = 0 - for (const step of steps) { - if (step.type === "user") { - messages.push(Message.user(step.content)) - continue - } - - generated += 1 + if (id === "text") { const response = yield* generate( - LLM.request({ - id: step.id ? `${context.id}_${step.id}` : `${context.id}_${generated}`, + textRequest({ + id: context.id, model: context.model, - system: step.system, - cache: "none", - messages, - tools: step.tools, - toolChoice: step.toolChoice, - providerOptions: step.providerOptions, - generation: generation(context, step.maxTokens ?? context.maxTokens ?? 80), + prompt: "Reply exactly with: Hello!", + maxTokens: context.maxTokens ?? 40, + temperature: context.temperature, }), ) - if (step.text !== undefined) assertAssistantText(response.text, step.text) - if (step.toolCall) assertAssistantToolCall(response, step.toolCall) - step.assert?.(response) - expectFinish(response.events, step.finish ?? "stop") - messages.push(assistantMessageFromResponse(response, step)) + expect(response.text.trim()).toMatch(/^Hello!?$/) + expectFinish(response.events, "stop") + return } - }) -const runTextScenario = (context: GoldenScenarioContext) => - runGeneratedConversation(context, [ - user("Reply exactly with: Hello!"), - assistant.expectText(/^Hello!?$/, { - system: "You are concise.", - maxTokens: context.maxTokens ?? 40, - providerOptions: - context.model.route.id === "gemini" ? { gemini: { thinkingConfig: { thinkingBudget: 0 } } } : undefined, - }), - ]) - -const runToolCallScenario = (context: GoldenScenarioContext) => - runGeneratedConversation(context, [ - user("Call get_weather with city exactly Paris."), - assistant.expectToolCall( - weatherToolName, - { city: "Paris" }, - { - system: "Call tools exactly as requested.", - tools: [weatherTool], - toolChoice: ToolChoice.make(weatherTool), - maxTokens: context.maxTokens ?? 80, - }, - ), - ]) - -const runImageScenario = (context: GoldenScenarioContext) => - Effect.gen(function* () { - yield* runGeneratedConversation(context, [ - user([ - { - type: "text", - text: "The image contains exactly three lowercase English words. Read them left to right and reply with only those words.", - }, - { type: "media", mediaType: "image/png", data: yield* restroomImage() }, - ]), - assistant.expectText(/.+/, { - system: "Read images carefully. Reply only with the visible text.", - maxTokens: context.maxTokens ?? 20, - assert: (response) => expect(normalizeImageText(response.text)).toBe(RESTROOM_IMAGE_TEXT), - }), - ]) - }) + if (id === "tool-call") { + const response = yield* generate( + weatherToolRequest({ + id: context.id, + model: context.model, + maxTokens: context.maxTokens ?? 80, + temperature: context.temperature, + }), + ) + expectWeatherToolCall(response) + expectFinish(response.events, "tool-calls") + return + } -// Reproduces a tool-result image round trip: a tool returns image bytes, and -// the next model turn must receive provider-native image content instead of a -// JSON-stringified base64 blob. -const screenshotToolName = "read_screenshot" -const runImageToolResultScenario = (context: GoldenScenarioContext) => - Effect.gen(function* () { - const image = yield* restroomImage() - const response = yield* generate( - LLM.request({ - id: `${context.id}_image_tool_result`, - model: context.model, - system: "Read images carefully. Reply only with the visible text, lowercase, no punctuation.", - cache: "none", - generation: generation(context, context.maxTokens ?? 40), - messages: [ - Message.user("Use the read_screenshot tool, then reply with the words shown."), - Message.assistant([{ type: "tool-call", id: "call_screenshot_1", name: screenshotToolName, input: {} }]), - Message.tool({ - id: "call_screenshot_1", - name: screenshotToolName, - resultType: "content", - result: [ - { type: "text", text: "Image read successfully" }, - { type: "media", mediaType: "image/png", data: image }, - ], - }), - ], - tools: [ - ToolDefinition.make({ - name: screenshotToolName, - description: "Capture a screenshot of the current screen.", - inputSchema: { type: "object", properties: {}, additionalProperties: false }, - }), - ], - }), - ) + if (id === "image") { + const response = yield* generate( + imageRequest({ + id: context.id, + model: context.model, + image: yield* restroomImage(), + maxTokens: context.maxTokens ?? 20, + temperature: context.temperature, + }), + ) + expect(normalizeImageText(response.text)).toBe(RESTROOM_IMAGE_TEXT) + expectFinish(response.events, "stop") + return + } - expectFinish(response.events, "stop") - expect(normalizeImageText(response.text)).toBe(RESTROOM_IMAGE_TEXT) - }) + if (id === "image-tool-result") { + const screenshotToolName = "read_screenshot" + const image = yield* restroomImage() + const response = yield* generate( + LLM.request({ + id: `${context.id}_image_tool_result`, + model: context.model, + system: "Read images carefully. Reply only with the visible text, lowercase, no punctuation.", + cache: "none", + generation: + context.temperature === false + ? { maxTokens: context.maxTokens ?? 40 } + : { maxTokens: context.maxTokens ?? 40, temperature: context.temperature ?? 0 }, + messages: [ + Message.user("Use the read_screenshot tool, then reply with the words shown."), + Message.assistant([{ type: "tool-call", id: "call_screenshot_1", name: screenshotToolName, input: {} }]), + Message.tool({ + id: "call_screenshot_1", + name: screenshotToolName, + resultType: "content", + result: [ + { type: "text", text: "Image read successfully" }, + { type: "media", mediaType: "image/png", data: image }, + ], + }), + ], + tools: [ + ToolDefinition.make({ + name: screenshotToolName, + description: "Capture a screenshot of the current screen.", + inputSchema: { type: "object", properties: {}, additionalProperties: false }, + }), + ], + }), + ) -const runReasoningScenario = (context: GoldenScenarioContext) => - runGeneratedConversation(context, [ - user("Think briefly, then reply exactly with: Hello!"), - assistant.expectText(/^Hello!?$/, { - system: "Show concise reasoning when the provider supports visible reasoning summaries.", - providerOptions: { openai: { reasoningEffort: "low", reasoningSummary: "auto" } }, - maxTokens: context.maxTokens ?? 120, - assert: (response) => expect(response.usage?.reasoningTokens ?? 0).toBeGreaterThan(0), - }), - ]) + expectFinish(response.events, "stop") + expect(normalizeImageText(response.text)).toBe(RESTROOM_IMAGE_TEXT) + return + } -const runReasoningContinuationScenario = (context: GoldenScenarioContext) => - runGeneratedConversation(context, [ - user("Think briefly, then reply exactly with: Hello!"), - assistant.expectEncryptedReasoningText(/^Hello!?$/, { - id: "first", - system: "Show concise reasoning when the provider supports visible reasoning summaries.", - maxTokens: context.maxTokens ?? 120, - }), - user("Now reply exactly with: Done."), - assistant.expectText(/^Done\.?$/, { id: "second", maxTokens: 40, providerOptions: encryptedReasoningOptions }), - ]) + if (id === "reasoning") { + const response = yield* generate( + reasoningRequest({ + id: context.id, + model: context.model, + maxTokens: context.maxTokens ?? 120, + temperature: context.temperature, + }), + ) + expect(response.text.trim()).toMatch(/^Hello!?$/) + expect(response.usage?.reasoningTokens ?? 0).toBeGreaterThan(0) + expectFinish(response.events, "stop") + return + } -const runToolLoopScenario = (context: GoldenScenarioContext) => - Effect.gen(function* () { expectGoldenWeatherToolLoop( yield* runWeatherToolLoop( goldenWeatherToolLoopRequest({ @@ -395,30 +353,6 @@ const runToolLoopScenario = (context: GoldenScenarioContext) => ) }) -const goldenScenarios = { - text: { title: "streams text", tags: ["text", "golden"], run: runTextScenario }, - "tool-call": { title: "streams tool call", tags: ["tool", "tool-call", "golden"], run: runToolCallScenario }, - "tool-loop": { title: "drives a tool loop", tags: ["tool", "tool-loop", "golden"], run: runToolLoopScenario }, - image: { title: "reads image text", tags: ["media", "image", "vision", "golden"], run: runImageScenario }, - "image-tool-result": { - title: "reads image returned from tool result", - tags: ["media", "image", "vision", "tool", "tool-result", "golden"], - run: runImageToolResultScenario, - }, - reasoning: { title: "uses reasoning", tags: ["reasoning", "golden"], run: runReasoningScenario }, - "reasoning-continuation": { - title: "continues encrypted reasoning", - tags: ["reasoning", "continuation", "encrypted-reasoning", "golden"], - run: runReasoningContinuationScenario, - }, -} as const - -export type GoldenScenarioID = keyof typeof goldenScenarios -export const goldenScenarioTitle = (id: GoldenScenarioID) => goldenScenarios[id].title -export const goldenScenarioTags = (id: GoldenScenarioID) => [...goldenScenarios[id].tags] -export const runGoldenScenario = (id: GoldenScenarioID, context: GoldenScenarioContext) => - goldenScenarios[id].run(context) - const usageSummary = (usage: LLMResponse["usage"] | undefined) => { if (!usage) return undefined return Object.fromEntries( diff --git a/packages/opencode/src/agent/agent.ts b/packages/opencode/src/agent/agent.ts index 064a59f59ed1..45ba4a483795 100644 --- a/packages/opencode/src/agent/agent.ts +++ b/packages/opencode/src/agent/agent.ts @@ -43,6 +43,12 @@ export const Info = Schema.Struct({ }), ), variant: Schema.optional(Schema.String), + fallbacks: Schema.optional( + Schema.Array(Schema.Struct({ + providerID: ProviderID, + modelID: ModelID, + })), + ), prompt: Schema.optional(Schema.String), options: Schema.Record(Schema.String, Schema.Unknown), steps: Schema.optional(Schema.Finite), @@ -295,6 +301,9 @@ export const layer = Layer.effect( native: false, } if (value.model) item.model = Provider.parseModel(value.model) + if (value.fallbacks) { + item.fallbacks = value.fallbacks.map((f) => Provider.parseModel(f)) + } item.variant = value.variant ?? item.variant item.prompt = value.prompt ?? item.prompt item.description = value.description ?? item.description diff --git a/packages/opencode/src/cli/cmd/tui/app.tsx b/packages/opencode/src/cli/cmd/tui/app.tsx index 68803d0d118c..35fb129acfa2 100644 --- a/packages/opencode/src/cli/cmd/tui/app.tsx +++ b/packages/opencode/src/cli/cmd/tui/app.tsx @@ -988,6 +988,20 @@ function App(props: { onSnapshot?: () => Promise }) { } }) + event.on("llm.fallback.triggered", (evt) => { + toast.show({ + message: `Falling back to ${evt.properties.modelID} (${evt.properties.reason})`, + variant: "warning", + }) + }) + + event.on("llm.fallback.used", (evt) => { + toast.show({ + message: `Switched to ${evt.properties.modelID}`, + variant: "info", + }) + }) + event.on("session.error", (evt) => { const error = evt.properties.error if (error && typeof error === "object" && error.name === "MessageAbortedError") return diff --git a/packages/opencode/src/cli/cmd/tui/feature-plugins/system/session-v2.tsx b/packages/opencode/src/cli/cmd/tui/feature-plugins/system/session-v2.tsx index 8b2b2ed37b28..4ed4f30114bd 100644 --- a/packages/opencode/src/cli/cmd/tui/feature-plugins/system/session-v2.tsx +++ b/packages/opencode/src/cli/cmd/tui/feature-plugins/system/session-v2.tsx @@ -367,7 +367,11 @@ function AssistantMessage(props: { } function AssistantText(props: { part: SessionMessageAssistantText; syntax: SyntaxStyle }) { - const { theme } = useTheme() + const { theme, subtleSyntax } = useTheme() + const fallbackNotice = props.part.fallbackNotice + const isFallback = fallbackNotice != null + const fg = fallbackNotice === "resume" ? theme.success : isFallback ? theme.error : props.part.ignored ? theme.textMuted : theme.text + const syntaxStyle = isFallback || props.part.ignored ? subtleSyntax() : props.syntax return ( @@ -375,10 +379,10 @@ function AssistantText(props: { part: SessionMessageAssistantText; syntax: Synta filetype="markdown" drawUnstyledText={false} streaming={true} - syntaxStyle={props.syntax} + syntaxStyle={syntaxStyle} content={props.part.text.trim()} conceal={true} - fg={theme.text} + fg={fg} /> diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx index b3dc2a4a7de2..346522ea7012 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx @@ -1434,6 +1434,10 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las const sync = useSync() const messages = createMemo(() => sync.data.message[props.message.sessionID] ?? []) const model = createMemo(() => Model.name(ctx.providers(), props.message.providerID, props.message.modelID)) + const providerName = createMemo(() => { + const p = ctx.providers()?.get(props.message.providerID) + return p?.name ?? props.message.providerID + }) const final = createMemo(() => { return props.message.finish && !["tool-calls", "unknown"].includes(props.message.finish) @@ -1503,7 +1507,7 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las ▣{" "} {" "} {Locale.titlecase(props.message.mode)} - · {model()} + · {model()} ({providerName()}) · {Locale.duration(duration())} @@ -1627,18 +1631,22 @@ function ReasoningHeader(props: { function TextPart(props: { last: boolean; part: TextPart; message: AssistantMessage }) { const ctx = use() - const { theme, syntax } = useTheme() + const { theme, syntax, subtleSyntax } = useTheme() + const fallbackNotice = (props.part as any).fallbackNotice as "using" | "switch" | "resume" | undefined + const isFallback = fallbackNotice != null + const fg = fallbackNotice === "resume" ? theme.success : isFallback ? theme.error : (props.part as any).ignored ? theme.textMuted : theme.markdownText + const syntaxStyle = isFallback || (props.part as any).ignored ? subtleSyntax() : syntax() return ( diff --git a/packages/opencode/src/config/agent.ts b/packages/opencode/src/config/agent.ts index 379301cd2931..da5865dd4f42 100644 --- a/packages/opencode/src/config/agent.ts +++ b/packages/opencode/src/config/agent.ts @@ -45,6 +45,9 @@ const AgentSchema = Schema.StructWithRest( }), maxSteps: Schema.optional(PositiveInt).annotate({ description: "@deprecated Use 'steps' field instead." }), permission: Schema.optional(ConfigPermission.Info), + fallbacks: Schema.optional(Schema.mutable(Schema.Array(ConfigModelID))).annotate({ + description: "Fallback models to try when the primary model fails, in provider/model format", + }), }), [Schema.Record(Schema.String, Schema.Any)], ) @@ -64,6 +67,7 @@ const KNOWN_KEYS = new Set([ "maxSteps", "options", "permission", + "fallbacks", "disable", "tools", ]) diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts index 456d6c3ee317..8a0a43c9731e 100644 --- a/packages/opencode/src/config/config.ts +++ b/packages/opencode/src/config/config.ts @@ -180,6 +180,12 @@ export const Info = Schema.Struct({ model: Schema.optional(ConfigModelID).annotate({ description: "Model to use in the format of provider/model, eg anthropic/claude-2", }), + fallbacks: Schema.optional(Schema.mutable(Schema.Array(ConfigModelID))).annotate({ + description: "Fallback models to try when the primary model fails, in provider/model format", + }), + cooldown_seconds: Schema.optional(Schema.Number.check(Schema.isGreaterThan(0))).annotate({ + description: "Duration in seconds to put a provider/model in cooldown after a retryable error (default: 300)", + }), small_model: Schema.optional(ConfigModelID).annotate({ description: "Small model to use for tasks like title generation in the format of provider/model", }), diff --git a/packages/opencode/src/session/fallback.ts b/packages/opencode/src/session/fallback.ts new file mode 100644 index 000000000000..7fd5af67bf77 --- /dev/null +++ b/packages/opencode/src/session/fallback.ts @@ -0,0 +1,388 @@ +import { BusEvent } from "@/bus/bus-event" +import { Schema, Effect, Option, Cause } from "effect" +import * as Stream from "effect/Stream" +import type { LLMEvent } from "@opencode-ai/llm" +import { ProviderID, ModelID } from "@/provider/schema" +import { SessionID } from "./schema" +import { SessionRetry } from "./retry" +import type { Err } from "./retry" +import { MessageV2 } from "./message-v2" +import type { ProviderResult } from "./llm-call" +import { toStream } from "./llm-call" +import type { Provider } from "@/provider/provider" +import type { Config } from "@/config/config" +import type { Bus } from "@/bus" + +export class CooldownManager { + private store = new Map() + + private key(providerID: string, modelID: string): string { + return `${providerID}/${modelID}` + } + + put(providerID: string, modelID: string, durationMs: number): void { + this.store.set(this.key(providerID, modelID), Date.now() + durationMs) + } + + isCooledDown(providerID: string, modelID: string): boolean { + const k = this.key(providerID, modelID) + const expiry = this.store.get(k) + if (expiry === undefined) return false + if (Date.now() >= expiry) { + this.store.delete(k) + return false + } + return true + } + + remaining(providerID: string, modelID: string): number | undefined { + const k = this.key(providerID, modelID) + const expiry = this.store.get(k) + if (expiry === undefined) return undefined + const left = expiry - Date.now() + if (left <= 0) { + this.store.delete(k) + return undefined + } + return left + } + + clear(providerID: string, modelID: string): void { + this.store.delete(this.key(providerID, modelID)) + } +} + +export type ChainEntry = { providerID: string; modelID: string } + +export type StartDecision = + | { kind: "primary" } + | { kind: "fallback"; index: number } + | { kind: "soonest"; index: number } + +export function pickStart( + primary: ChainEntry, + fallbacks: Array, + cooldown: { isCooledDown(providerID: string, modelID: string): boolean; remaining(providerID: string, modelID: string): number | undefined }, +): StartDecision { + if (!cooldown.isCooledDown(primary.providerID, primary.modelID)) { + return { kind: "primary" } + } + + const availableIndex = fallbacks.findIndex( + (f) => !cooldown.isCooledDown(f.providerID, f.modelID), + ) + if (availableIndex !== -1) { + return { kind: "fallback", index: availableIndex } + } + + let bestIndex = -1 + let bestRemaining = cooldown.remaining(primary.providerID, primary.modelID) ?? Infinity + for (let i = 0; i < fallbacks.length; i++) { + const remaining = cooldown.remaining(fallbacks[i].providerID, fallbacks[i].modelID) ?? Infinity + if (remaining < bestRemaining) { + bestRemaining = remaining + bestIndex = i + } + } + + return { kind: "soonest", index: bestIndex } +} + +export type FallbackEntry = { + providerID: string + modelID: string +} + +export function isRetryable(error: Err, provider: string): boolean { + return SessionRetry.retryable(error, provider) !== undefined +} + +export const FallbackTriggered = BusEvent.define( + "llm.fallback.triggered", + Schema.Struct({ + sessionID: SessionID, + modelID: ModelID, + providerID: ProviderID, + reason: Schema.String, + }), +) + +export const FallbackUsed = BusEvent.define( + "llm.fallback.used", + Schema.Struct({ + sessionID: SessionID, + modelID: ModelID, + providerID: ProviderID, + }), +) + +export const cooldown = new CooldownManager() + +export function classifyError(cause: Cause.Cause, prevProviderID: string, _prevModelID: string, cooldownSeconds: number): ClassifiedError | null { + const error = Cause.squash(cause) + let err = MessageV2.fromError(error, { providerID: ProviderID.make(prevProviderID) }) + if (!MessageV2.APIError.isInstance(err) && !MessageV2.ContextOverflowError.isInstance(err) && !MessageV2.AbortedError.isInstance(err)) { + err = new MessageV2.APIError({ + message: typeof error === "string" ? error : error instanceof Error ? error.message : "Unknown stream error", + isRetryable: true, + }).toObject() + } + if (!isRetryable(err, prevProviderID)) return null + const retryInfo = SessionRetry.retryable(err as unknown as SessionRetry.Err, prevProviderID) + return { + error: err, + isRetryable: true, + retryInfo, + reason: retryInfo?.message ?? "error", + } +} + +export const FALLBACK_NOTICE_ID = "fallback-notice" +export const FALLBACK_RESUME_ID = "fallback-resume" +export const FALLBACK_USING_ID = "fallback-using" + +export type ClassifiedError = { + error: unknown + isRetryable: boolean + retryInfo: SessionRetry.Retryable | undefined + reason: string +} + +export type FallbackDeps = { + provider: { + getModel: (providerID: ProviderID, modelID: ModelID) => Effect.Effect + getProvider: (providerID: ProviderID) => Effect.Effect + } + bus: Bus.Interface + config: { + get: () => Effect.Effect<{ cooldown_seconds?: number }, unknown> + } + classifyError: (cause: Cause.Cause, prevProviderID: string, prevModelID: string, cooldownSeconds: number) => ClassifiedError | null + call: (model: Provider.Model, providerID: string, modelID: string) => Effect.Effect + log: { + clone: () => FallbackDeps["log"] + info: (msg: string, ...args: any[]) => void + warn: (msg: string, ...args: any[]) => void + tag: (key: string, value: string) => FallbackDeps["log"] + } +} + +export type FallbackInput = { + sessionID: string + model: Provider.Model & { providerID: string; id: string } + fallbacks?: Array<{ providerID: string; modelID: string }> + usedFallback?: { providerID: string; modelID: string } + wasOnFallback?: boolean + abort: AbortSignal +} + +type StreamEvent = { type: string; id?: string; text?: string; [key: string]: unknown } + +function formatDuration(ms: number): string { + const seconds = Math.ceil(ms / 1000) + if (seconds < 60) return `${seconds}s` + const minutes = Math.ceil(seconds / 60) + if (minutes < 60) return `${minutes}m` + const hours = Math.floor(minutes / 60) + const remainingMinutes = minutes % 60 + return remainingMinutes > 0 ? `${hours}h ${remainingMinutes}m` : `${hours}h` +} + +function noticeEvents(text: string, id: string = FALLBACK_NOTICE_ID): StreamEvent[] { + return [ + { type: "text-start", id } as StreamEvent, + { type: "text-delta", id, text } as StreamEvent, + { type: "text-end", id } as StreamEvent, + ] +} + +function cooldownDuration( + err: Record, + retryInfo: SessionRetry.Retryable | undefined, + cooldownSeconds: number, + quotaCooldownMs: number, +): number { + const headers = (err as any)?.data?.responseHeaders ?? {} + const retryAfterMs = headers["retry-after-ms"] + if (retryAfterMs) { + const parsed = Number.parseFloat(retryAfterMs) + if (!Number.isNaN(parsed)) return Math.ceil(parsed) + } + const retryAfter = headers["retry-after"] + if (retryAfter) { + const parsed = Number.parseFloat(retryAfter) * 1000 + if (!Number.isNaN(parsed)) return Math.ceil(parsed) + const dateParsed = Date.parse(retryAfter) - Date.now() + if (!Number.isNaN(dateParsed) && dateParsed > 0) return Math.ceil(dateParsed) + } + if (retryInfo?.quotaLimit) return quotaCooldownMs + return cooldownSeconds * 1000 +} + +export function withFallback( + input: FallbackInput, + deps: FallbackDeps, +): Effect.Effect, unknown> { + return Effect.gen(function* () { + const QUOTA_COOLDOWN_MS = 6 * 60 * 60 * 1000 + const cfg = yield* deps.config.get() + const cooldownSeconds = cfg.cooldown_seconds ?? 300 + const fallbacks = input.fallbacks ?? [] + + if (fallbacks.length === 0) { + const result = yield* deps.call(input.model, input.model.providerID, input.model.id) + return toStream(result) + } + + const chainFallback = ( + stream: Stream.Stream, + prevEntry: { providerID: string; modelID: string }, + entry: { providerID: string; modelID: string }, + ): Stream.Stream => { + const el = deps.log.clone().tag("providerID", entry.providerID).tag("modelID", entry.modelID) + return stream.pipe( + Stream.catchCause((cause) => + Stream.unwrap( + Effect.gen(function* () { + if (cooldown.isCooledDown(entry.providerID, entry.modelID)) { + el.info("skipping cooled-down fallback") + return yield* Effect.failCause(cause) + } + + const resolved = yield* deps.provider + .getModel(ProviderID.make(entry.providerID), ModelID.make(entry.modelID)) + .pipe(Effect.option) + if (!Option.isSome(resolved)) { + el.info("fallback model not found, skipping") + return yield* Effect.failCause(cause) + } + const model = resolved.value + + if (input.abort.aborted) return yield* Effect.fail(new Error("Request aborted")) + + const classified = deps.classifyError(cause, prevEntry.providerID, prevEntry.modelID, cooldownSeconds) + if (!classified) { + el.info("non-retryable error, not falling back") + return yield* Effect.failCause(cause) + } + + const durationMs = cooldownDuration(classified.error as Record, classified.retryInfo, cooldownSeconds, QUOTA_COOLDOWN_MS) + cooldown.put(prevEntry.providerID, prevEntry.modelID, durationMs) + el.info("stream error, falling back", { cooldownMs: durationMs }) + + yield* deps.bus.publish(FallbackTriggered, { + sessionID: SessionID.make(input.sessionID ?? ""), + modelID: ModelID.make(prevEntry.modelID), + providerID: ProviderID.make(prevEntry.providerID), + reason: classified.reason, + }) + + input.usedFallback = { providerID: entry.providerID, modelID: entry.modelID } + yield* deps.bus.publish(FallbackUsed, { + sessionID: SessionID.make(input.sessionID ?? ""), + modelID: ModelID.make(entry.modelID), + providerID: ProviderID.make(entry.providerID), + }) + + const providerInfo = yield* deps.provider.getProvider(ProviderID.make(entry.providerID)).pipe(Effect.option) + const providerName = Option.isSome(providerInfo) ? providerInfo.value.name : entry.providerID + const reason = classified.reason.length > 40 ? classified.reason.slice(0, 37) + "..." : classified.reason + const notice = `→ Switching to ${model.name} (${providerName})${reason ? ` — ${reason}` : ""}` + const fallbackResult = yield* deps.call(model, entry.providerID, entry.modelID) + + return Stream.concat(Stream.fromIterable(noticeEvents(notice)), toStream(fallbackResult)) + }), + ), + ), + ) + } + + const primaryEntry = { providerID: input.model.providerID, modelID: input.model.id } + const decision = pickStart(primaryEntry, fallbacks, cooldown) + + if (decision.kind !== "primary") { + const startEntry = decision.kind === "soonest" && decision.index === -1 + ? primaryEntry + : fallbacks[decision.index] + + const startModel = yield* deps.provider + .getModel(ProviderID.make(startEntry.providerID), ModelID.make(startEntry.modelID)) + .pipe(Effect.option) + + let resolvedModel = startModel + let resolvedEntry = startEntry + if (!Option.isSome(resolvedModel) && (decision.kind !== "soonest" || decision.index !== -1)) { + const startIndex = decision.kind === "soonest" && decision.index !== -1 ? decision.index : decision.kind === "fallback" ? decision.index : 0 + for (let i = startIndex + 1; i < fallbacks.length; i++) { + const candidate = fallbacks[i] + if (decision.kind === "soonest" && cooldown.isCooledDown(candidate.providerID, candidate.modelID)) continue + const next = yield* deps.provider.getModel(ProviderID.make(candidate.providerID), ModelID.make(candidate.modelID)).pipe(Effect.option) + if (Option.isSome(next)) { + resolvedModel = next + resolvedEntry = candidate + break + } + } + } + + if (!Option.isSome(resolvedModel)) { + deps.log.warn("no fallbacks resolvable, attempting primary with cleared cooldown") + cooldown.clear(primaryEntry.providerID, primaryEntry.modelID) + const primaryResult = yield* deps.call(input.model, input.model.providerID, input.model.id) + return toStream(primaryResult) + } + + const model = Option.isSome(startModel) ? startModel.value : resolvedModel.value + deps.log.info("primary on cooldown, starting from fallback", { + providerID: resolvedEntry.providerID, + modelID: resolvedEntry.modelID, + }) + + yield* deps.bus.publish(FallbackTriggered, { + sessionID: SessionID.make(input.sessionID ?? ""), + modelID: ModelID.make(primaryEntry.modelID), + providerID: ProviderID.make(primaryEntry.providerID), + reason: "cooldown", + }) + input.usedFallback = { providerID: resolvedEntry.providerID, modelID: resolvedEntry.modelID } + yield* deps.bus.publish(FallbackUsed, { + sessionID: SessionID.make(input.sessionID ?? ""), + modelID: ModelID.make(resolvedEntry.modelID), + providerID: ProviderID.make(resolvedEntry.providerID), + }) + + const startResult = yield* deps.call(model, resolvedEntry.providerID, resolvedEntry.modelID) + let stream: Stream.Stream = toStream(startResult) + + const startIdx = decision.kind === "soonest" && decision.index === -1 ? 0 : decision.index + for (let i = startIdx + 1; i < fallbacks.length; i++) { + stream = chainFallback(stream, fallbacks[i - 1], fallbacks[i]) + } + + return stream + } + + const chain: Array<{ providerID: string; modelID: string }> = [ + { providerID: input.model.providerID, modelID: input.model.id }, + ...fallbacks, + ] + + const primaryResult = yield* deps.call(input.model, input.model.providerID, input.model.id) + const primary = toStream(primaryResult) + + let stream: Stream.Stream = primary + + if (input.wasOnFallback) { + const providerInfo = yield* deps.provider.getProvider(ProviderID.make(input.model.providerID)).pipe(Effect.option) + const providerName = Option.isSome(providerInfo) ? providerInfo.value.name : input.model.providerID + const notice = `→ Switched to ${input.model.name} (${providerName})` + stream = Stream.concat(Stream.fromIterable(noticeEvents(notice, FALLBACK_RESUME_ID)), primary) + } + + for (let i = 1; i < chain.length; i++) { + stream = chainFallback(stream, chain[i - 1], chain[i]) + } + + return stream + }) +} \ No newline at end of file diff --git a/packages/opencode/src/session/llm-call.ts b/packages/opencode/src/session/llm-call.ts new file mode 100644 index 000000000000..468077ec9723 --- /dev/null +++ b/packages/opencode/src/session/llm-call.ts @@ -0,0 +1,325 @@ +import type { Provider } from "@/provider/provider" +import type { Auth } from "@/auth" +import type { RuntimeFlags } from "@/effect/runtime-flags" +import type { Config } from "@/config/config" +import type { Plugin } from "@/plugin" +import { Permission } from "@/permission" +import { ProviderID } from "@/provider/schema" +import { Effect, Option } from "effect" +import * as Stream from "effect/Stream" +import { streamText, wrapLanguageModel, type ModelMessage, type Tool } from "ai" +import type { LLMEvent } from "@opencode-ai/llm" +import { GitLabWorkflowLanguageModel } from "gitlab-ai-provider" +import { ProviderTransform } from "@/provider/transform" +import type { Agent } from "@/agent/agent" +import { MessageV2 } from "./message-v2" +import { PermissionID } from "@/permission/schema" +import { Bus } from "@/bus" +import { Wildcard } from "@/util/wildcard" +import { SessionID } from "@/session/schema" +import { EffectBridge } from "@/effect/bridge" +import * as OtelTracer from "@effect/opentelemetry/Tracer" +import * as Log from "@opencode-ai/core/util/log" +import { LLMAISDK } from "./llm/ai-sdk" +import { LLMNativeRuntime } from "./llm/native-runtime" +import { LLMRequestPrep } from "./llm/request" + +const log = Log.create({ service: "llm-call" }) + +export type ProviderResult = + | { type: "native"; stream: Stream.Stream } + | { type: "ai-sdk"; result: any } + +export type CallInput = { + model: Provider.Model + providerID: string + modelID: string + sessionID: string + parentSessionID?: string + user: MessageV2.User + agent: Agent.Info + permission?: Permission.Ruleset + system: string[] + messages: ModelMessage[] + small?: boolean + tools: Record + retries?: number + toolChoice?: "auto" | "required" | "none" + abort: AbortSignal + deps: { + provider: { + getLanguage: (model: Provider.Model) => Effect.Effect + getProvider: (providerID: ProviderID) => Effect.Effect + } + auth: { + get: (providerID: ProviderID) => Effect.Effect + } + plugin: Plugin.Interface + perm: Permission.Interface + config: { + get: () => Effect.Effect + } + flags: RuntimeFlags.Info + llmClient: any + } + log?: { + info: (msg: string, ...args: any[]) => void + error: (msg: string, ...args: any[]) => void + } +} + +export const makeLLMCall = Effect.fn("LLMCall.make")(function* (input: CallInput) { + const { model, providerID, abort, deps } = input + const l = input.log ?? log + .clone() + .tag("providerID", providerID) + .tag("modelID", model.id) + .tag("session.id", input.sessionID) + const language = yield* deps.provider.getLanguage(model) + const [item, info] = yield* Effect.all( + [ + deps.provider.getProvider(ProviderID.make(providerID)), + deps.auth.get(ProviderID.make(providerID)), + ], + { concurrency: "unbounded" }, + ) + + const isWorkflow = language instanceof GitLabWorkflowLanguageModel + const prepared = yield* LLMRequestPrep.prepare({ + ...input, + model, + provider: item, + auth: info, + plugin: deps.plugin, + flags: deps.flags, + isWorkflow, + }) + + if (language instanceof GitLabWorkflowLanguageModel) { + const workflowModel = language as GitLabWorkflowLanguageModel & { + sessionID?: string + sessionPreapprovedTools?: string[] + approvalHandler?: (approvalTools: { name: string; args: string }[]) => Promise<{ approved: boolean }> + } + workflowModel.sessionID = input.sessionID + workflowModel.systemPrompt = prepared.system.join("\n") + workflowModel.toolExecutor = async (toolName, argsJson, _requestID) => { + const t = prepared.tools[toolName] + if (!t || !t.execute) { + return { result: "", error: `Unknown tool: ${toolName}` } + } + try { + const result = await t.execute!(JSON.parse(argsJson), { + toolCallId: _requestID, + messages: input.messages, + abortSignal: abort, + }) + const output = typeof result === "string" ? result : (result?.output ?? JSON.stringify(result)) + return { + result: output, + metadata: typeof result === "object" ? result?.metadata : undefined, + title: typeof result === "object" ? result?.title : undefined, + } + } catch (e: any) { + return { result: "", error: e.message ?? String(e) } + } + } + + const ruleset = Permission.merge(input.agent.permission ?? [], input.permission ?? []) + workflowModel.sessionPreapprovedTools = Object.keys(prepared.tools).filter((name) => { + const match = ruleset.findLast((rule) => Wildcard.match(name, rule.permission)) + return !match || match.action !== "ask" + }) + + const bridge = yield* EffectBridge.make() + const approvedToolsForSession = new Set() + workflowModel.approvalHandler = bridge.bind(async (approvalTools) => { + const uniqueNames = [...new Set(approvalTools.map((t: { name: string }) => t.name))] as string[] + if (uniqueNames.every((name) => approvedToolsForSession.has(name))) { + return { approved: true } + } + + const id = PermissionID.ascending() + let unsub: (() => void) | undefined + try { + unsub = Bus.subscribe(Permission.Event.Replied, (evt) => { + if (evt.properties.requestID === id) void evt.properties.reply + }) + const toolPatterns = approvalTools.map((t: { name: string; args: string }) => { + try { + const parsed = JSON.parse(t.args) as Record + const title = (parsed?.title ?? parsed?.name ?? "") as string + return title ? `${t.name}: ${title}` : t.name + } catch { + return t.name + } + }) + const uniquePatterns = [...new Set(toolPatterns)] as string[] + await bridge.promise( + deps.perm.ask({ + id, + sessionID: SessionID.make(input.sessionID), + permission: "workflow_tool_approval", + patterns: uniquePatterns, + metadata: { tools: approvalTools }, + always: uniquePatterns, + ruleset: [], + }), + ) + for (const name of uniqueNames) approvedToolsForSession.add(name) + workflowModel.sessionPreapprovedTools = [...(workflowModel.sessionPreapprovedTools ?? []), ...uniqueNames] + return { approved: true } + } catch { + return { approved: false } + } finally { + unsub?.() + } + }) + } + + const cfg = yield* deps.config.get() + + const tracer = cfg.experimental?.openTelemetry + ? Option.getOrUndefined(yield* Effect.serviceOption(OtelTracer.OtelTracer)) + : undefined + const telemetryTracer = tracer + ? new Proxy(tracer, { + get(target, prop, receiver) { + if (prop !== "startSpan") return Reflect.get(target, prop, receiver) + return (...args: Parameters) => { + const span = target.startSpan(...args) + span.setAttribute("session.id", input.sessionID) + return span + } + }, + }) + : undefined + + if (deps.flags.experimentalNativeLlm) { + const native = LLMNativeRuntime.stream({ + model, + provider: item, + auth: info, + llmClient: deps.llmClient, + messages: prepared.messages, + tools: prepared.tools, + toolChoice: input.toolChoice, + temperature: prepared.params.temperature, + topP: prepared.params.topP, + topK: prepared.params.topK, + maxOutputTokens: prepared.params.maxOutputTokens, + providerOptions: prepared.params.options, + headers: prepared.headers, + abort, + }) + if (native.type === "supported") { + yield* Effect.logInfo("llm runtime selected").pipe( + Effect.annotateLogs({ + "llm.runtime": "native", + "llm.provider": providerID, + "llm.model": model.id, + }), + ) + return { type: "native" as const, stream: native.stream } + } + yield* Effect.logInfo("llm runtime selected").pipe( + Effect.annotateLogs({ + "llm.runtime": "ai-sdk", + "llm.provider": providerID, + "llm.model": model.id, + "llm.native_unsupported_reason": native.reason, + }), + ) + l.info("native runtime unavailable; falling back to ai-sdk", { reason: native.reason }) + } + + yield* Effect.logInfo("llm runtime selected").pipe( + Effect.annotateLogs({ + "llm.runtime": "ai-sdk", + "llm.provider": providerID, + "llm.model": model.id, + }), + ) + + const result = streamText({ + onError(error) { + l.error("stream error", { error }) + }, + async experimental_repairToolCall(failed) { + const lower = failed.toolCall.toolName.toLowerCase() + if (lower !== failed.toolCall.toolName && prepared.tools[lower]) { + l.info("repairing tool call", { + tool: failed.toolCall.toolName, + repaired: lower, + }) + return { + ...failed.toolCall, + toolName: lower, + } + } + return { + ...failed.toolCall, + input: JSON.stringify({ + tool: failed.toolCall.toolName, + error: failed.error.message, + }), + toolName: "invalid", + } + }, + temperature: prepared.params.temperature, + topP: prepared.params.topP, + topK: prepared.params.topK, + providerOptions: ProviderTransform.providerOptions(model, prepared.params.options), + activeTools: Object.keys(prepared.tools).filter((x) => x !== "invalid"), + tools: prepared.tools, + toolChoice: input.toolChoice, + maxOutputTokens: prepared.params.maxOutputTokens, + abortSignal: abort, + headers: prepared.headers, + maxRetries: input.retries ?? 0, + messages: prepared.messages, + model: wrapLanguageModel({ + model: language, + middleware: [ + { + specificationVersion: "v3" as const, + async transformParams(args) { + if (args.type === "stream") { + // @ts-expect-error + args.params.prompt = ProviderTransform.message( + args.params.prompt, + model, + prepared.messageTransformOptions, + ) + } + return args.params + }, + }, + ], + }), + experimental_telemetry: { + isEnabled: cfg.experimental?.openTelemetry, + functionId: "session.llm", + tracer: telemetryTracer, + metadata: { + userId: cfg.username ?? "unknown", + sessionId: input.sessionID, + }, + }, + }) + + return { type: "ai-sdk" as const, result } +}) + +export function toStream(providerResult: ProviderResult): Stream.Stream { + if (providerResult.type === "native") return providerResult.stream + const state = LLMAISDK.adapterState() + return Stream.fromAsyncIterable(providerResult.result.fullStream as AsyncIterable, (e) => + e instanceof Error ? e : new Error(String(e)), + ).pipe( + Stream.mapEffect((event) => LLMAISDK.toLLMEvents(state, event)), + Stream.flatMap((events) => Stream.fromIterable(events)), + ) +} + +export * as LLMCall from "./llm-call" \ No newline at end of file diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index ea2efc99d007..bfbb0dffbc0b 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -3,28 +3,21 @@ import { serviceUse } from "@opencode-ai/core/effect/service-use" import * as Log from "@opencode-ai/core/util/log" import { Context, Effect, Layer } from "effect" import * as Stream from "effect/Stream" -import { streamText, wrapLanguageModel, type ModelMessage, type Tool } from "ai" import type { LLMEvent } from "@opencode-ai/llm" import { LLMClient, RequestExecutor, WebSocketExecutor } from "@opencode-ai/llm/route" import type { LLMClientService } from "@opencode-ai/llm/route" -import { GitLabWorkflowLanguageModel } from "gitlab-ai-provider" import { ProviderTransform } from "@/provider/transform" -import { Config } from "@/config/config" import type { Agent } from "@/agent/agent" import type { MessageV2 } from "./message-v2" -import { Plugin } from "@/plugin" +import type { ModelMessage, Tool } from "ai" import { Permission } from "@/permission" -import { PermissionID } from "@/permission/schema" -import { Bus } from "@/bus" -import { Wildcard } from "@/util/wildcard" -import { SessionID } from "@/session/schema" import { Auth } from "@/auth" -import { EffectBridge } from "@/effect/bridge" import { RuntimeFlags } from "@/effect/runtime-flags" -import * as Option from "effect/Option" -import * as OtelTracer from "@effect/opentelemetry/Tracer" -import { LLMAISDK } from "./llm/ai-sdk" -import { LLMNativeRuntime } from "./llm/native-runtime" +import { Config } from "@/config/config" +import { Bus } from "@/bus" +import { Plugin } from "@/plugin" +import { withFallback, classifyError } from "./fallback" +import { makeLLMCall } from "./llm-call" import { LLMRequestPrep } from "./llm/request" const log = Log.create({ service: "llm" }) @@ -43,6 +36,9 @@ export type StreamInput = { tools: Record retries?: number toolChoice?: "auto" | "required" | "none" + fallbacks?: Array<{ providerID: string; modelID: string }> + usedFallback?: { providerID: string; modelID: string } + wasOnFallback?: boolean } export type StreamRequest = StreamInput & { @@ -66,6 +62,7 @@ const live: Layer.Layer< | Plugin.Service | Permission.Service | LLMClientService + | Bus.Service | RuntimeFlags.Service > = Layer.effect( Service, @@ -76,6 +73,7 @@ const live: Layer.Layer< const plugin = yield* Plugin.Service const perm = yield* Permission.Service const llmClient = yield* LLMClient.Service + const bus = yield* Bus.Service const flags = yield* RuntimeFlags.Service const run = Effect.fn("LLM.run")(function* (input: StreamRequest) { @@ -92,252 +90,49 @@ const live: Layer.Layer< providerID: input.model.providerID, }) - const [language, cfg, item, info] = yield* Effect.all( - [ - provider.getLanguage(input.model), - config.get(), - provider.getProvider(input.model.providerID), - auth.get(input.model.providerID), - ], - { concurrency: "unbounded" }, - ) - - const isWorkflow = language instanceof GitLabWorkflowLanguageModel - const prepared = yield* LLMRequestPrep.prepare({ - ...input, - provider: item, - auth: info, - plugin, - flags, - isWorkflow, - }) - - // Wire up toolExecutor for DWS workflow models so that tool calls - // from the workflow service are executed via opencode's tool system - // and results sent back over the WebSocket. - if (language instanceof GitLabWorkflowLanguageModel) { - const workflowModel = language as GitLabWorkflowLanguageModel & { - sessionID?: string - sessionPreapprovedTools?: string[] - approvalHandler?: (approvalTools: { name: string; args: string }[]) => Promise<{ approved: boolean }> - } - workflowModel.sessionID = input.sessionID - workflowModel.systemPrompt = prepared.system.join("\n") - workflowModel.toolExecutor = async (toolName, argsJson, _requestID) => { - const t = prepared.tools[toolName] - if (!t || !t.execute) { - return { result: "", error: `Unknown tool: ${toolName}` } - } - try { - const result = await t.execute!(JSON.parse(argsJson), { - toolCallId: _requestID, - messages: input.messages, - abortSignal: input.abort, - }) - const output = typeof result === "string" ? result : (result?.output ?? JSON.stringify(result)) - return { - result: output, - metadata: typeof result === "object" ? result?.metadata : undefined, - title: typeof result === "object" ? result?.title : undefined, - } - } catch (e: any) { - return { result: "", error: e.message ?? String(e) } - } - } - - const ruleset = Permission.merge(input.agent.permission ?? [], input.permission ?? []) - workflowModel.sessionPreapprovedTools = Object.keys(prepared.tools).filter((name) => { - const match = ruleset.findLast((rule) => Wildcard.match(name, rule.permission)) - return !match || match.action !== "ask" - }) - - const bridge = yield* EffectBridge.make() - const approvedToolsForSession = new Set() - workflowModel.approvalHandler = bridge.bind(async (approvalTools) => { - const uniqueNames = [...new Set(approvalTools.map((t: { name: string }) => t.name))] as string[] - // Auto-approve tools that were already approved in this session - // (prevents infinite approval loops for server-side MCP tools) - if (uniqueNames.every((name) => approvedToolsForSession.has(name))) { - return { approved: true } - } - - const id = PermissionID.ascending() - let unsub: (() => void) | undefined - try { - unsub = Bus.subscribe(Permission.Event.Replied, (evt) => { - if (evt.properties.requestID === id) void evt.properties.reply - }) - const toolPatterns = approvalTools.map((t: { name: string; args: string }) => { - try { - const parsed = JSON.parse(t.args) as Record - const title = (parsed?.title ?? parsed?.name ?? "") as string - return title ? `${t.name}: ${title}` : t.name - } catch { - return t.name - } - }) - const uniquePatterns = [...new Set(toolPatterns)] as string[] - await bridge.promise( - perm.ask({ - id, - sessionID: SessionID.make(input.sessionID), - permission: "workflow_tool_approval", - patterns: uniquePatterns, - metadata: { tools: approvalTools }, - always: uniquePatterns, - ruleset: [], - }), - ) - for (const name of uniqueNames) approvedToolsForSession.add(name) - workflowModel.sessionPreapprovedTools = [...(workflowModel.sessionPreapprovedTools ?? []), ...uniqueNames] - return { approved: true } - } catch { - return { approved: false } - } finally { - unsub?.() - } - }) - } - - const tracer = cfg.experimental?.openTelemetry - ? Option.getOrUndefined(yield* Effect.serviceOption(OtelTracer.OtelTracer)) - : undefined - const telemetryTracer = tracer - ? new Proxy(tracer, { - get(target, prop, receiver) { - if (prop !== "startSpan") return Reflect.get(target, prop, receiver) - return (...args: Parameters) => { - const span = target.startSpan(...args) - span.setAttribute("session.id", input.sessionID) - return span - } - }, - }) - : undefined - - // Runtime seam: native is an opt-in adapter over @opencode-ai/llm. It - // either returns a ready LLMEvent stream or a concrete fallback reason. - if (flags.experimentalNativeLlm) { - const native = LLMNativeRuntime.stream({ - model: input.model, - provider: item, - auth: info, - llmClient, - messages: prepared.messages, - tools: prepared.tools, + const call = (model: Provider.Model, providerID: string, modelID: string) => + makeLLMCall({ + model, + providerID, + modelID, + sessionID: input.sessionID, + parentSessionID: input.parentSessionID, + user: input.user, + agent: input.agent, + permission: input.permission, + system: input.system, + messages: input.messages, + small: input.small, + tools: input.tools, + retries: input.retries, toolChoice: input.toolChoice, - temperature: prepared.params.temperature, - topP: prepared.params.topP, - topK: prepared.params.topK, - maxOutputTokens: prepared.params.maxOutputTokens, - providerOptions: prepared.params.options, - headers: prepared.headers, abort: input.abort, - }) - if (native.type === "supported") { - yield* Effect.logInfo("llm runtime selected").pipe( - Effect.annotateLogs({ - "llm.runtime": "native", - "llm.provider": input.model.providerID, - "llm.model": input.model.id, - }), - ) - return { - type: "native" as const, - stream: native.stream, - } - } - yield* Effect.logInfo("llm runtime selected").pipe( - Effect.annotateLogs({ - "llm.runtime": "ai-sdk", - "llm.provider": input.model.providerID, - "llm.model": input.model.id, - "llm.native_unsupported_reason": native.reason, - }), - ) - l.info("native runtime unavailable; falling back to ai-sdk", { reason: native.reason }) - } - - yield* Effect.logInfo("llm runtime selected").pipe( - Effect.annotateLogs({ - "llm.runtime": "ai-sdk", - "llm.provider": input.model.providerID, - "llm.model": input.model.id, - }), - ) - // Default runtime path: AI SDK owns provider execution and tool dispatch; - // LLMAISDK.toLLMEvents below normalizes fullStream parts for the processor. - return { - type: "ai-sdk" as const, - result: streamText({ - onError(error) { - l.error("stream error", { - error, - }) - }, - async experimental_repairToolCall(failed) { - const lower = failed.toolCall.toolName.toLowerCase() - if (lower !== failed.toolCall.toolName && prepared.tools[lower]) { - l.info("repairing tool call", { - tool: failed.toolCall.toolName, - repaired: lower, - }) - return { - ...failed.toolCall, - toolName: lower, - } - } - return { - ...failed.toolCall, - input: JSON.stringify({ - tool: failed.toolCall.toolName, - error: failed.error.message, - }), - toolName: "invalid", - } - }, - temperature: prepared.params.temperature, - topP: prepared.params.topP, - topK: prepared.params.topK, - providerOptions: ProviderTransform.providerOptions(input.model, prepared.params.options), - activeTools: Object.keys(prepared.tools).filter((x) => x !== "invalid"), - tools: prepared.tools, - toolChoice: input.toolChoice, - maxOutputTokens: prepared.params.maxOutputTokens, - abortSignal: input.abort, - headers: prepared.headers, - maxRetries: input.retries ?? 0, - messages: prepared.messages, - model: wrapLanguageModel({ - model: language, - middleware: [ - { - specificationVersion: "v3" as const, - async transformParams(args) { - if (args.type === "stream") { - // @ts-expect-error - args.params.prompt = ProviderTransform.message( - args.params.prompt, - input.model, - prepared.messageTransformOptions, - ) - } - return args.params - }, - }, - ], - }), - experimental_telemetry: { - isEnabled: cfg.experimental?.openTelemetry, - functionId: "session.llm", - tracer: telemetryTracer, - metadata: { - userId: cfg.username ?? "unknown", - sessionId: input.sessionID, + log: l, + deps: { + provider: { + getLanguage: provider.getLanguage.bind(provider), + getProvider: provider.getProvider.bind(provider), }, + auth: { get: auth.get.bind(auth) }, + plugin, + perm, + config, + flags, + llmClient, }, - }), - } + }) + + return yield* withFallback(input, { + provider: { + getModel: provider.getModel.bind(provider), + getProvider: provider.getProvider.bind(provider), + }, + bus, + config, + classifyError, + call, + log: l, + }) }) const stream: Interface["stream"] = (input) => @@ -349,19 +144,7 @@ const live: Layer.Layer< (ctrl) => Effect.sync(() => ctrl.abort()), ) - const result = yield* run({ ...input, abort: ctrl.signal }) - - if (result.type === "native") return result.stream - - // Adapter seam: both runtimes expose the same LLMEvent stream. Native - // already returns one; AI SDK streams are converted here. - const state = LLMAISDK.adapterState() - return Stream.fromAsyncIterable(result.result.fullStream, (e) => - e instanceof Error ? e : new Error(String(e)), - ).pipe( - Stream.mapEffect((event) => LLMAISDK.toLLMEvents(state, event)), - Stream.flatMap((events) => Stream.fromIterable(events)), - ) + return yield* run({ ...input, abort: ctrl.signal }) }), ), ) @@ -382,9 +165,10 @@ export const defaultLayer = Layer.suspend(() => LLMClient.layer.pipe(Layer.provide(Layer.mergeAll(RequestExecutor.defaultLayer, WebSocketExecutor.layer))), ), Layer.provide(RuntimeFlags.defaultLayer), + Layer.provide(Bus.defaultLayer), ), ) export const hasToolCalls = LLMRequestPrep.hasToolCalls -export * as LLM from "./llm" +export * as LLM from "./llm" \ No newline at end of file diff --git a/packages/opencode/src/session/llm/native-request.ts b/packages/opencode/src/session/llm/native-request.ts index b7f30e24c362..21e6413a2999 100644 --- a/packages/opencode/src/session/llm/native-request.ts +++ b/packages/opencode/src/session/llm/native-request.ts @@ -42,15 +42,10 @@ const providerMetadata = (value: unknown): ProviderMetadata | undefined => { return Object.keys(result).length === 0 ? undefined : result } -// Stored AI SDK parts historically kept provider-owned continuation metadata in -// `providerOptions`; native parts now use `providerMetadata` directly. -const partProviderMetadata = (part: Record) => - providerMetadata(part.providerMetadata) ?? providerMetadata(part.providerOptions) - const textPart = (part: Record) => ({ type: "text" as const, text: typeof part.text === "string" ? part.text : "", - providerMetadata: partProviderMetadata(part), + providerMetadata: providerMetadata(part.providerOptions), }) const mediaPart = (part: Record) => { @@ -73,7 +68,7 @@ const toolResult = (part: Record) => { result: "value" in output ? output.value : output, resultType: type, providerExecuted: typeof part.providerExecuted === "boolean" ? part.providerExecuted : undefined, - providerMetadata: partProviderMetadata(part), + providerMetadata: providerMetadata(part.providerOptions), }) } @@ -85,7 +80,7 @@ const contentPart = (part: unknown) => { return { type: "reasoning" as const, text: typeof part.text === "string" ? part.text : "", - providerMetadata: partProviderMetadata(part), + providerMetadata: providerMetadata(part.providerOptions), } if (part.type === "tool-call") return ToolCallPart.make({ @@ -93,7 +88,7 @@ const contentPart = (part: unknown) => { name: typeof part.toolName === "string" ? part.toolName : "", input: part.input, providerExecuted: typeof part.providerExecuted === "boolean" ? part.providerExecuted : undefined, - providerMetadata: partProviderMetadata(part), + providerMetadata: providerMetadata(part.providerOptions), }) if (part.type === "tool-result") return toolResult(part) throw new Error(`Native LLM request adapter does not support ${String(part.type)} content parts`) diff --git a/packages/opencode/src/session/llm/request.ts b/packages/opencode/src/session/llm/request.ts index 34713424053a..545f2ec0087c 100644 --- a/packages/opencode/src/session/llm/request.ts +++ b/packages/opencode/src/session/llm/request.ts @@ -137,8 +137,14 @@ export const prepare = Effect.fn("LLMRequestPrep.prepare")(function* (input: Pre ) const tools = resolveTools(input) + + const isLiteLLMProxy = + input.provider.options?.["litellmProxy"] === true || + input.model.providerID.toLowerCase().includes("litellm") || + input.model.api.id.toLowerCase().includes("litellm") + if ( - input.model.providerID.includes("github-copilot") && + (isLiteLLMProxy || input.model.providerID.includes("github-copilot")) && Object.keys(tools).length === 0 && hasToolCalls(input.messages) ) { diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index 68157f5f4d89..b6e0ac2b6181 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -100,6 +100,7 @@ export const TextPart = Schema.Struct({ text: Schema.String, synthetic: Schema.optional(Schema.Boolean), ignored: Schema.optional(Schema.Boolean), + fallbackNotice: Schema.optional(Schema.Union([Schema.Literal("using"), Schema.Literal("switch"), Schema.Literal("resume")])), time: Schema.optional( Schema.Struct({ start: NonNegativeInt, diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index a287c3b00680..5e938064744d 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -14,6 +14,7 @@ import { isOverflow } from "./overflow" import { PartID } from "./schema" import type { SessionID } from "./schema" import { SessionRetry } from "./retry" +import { FALLBACK_NOTICE_ID, FALLBACK_RESUME_ID, FALLBACK_USING_ID } from "./fallback" import { SessionStatus } from "./status" import { SessionSummary } from "./summary" import type { Provider } from "@/provider/provider" @@ -626,14 +627,21 @@ export const layer = Layer.effect( }) } } - ctx.currentText = { - id: PartID.ascending(), - messageID: ctx.assistantMessage.id, - sessionID: ctx.assistantMessage.sessionID, - type: "text", - text: "", - time: { start: Date.now() }, - metadata: value.providerMetadata, + { + const fallbackId = value.id === FALLBACK_USING_ID ? "using" as const + : value.id === FALLBACK_NOTICE_ID ? "switch" as const + : value.id === FALLBACK_RESUME_ID ? "resume" as const + : undefined + ctx.currentText = { + id: PartID.ascending(), + messageID: ctx.assistantMessage.id, + sessionID: ctx.assistantMessage.sessionID, + type: "text", + text: "", + time: { start: Date.now() }, + metadata: value.providerMetadata, + ...(fallbackId ? { ignored: true as const, synthetic: true as const, fallbackNotice: fallbackId } : {}), + } } yield* session.updatePart(ctx.currentText) return diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index 22fe4d81cd40..9c1552e1e69b 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -1441,7 +1441,13 @@ export const layer = Layer.effect( const system = [...env, ...instructions, ...(skills ? [skills] : [])] const format = lastUser.format ?? { type: "text" as const } if (format.type === "json_schema") system.push(STRUCTURED_OUTPUT_SYSTEM_PROMPT) - const result = yield* handle.process({ + const cfg = yield* config.get() + const wasOnFallback = (() => { + const lastCompleted = msgs.findLast((m) => m.info.role === "assistant" && m.info.time?.completed) + if (!lastCompleted || lastCompleted.info.role !== "assistant") return false + return lastCompleted.info.providerID !== model.providerID || lastCompleted.info.modelID !== model.id + })() + const streamInput: LLM.StreamInput = { user: lastUser, agent, permission: session.permission, @@ -1452,7 +1458,15 @@ export const layer = Layer.effect( tools, model, toolChoice: format.type === "json_schema" ? "required" : undefined, - }) + fallbacks: (agent.fallbacks?.length ? agent.fallbacks : undefined) ?? (cfg.fallbacks ? cfg.fallbacks.map((f: string) => Provider.parseModel(f)) : undefined), + wasOnFallback, + } + const result = yield* handle.process(streamInput) + + if (streamInput.usedFallback) { + handle.message.modelID = ModelID.make(streamInput.usedFallback.modelID) + handle.message.providerID = ProviderID.make(streamInput.usedFallback.providerID) + } if (structured !== undefined) { handle.message.structured = structured diff --git a/packages/opencode/src/session/retry.ts b/packages/opencode/src/session/retry.ts index 463bc27a95db..6fcf86b0cf14 100644 --- a/packages/opencode/src/session/retry.ts +++ b/packages/opencode/src/session/retry.ts @@ -12,6 +12,7 @@ export type RetryReason = "free_tier_limit" | "account_rate_limit" | (string & { export type Retryable = { message: string + quotaLimit?: boolean action?: { reason: RetryReason provider: string @@ -118,7 +119,15 @@ export function retryable(error: Err, provider: string) { }, } } - return { message: error.data.message.includes("Overloaded") ? "Provider is overloaded" : error.data.message } + const lower = error.data.message.toLowerCase() + const isQuotaLimit = + lower.includes("weekly") || + lower.includes("monthly") || + lower.includes("exceeded your") + return { + message: error.data.message.includes("Overloaded") ? "Provider is overloaded" : error.data.message, + ...(isQuotaLimit ? { quotaLimit: true } : {}), + } } // Check for rate limit patterns in plain text error messages diff --git a/packages/opencode/test/session/fallback.test.ts b/packages/opencode/test/session/fallback.test.ts new file mode 100644 index 000000000000..e7752dfaa3ee --- /dev/null +++ b/packages/opencode/test/session/fallback.test.ts @@ -0,0 +1,200 @@ +import { describe, expect, test } from "bun:test" +import { CooldownManager, pickStart, FALLBACK_NOTICE_ID, FALLBACK_RESUME_ID, FALLBACK_USING_ID } from "../../src/session/fallback" + +describe("CooldownManager", () => { + test("isCooledDown returns false when no cooldown has been set", () => { + const manager = new CooldownManager() + expect(manager.isCooledDown("ollama", "glm-5.1")).toBe(false) + }) + + test("isCooledDown returns true after put and false after expiry", async () => { + const manager = new CooldownManager() + manager.put("ollama", "glm-5.1", 100) + expect(manager.isCooledDown("ollama", "glm-5.1")).toBe(true) + await new Promise((resolve) => setTimeout(resolve, 150)) + expect(manager.isCooledDown("ollama", "glm-5.1")).toBe(false) + }) + + test("clear removes a cooldown entry", () => { + const manager = new CooldownManager() + manager.put("ollama", "glm-5.1", 60000) + expect(manager.isCooledDown("ollama", "glm-5.1")).toBe(true) + manager.clear("ollama", "glm-5.1") + expect(manager.isCooledDown("ollama", "glm-5.1")).toBe(false) + }) + + test("put updates an existing cooldown with a new expiry", async () => { + const manager = new CooldownManager() + manager.put("ollama", "glm-5.1", 60000) + manager.put("ollama", "glm-5.1", 100) + expect(manager.isCooledDown("ollama", "glm-5.1")).toBe(true) + await new Promise((resolve) => setTimeout(resolve, 150)) + expect(manager.isCooledDown("ollama", "glm-5.1")).toBe(false) + }) + + test("isCooledDown returns false for unknown provider/model", () => { + const manager = new CooldownManager() + manager.put("ollama", "glm-5.1", 60000) + expect(manager.isCooledDown("opencode", "unknown")).toBe(false) + }) +}) + +describe("fallback config validation", () => { + test("agent config accepts fallbacks array", () => { + const { ConfigParse } = require("../../src/config/parse") + const { Info: AgentInfo } = require("../../src/config/agent") + const parsed = ConfigParse.effectSchema(AgentInfo, { + model: "ollama/glm-5.1", + fallbacks: ["opencode/glm-5.1", "deepseek/deepseek-v4"], + }, "test") + expect(parsed.fallbacks).toEqual(["opencode/glm-5.1", "deepseek/deepseek-v4"]) + }) + + test("agent config works without fallbacks", () => { + const { ConfigParse } = require("../../src/config/parse") + const { Info: AgentInfo } = require("../../src/config/agent") + const parsed = ConfigParse.effectSchema(AgentInfo, { + model: "ollama/glm-5.1", + }, "test") + expect(parsed.fallbacks).toBeUndefined() + }) + + test("top-level config accepts fallbacks and cooldown_seconds", () => { + const { ConfigParse } = require("../../src/config/parse") + const { Info: ConfigInfo } = require("../../src/config/config") + const parsed = ConfigParse.effectSchema(ConfigInfo, { + model: "ollama/glm-5.1", + fallbacks: ["opencode/glm-5.1"], + cooldown_seconds: 120, + }, "test") + expect(parsed.fallbacks).toEqual(["opencode/glm-5.1"]) + expect(parsed.cooldown_seconds).toBe(120) + }) + + test("top-level config works without fallback fields", () => { + const { ConfigParse } = require("../../src/config/parse") + const { Info: ConfigInfo } = require("../../src/config/config") + const parsed = ConfigParse.effectSchema(ConfigInfo, { + model: "ollama/glm-5.1", + }, "test") + expect(parsed.fallbacks).toBeUndefined() + }) +}) + +describe("CooldownManager.remaining", () => { + test("returns undefined when no cooldown has been set", () => { + const manager = new CooldownManager() + expect(manager.remaining("ollama", "glm-5.1")).toBeUndefined() + }) + + test("returns remaining ms when cooldown is active", () => { + const manager = new CooldownManager() + manager.put("ollama", "glm-5.1", 60000) + const remaining = manager.remaining("ollama", "glm-5.1") + expect(remaining).toBeGreaterThan(59000) + expect(remaining).toBeLessThanOrEqual(60000) + }) + + test("returns undefined after cooldown expires and cleans up", async () => { + const manager = new CooldownManager() + manager.put("ollama", "glm-5.1", 50) + await new Promise((resolve) => setTimeout(resolve, 100)) + expect(manager.remaining("ollama", "glm-5.1")).toBeUndefined() + expect(manager.isCooledDown("ollama", "glm-5.1")).toBe(false) + }) +}) + +describe("pickStart", () => { + test("returns primary when primary is not on cooldown", () => { + const cm = new CooldownManager() + const result = pickStart( + { providerID: "anthropic", modelID: "claude-opus-4" }, + [{ providerID: "ollama", modelID: "glm-5.1" }], + cm, + ) + expect(result).toEqual({ kind: "primary" }) + }) + + test("returns fallback when primary is on cooldown", () => { + const cm = new CooldownManager() + cm.put("anthropic", "claude-opus-4", 60000) + const result = pickStart( + { providerID: "anthropic", modelID: "claude-opus-4" }, + [{ providerID: "ollama", modelID: "glm-5.1" }], + cm, + ) + expect(result).toEqual({ kind: "fallback", index: 0 }) + }) + + test("skips cooled-down fallback to find available one", () => { + const cm = new CooldownManager() + cm.put("anthropic", "claude-opus-4", 60000) + cm.put("ollama", "glm-5.1", 60000) + const result = pickStart( + { providerID: "anthropic", modelID: "claude-opus-4" }, + [ + { providerID: "ollama", modelID: "glm-5.1" }, + { providerID: "deepseek", modelID: "deepseek-v4" }, + ], + cm, + ) + expect(result).toEqual({ kind: "fallback", index: 1 }) + }) + + test("returns soonest when all models are on cooldown", () => { + const cm = new CooldownManager() + cm.put("anthropic", "claude-opus-4", 3600000) + cm.put("ollama", "glm-5.1", 60000) + const result = pickStart( + { providerID: "anthropic", modelID: "claude-opus-4" }, + [{ providerID: "ollama", modelID: "glm-5.1" }], + cm, + ) + expect(result).toEqual({ kind: "soonest", index: 0 }) + }) + + test("returns primary as soonest when primary expires before fallbacks", () => { + const cm = new CooldownManager() + cm.put("anthropic", "claude-opus-4", 5000) + cm.put("ollama", "glm-5.1", 3600000) + const result = pickStart( + { providerID: "anthropic", modelID: "claude-opus-4" }, + [{ providerID: "ollama", modelID: "glm-5.1" }], + cm, + ) + expect(result).toEqual({ kind: "soonest", index: -1 }) + }) + + test("returns primary when no fallbacks configured and primary not on cooldown", () => { + const cm = new CooldownManager() + const result = pickStart( + { providerID: "anthropic", modelID: "claude-opus-4" }, + [], + cm, + ) + expect(result).toEqual({ kind: "primary" }) + }) + + test("returns soonest with index -1 when no fallbacks and primary on cooldown", () => { + const cm = new CooldownManager() + cm.put("anthropic", "claude-opus-4", 60000) + const result = pickStart( + { providerID: "anthropic", modelID: "claude-opus-4" }, + [], + cm, + ) + expect(result).toEqual({ kind: "soonest", index: -1 }) + }) +}) + +describe("fallback event IDs", () => { + test("FALLBACK_NOTICE_ID is fallback-notice", () => { + expect(FALLBACK_NOTICE_ID).toBe("fallback-notice") + }) + test("FALLBACK_RESUME_ID is fallback-resume", () => { + expect(FALLBACK_RESUME_ID).toBe("fallback-resume") + }) + test("FALLBACK_USING_ID is fallback-using", () => { + expect(FALLBACK_USING_ID).toBe("fallback-using") + }) +}) diff --git a/packages/opencode/test/session/llm-native-recorded.test.ts b/packages/opencode/test/session/llm-native-recorded.test.ts index 19d8f6f42ce1..e0f456313d52 100644 --- a/packages/opencode/test/session/llm-native-recorded.test.ts +++ b/packages/opencode/test/session/llm-native-recorded.test.ts @@ -9,6 +9,7 @@ import { FetchHttpClient } from "effect/unstable/http" import path from "node:path" import z from "zod" import { Auth } from "@/auth" +import { Bus } from "@/bus" import { Config } from "@/config/config" import { Plugin } from "@/plugin" import { Provider } from "@/provider/provider" @@ -298,6 +299,7 @@ function recordedNativeLLMLayer(scenario: RecordedScenario) { provider, LLM.layer.pipe( Layer.provide(auth), + Layer.provide(Bus.layer), Layer.provide(Config.defaultLayer), Layer.provide(provider), Layer.provide(Plugin.defaultLayer), diff --git a/packages/opencode/test/session/llm-native.test.ts b/packages/opencode/test/session/llm-native.test.ts index 076d4c9f789a..814f0ad79481 100644 --- a/packages/opencode/test/session/llm-native.test.ts +++ b/packages/opencode/test/session/llm-native.test.ts @@ -290,42 +290,6 @@ describe("session.llm-native.request", () => { ]) }) - test("maps stored provider metadata to native content metadata", () => { - const reasoning = Object.assign( - { type: "reasoning" as const, text: "thinking" }, - { - providerMetadata: { - openai: { - itemId: "rs_1", - reasoningEncryptedContent: "encrypted-state", - }, - }, - }, - ) - const request = LLMNative.request({ - model: baseModel, - messages: [ - { - role: "assistant", - content: [reasoning], - }, - ], - }) - - expect(request.messages).toMatchObject([ - { - role: "assistant", - content: [ - { - type: "reasoning", - text: "thinking", - providerMetadata: { openai: { itemId: "rs_1", reasoningEncryptedContent: "encrypted-state" } }, - }, - ], - }, - ]) - }) - test("selects native request routes for provider packages", () => { const openai = LLMNative.model({ model: { ...baseModel, api: { ...baseModel.api, url: "", npm: "@ai-sdk/openai" } }, diff --git a/packages/opencode/test/session/llm.test.ts b/packages/opencode/test/session/llm.test.ts index cd381ecd014e..3d7372679845 100644 --- a/packages/opencode/test/session/llm.test.ts +++ b/packages/opencode/test/session/llm.test.ts @@ -8,6 +8,7 @@ import z from "zod" import { LLM } from "../../src/session/llm" import { LLMClient, RequestExecutor, WebSocketExecutor } from "@opencode-ai/llm/route" import { Auth } from "@/auth" +import { Bus } from "@/bus" import { Config } from "@/config/config" import { Provider } from "@/provider/provider" import { ProviderTransform } from "@/provider/transform" @@ -73,6 +74,7 @@ const drainWith = (layer: Layer.Layer, input: LLM.StreamInput) => function llmLayerWithExecutor(executor: Layer.Layer, flags: Partial = {}) { return LLM.layer.pipe( Layer.provide(Auth.defaultLayer), + Layer.provide(Bus.layer), Layer.provide(Config.defaultLayer), Layer.provide(Provider.defaultLayer), Layer.provide(Plugin.defaultLayer), @@ -1075,6 +1077,7 @@ describe("session.llm.stream", () => { yield* drainWith( LLM.layer.pipe( Layer.provide(Auth.defaultLayer), + Layer.provide(Bus.layer), Layer.provide(Config.defaultLayer), Layer.provide(Provider.defaultLayer), Layer.provide(Plugin.defaultLayer), diff --git a/packages/opencode/test/session/processor-effect.test.ts b/packages/opencode/test/session/processor-effect.test.ts index ede122297a17..8fe209217172 100644 --- a/packages/opencode/test/session/processor-effect.test.ts +++ b/packages/opencode/test/session/processor-effect.test.ts @@ -26,6 +26,7 @@ import { CrossSpawnSpawner } from "@opencode-ai/core/cross-spawn-spawner" import { provideTmpdirServer } from "../fixture/fixture" import { testEffect } from "../lib/effect" import { raw, reply, TestLLMServer } from "../lib/llm-server" +import { FallbackTriggered, FallbackUsed } from "../../src/session/fallback" import { SyncEvent } from "@/sync" import { RuntimeFlags } from "@/effect/runtime-flags" import { EventV2Bridge } from "@/event-v2-bridge" @@ -66,6 +67,18 @@ const cfg = { cost: { input: 0, output: 0 }, options: {}, }, + "fallback-model": { + id: "fallback-model", + name: "Fallback Model", + attachment: false, + reasoning: false, + temperature: false, + tool_call: true, + release_date: "2025-01-01", + limit: { context: 100000, output: 10000 }, + cost: { input: 0, output: 0 }, + options: {}, + }, }, options: { apiKey: "test-key", @@ -920,3 +933,97 @@ it.live("session.processor effect tests mark interruptions aborted without manua { config: (url) => providerCfg(url) }, ), ) + +it.live("session.processor stream error with fallbacks triggers fallback", () => + provideTmpdirServer( + ({ dir, llm }) => + Effect.gen(function* () { + const { processors, session, provider } = yield* boot() + const bus = yield* Bus.Service + + // Primary model: stream error mid-response (invalid chunk causes SDK parse error) + yield* llm.push(raw({ head: [{ role: "assistant" }, 42] })) + // Fallback model: clean response + yield* llm.text("fallback response") + + const chat = yield* session.create({}) + const parent = yield* user(chat.id, "fallback stream error") + const msg = yield* assistant(chat.id, parent.id, path.resolve(dir)) + const mdl = yield* provider.getModel(ref.providerID, ref.modelID) + + const triggered: string[] = [] + const used: string[] = [] + const off1 = yield* bus.subscribeCallback(FallbackTriggered, (evt) => { + triggered.push(evt.properties.modelID) + }) + const off2 = yield* bus.subscribeCallback(FallbackUsed, (evt) => { + used.push(evt.properties.modelID) + }) + + const handle = yield* processors.create({ + assistantMessage: msg, + sessionID: chat.id, + model: mdl, + }) + + const value = yield* handle.process({ + user: { + id: parent.id, + sessionID: chat.id, + role: "user", + time: parent.time, + agent: parent.agent, + model: { providerID: ref.providerID, modelID: ref.modelID }, + } satisfies MessageV2.User, + sessionID: chat.id, + model: mdl, + agent: agent(), + system: [], + messages: [{ role: "user", content: "fallback stream error" }], + tools: {}, + fallbacks: [{ providerID: ref.providerID, modelID: "fallback-model" as ModelID }], + }) + + off1() + off2() + + // First call failed (stream error), fallback succeeded + expect(value).toBe("continue") + expect(yield* llm.calls).toBe(2) + expect(triggered.length).toBe(1) + expect(triggered[0]).toBe("test-model") + expect(used.length).toBe(1) + expect(used[0]).toBe("fallback-model") + expect(handle.message.error).toBeUndefined() + }), + { git: true, config: (url) => providerCfg(url) }, + ), +) + +it.live("session.processor stream error without fallbacks halts with error", () => + provideTmpdirServer( + ({ dir, llm }) => + Effect.gen(function* () { + const { processors, session, provider } = yield* boot() + yield* llm.push(raw({ head: [{ role: "assistant" }, 42] })) + const chat = yield* session.create({}) + const parent = yield* user(chat.id, "no-fallback stream error") + const msg = yield* assistant(chat.id, parent.id, path.resolve(dir)) + const mdl = yield* provider.getModel(ref.providerID, ref.modelID) + const handle = yield* processors.create({ assistantMessage: msg, sessionID: chat.id, model: mdl }) + const value = yield* handle.process({ + user: { id: parent.id, sessionID: chat.id, role: "user", time: parent.time, agent: parent.agent, model: { providerID: ref.providerID, modelID: ref.modelID } } satisfies MessageV2.User, + sessionID: chat.id, + model: mdl, + agent: agent(), + system: [], + messages: [{ role: "user", content: "no-fallback stream error" }], + tools: {}, + }) + expect(value).toBe("stop") + expect(yield* llm.calls).toBe(1) + expect(handle.message.error).toBeDefined() + }), + { git: true, config: (url) => providerCfg(url) }, + ), +) diff --git a/packages/sdk/js/src/v2/gen/types.gen.ts b/packages/sdk/js/src/v2/gen/types.gen.ts index ebcb4271c431..299c7522a6d1 100644 --- a/packages/sdk/js/src/v2/gen/types.gen.ts +++ b/packages/sdk/js/src/v2/gen/types.gen.ts @@ -27,11 +27,13 @@ export type Event = | EventTodoUpdated | EventSessionStatus | EventSessionIdle + | EventLlmFallbackTriggered + | EventLlmFallbackUsed + | EventSessionCompacted | EventMcpToolsChanged | EventMcpBrowserOpenFailed | EventCommandExecuted | EventProjectUpdated - | EventSessionCompacted | EventVcsBranchUpdated | EventWorkspaceReady | EventWorkspaceFailed @@ -479,6 +481,7 @@ export type TextPart = { text: string synthetic?: boolean ignored?: boolean + fallbackNotice?: "using" | "switch" | "resume" time?: { start: number end?: number @@ -828,11 +831,13 @@ export type GlobalEvent = { | EventTodoUpdated | EventSessionStatus | EventSessionIdle + | EventLlmFallbackTriggered + | EventLlmFallbackUsed + | EventSessionCompacted | EventMcpToolsChanged | EventMcpBrowserOpenFailed | EventCommandExecuted | EventProjectUpdated - | EventSessionCompacted | EventVcsBranchUpdated | EventWorkspaceReady | EventWorkspaceFailed @@ -1008,6 +1013,7 @@ export type AgentConfig = { steps?: number maxSteps?: number permission?: PermissionConfig + fallbacks?: Array [key: string]: | unknown | string @@ -1032,6 +1038,7 @@ export type AgentConfig = { | "info" | number | PermissionConfig + | Array | undefined } @@ -1221,6 +1228,11 @@ export type Config = { disabled_providers?: Array enabled_providers?: Array model?: string + fallbacks?: Array + /** + * Duration in seconds to put a provider/model in cooldown after a retryable error (default: 300) + */ + cooldown_seconds?: number | "NaN" | "Infinity" | "-Infinity" | "Infinity" | "-Infinity" | "NaN" small_model?: string default_agent?: string username?: string @@ -1645,6 +1657,10 @@ export type Agent = { providerID: string } variant?: string + fallbacks?: Array<{ + providerID: string + modelID: string + }> prompt?: string options: { [key: string]: unknown @@ -2658,6 +2674,35 @@ export type EventSessionIdle = { } } +export type EventLlmFallbackTriggered = { + id: string + type: "llm.fallback.triggered" + properties: { + sessionID: string + modelID: string + providerID: string + reason: string + } +} + +export type EventLlmFallbackUsed = { + id: string + type: "llm.fallback.used" + properties: { + sessionID: string + modelID: string + providerID: string + } +} + +export type EventSessionCompacted = { + id: string + type: "session.compacted" + properties: { + sessionID: string + } +} + export type EventMcpToolsChanged = { id: string type: "mcp.tools.changed" @@ -2692,14 +2737,6 @@ export type EventProjectUpdated = { properties: Project } -export type EventSessionCompacted = { - id: string - type: "session.compacted" - properties: { - sessionID: string - } -} - export type EventVcsBranchUpdated = { id: string type: "vcs.branch.updated" @@ -3245,104 +3282,6 @@ export type EventSessionNextCompactionEnded = { } } -export type ModelV2Info = { - id: string - apiID: string - providerID: string - family?: string - name: string - endpoint: - | { - type: "unknown" - } - | { - type: "openai/responses" - url: string - websocket?: boolean - } - | { - type: "openai/completions" - url: string - reasoning?: - | { - type: "reasoning_content" - } - | { - type: "reasoning_details" - } - } - | { - type: "anthropic/messages" - url: string - } - | { - type: "aisdk" - package: string - url?: string - } - capabilities: { - tools: boolean - input: Array - output: Array - } - options: { - headers: { - [key: string]: string - } - body: { - [key: string]: unknown - } - aisdk: { - provider: { - [key: string]: unknown - } - request: { - [key: string]: unknown - } - } - variant?: string - } - variants: Array<{ - id: string - headers: { - [key: string]: string - } - body: { - [key: string]: unknown - } - aisdk: { - provider: { - [key: string]: unknown - } - request: { - [key: string]: unknown - } - } - }> - time: { - released: number | "NaN" | "Infinity" | "-Infinity" | "Infinity" | "-Infinity" | "NaN" - } - cost: Array<{ - tier?: { - type: "context" - size: number - } - input: number - output: number - cache: { - read: number - write: number - } - }> - status: "alpha" | "beta" | "deprecated" | "active" - enabled: boolean - limit: { - context: number - input?: number - output: number - } -} - export type EventCatalogModelUpdated = { id: string type: "catalog.model.updated" @@ -3515,6 +3454,9 @@ export type SessionMessageShell = { export type SessionMessageAssistantText = { type: "text" text: string + ignored?: boolean + synthetic?: boolean + fallbackNotice?: "using" | "switch" | "resume" } export type SessionMessageAssistantReasoning = { @@ -3644,26 +3586,12 @@ export type SessionMessage = | SessionMessageAssistant | SessionMessageCompaction -export type ProviderV2Info = { +export type ModelV2Info = { id: string + apiID: string + providerID: string + family?: string name: string - enabled: - | false - | { - via: "env" - name: string - } - | { - via: "account" - service: string - } - | { - via: "custom" - data: { - [key: string]: unknown - } - } - env: Array endpoint: | { type: "unknown" @@ -3693,6 +3621,11 @@ export type ProviderV2Info = { package: string url?: string } + capabilities: { + tools: boolean + input: Array + output: Array + } options: { headers: { [key: string]: string @@ -3708,26 +3641,69 @@ export type ProviderV2Info = { [key: string]: unknown } } + variant?: string } -} - -export type EventTuiToastShow1 = { - id: string - type: "tui.toast.show" - properties: { - title?: string - message: string - variant: "info" | "success" | "warning" | "error" - duration?: number + variants: Array<{ + id: string + headers: { + [key: string]: string + } + body: { + [key: string]: unknown + } + aisdk: { + provider: { + [key: string]: unknown + } + request: { + [key: string]: unknown + } + } + }> + time: { + released: number | "NaN" | "Infinity" | "-Infinity" | "Infinity" | "-Infinity" | "NaN" + } + cost: Array<{ + tier?: { + type: "context" + size: number + } + input: number + output: number + cache: { + read: number + write: number + } + }> + status: "alpha" | "beta" | "deprecated" | "active" + enabled: boolean + limit: { + context: number + input?: number + output: number } } -export type ModelV2Info1 = { +export type ProviderV2Info = { id: string - apiID: string - providerID: string - family?: string name: string + enabled: + | false + | { + via: "env" + name: string + } + | { + via: "account" + service: string + } + | { + via: "custom" + data: { + [key: string]: unknown + } + } + env: Array endpoint: | { type: "unknown" @@ -3757,11 +3733,6 @@ export type ModelV2Info1 = { package: string url?: string } - capabilities: { - tools: boolean - input: Array - output: Array - } options: { headers: { [key: string]: string @@ -3777,46 +3748,17 @@ export type ModelV2Info1 = { [key: string]: unknown } } - variant?: string - } - variants: Array<{ - id: string - headers: { - [key: string]: string - } - body: { - [key: string]: unknown - } - aisdk: { - provider: { - [key: string]: unknown - } - request: { - [key: string]: unknown - } - } - }> - time: { - released: number | "NaN" | "Infinity" | "-Infinity" } - cost: Array<{ - tier?: { - type: "context" - size: number - } - input: number - output: number - cache: { - read: number - write: number - } - }> - status: "alpha" | "beta" | "deprecated" | "active" - enabled: boolean - limit: { - context: number - input?: number - output: number +} + +export type EventTuiToastShow1 = { + id: string + type: "tui.toast.show" + properties: { + title?: string + message: string + variant: "info" | "success" | "warning" | "error" + duration?: number } } @@ -7461,7 +7403,7 @@ export type V2ModelListData = { body?: never path?: never query?: { - location?: { + instance?: { directory?: string workspace?: string } @@ -7499,7 +7441,7 @@ export type V2ProviderListData = { body?: never path?: never query?: { - location?: { + instance?: { directory?: string workspace?: string } @@ -7539,7 +7481,7 @@ export type V2ProviderGetData = { providerID: string } query?: { - location?: { + instance?: { directory?: string workspace?: string }