import { type ActionFunctionArgs } from '@remix-run/cloudflare';
import { streamText } from '~/lib/.server/llm/stream-text';
import type { IProviderSetting, ProviderInfo } from '~/types/model';
import { generateText } from 'ai';
import { PROVIDER_LIST } from '~/utils/constants';
import { MAX_TOKENS, PROVIDER_COMPLETION_LIMITS, isReasoningModel } from '~/lib/.server/llm/constants';
import { LLMManager } from '~/lib/modules/llm/manager';
import type { ModelInfo } from '~/lib/modules/llm/types';
import { getApiKeysFromCookie, getProviderSettingsFromCookie } from '~/lib/api/cookies';
import { createScopedLogger } from '~/utils/logger';

export async function action(args: ActionFunctionArgs) {
  return llmCallAction(args);
}

async function getModelList(options: {
  apiKeys?: Record<string, string>;
  providerSettings?: Record<string, IProviderSetting>;
  serverEnv?: Record<string, string>;
}) {
  const llmManager = LLMManager.getInstance(import.meta.env);
  return llmManager.updateModelList(options);
}

const logger = createScopedLogger('api.llmcall');

function getCompletionTokenLimit(modelDetails: ModelInfo): number {
  // 1. If the model specifies a completion-token limit, use it
  if (modelDetails.maxCompletionTokens && modelDetails.maxCompletionTokens > 0) {
    return modelDetails.maxCompletionTokens;
  }

  // 2. Fall back to the provider-specific default
  const providerDefault = PROVIDER_COMPLETION_LIMITS[modelDetails.provider];

  if (providerDefault) {
    return providerDefault;
  }

  // 3. Final fallback: MAX_TOKENS, capped at a reasonable limit for safety
  return Math.min(MAX_TOKENS, 16384);
}

async function llmCallAction({ context, request }: ActionFunctionArgs) {
  const { system, message, model, provider, streamOutput } = await request.json<{
    system: string;
    message: string;
    model: string;
    provider: ProviderInfo;
    streamOutput?: boolean;
  }>();

  // Optional chaining so a missing provider falls through to the 400 check below
  // instead of throwing a TypeError (which would surface as a 500)
  const providerName = provider?.name;

  // validate 'model' and 'provider' fields
  if (!model || typeof model !== 'string') {
    throw new Response('Invalid or missing model', {
      status: 400,
      statusText: 'Bad Request',
    });
  }

  if (!providerName || typeof providerName !== 'string') {
    throw new Response('Invalid or missing provider', {
      status: 400,
      statusText: 'Bad Request',
    });
  }

  const cookieHeader = request.headers.get('Cookie');
  const apiKeys = getApiKeysFromCookie(cookieHeader);
  const providerSettings = getProviderSettingsFromCookie(cookieHeader);

  if (streamOutput) {
    try {
      const result = await streamText({
        options: {
          system,
        },
        messages: [
          {
            role: 'user',
            content: `${message}`,
          },
        ],
        env: context.cloudflare?.env as any,
        apiKeys,
        providerSettings,
      });

      return new Response(result.textStream, {
        status: 200,
        headers: {
          'Content-Type': 'text/plain; charset=utf-8',
        },
      });
    } catch (error: unknown) {
      logger.error(error);

      if (error instanceof Error && error.message?.includes('API key')) {
        throw new Response('Invalid or missing API key', {
          status: 401,
          statusText: 'Unauthorized',
        });
      }

      throw new Response(null, {
        status: 500,
        statusText: 'Internal Server Error',
      });
    }
  } else {
    try {
      const models = await getModelList({ apiKeys, providerSettings, serverEnv: context.cloudflare?.env as any });
      const modelDetails = models.find((m: ModelInfo) => m.name === model);

      if (!modelDetails) {
        throw new Error('Model not found');
      }

      // modelDetails is guaranteed non-null past the guard above
      const dynamicMaxTokens = getCompletionTokenLimit(modelDetails);

      const providerInfo = PROVIDER_LIST.find((p) => p.name === provider.name);

      if (!providerInfo) {
        throw new Error('Provider not found');
      }

      logger.info(`Generating response. Provider: ${provider.name}, Model: ${modelDetails.name}`);

      // DEBUG: Log reasoning model detection
      const isReasoning = isReasoningModel(modelDetails.name);
      logger.info(`DEBUG: Model "${modelDetails.name}" detected as reasoning model: ${isReasoning}`);

      // Use maxCompletionTokens for reasoning models (o1, GPT-5), maxTokens for traditional models
      const tokenParams = isReasoning ? { maxCompletionTokens: dynamicMaxTokens } : { maxTokens: dynamicMaxTokens };

      // Base parameters shared by both model families (the token-limit key was chosen above)
      const baseParams = {
        system,
        messages: [
          {
            role: 'user' as const,
            content: `${message}`,
          },
        ],
        model: providerInfo.getModelInstance({
          model: modelDetails.name,
          serverEnv: context.cloudflare?.env as any,
          apiKeys,
          providerSettings,
        }),
        ...tokenParams,
        toolChoice: 'none' as const,
      };

      // Reasoning models require temperature 1 (the only value the OpenAI API accepts); use 0 otherwise
      const finalParams = isReasoning ? { ...baseParams, temperature: 1 } : { ...baseParams, temperature: 0 };

      // DEBUG: Log final parameters
      logger.info(
        `DEBUG: Final params for model "${modelDetails.name}":`,
        JSON.stringify(
          {
            isReasoning,
            hasTemperature: 'temperature' in finalParams,
            hasMaxTokens: 'maxTokens' in finalParams,
            hasMaxCompletionTokens: 'maxCompletionTokens' in finalParams,
            paramKeys: Object.keys(finalParams).filter((key) => !['model', 'messages', 'system'].includes(key)),
            tokenParams,
            finalParams: Object.fromEntries(
              Object.entries(finalParams).filter(([key]) => !['model', 'messages', 'system'].includes(key)),
            ),
          },
          null,
          2,
        ),
      );

      const result = await generateText(finalParams);
      logger.info(`Generated response`);

      return new Response(JSON.stringify(result), {
        status: 200,
        headers: {
          'Content-Type': 'application/json',
        },
      });
    } catch (error: unknown) {
      logger.error(error);

      const errorResponse = {
        error: true,
        message: error instanceof Error ? error.message : 'An unexpected error occurred',
        statusCode: (error as any).statusCode || 500,
        isRetryable: (error as any).isRetryable !== false,
        provider: (error as any).provider || 'unknown',
      };

      if (error instanceof Error && error.message?.includes('API key')) {
        return new Response(
          JSON.stringify({
            ...errorResponse,
            message: 'Invalid or missing API key',
            statusCode: 401,
            isRetryable: false,
          }),
          {
            status: 401,
            headers: { 'Content-Type': 'application/json' },
            statusText: 'Unauthorized',
          },
        );
      }

      return new Response(JSON.stringify(errorResponse), {
        status: errorResponse.statusCode,
        headers: { 'Content-Type': 'application/json' },
        statusText: 'Error',
      });
    }
  }
}
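
/*
 * Example client call — an illustrative sketch only, assuming this route is
 * served at `/api/llmcall` (the usual mapping for an `api.llmcall.ts` flat
 * route). The model name and provider shown are hypothetical placeholders;
 * substitute values that exist in your PROVIDER_LIST and model list.
 *
 *   const res = await fetch('/api/llmcall', {
 *     method: 'POST',
 *     headers: { 'Content-Type': 'application/json' },
 *     body: JSON.stringify({
 *       system: 'You are a helpful assistant.',
 *       message: 'Summarize this file.',
 *       model: 'gpt-4o',                 // hypothetical model name
 *       provider: { name: 'OpenAI' },    // must match a PROVIDER_LIST entry
 *       streamOutput: false,             // true => plain-text stream instead of JSON
 *     }),
 *   });
 *   const data = await res.json();       // generateText result, or the errorResponse shape on failure
 */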