Update LLM providers and constants (#1937)
- Updated constants in app/lib/.server/llm/constants.ts
- Modified stream-text functionality in app/lib/.server/llm/stream-text.ts
- Updated Anthropic provider in app/lib/modules/llm/providers/anthropic.ts
- Modified GitHub provider in app/lib/modules/llm/providers/github.ts
- Updated Google provider in app/lib/modules/llm/providers/google.ts
- Modified OpenAI provider in app/lib/modules/llm/providers/openai.ts
- Updated LLM types in app/lib/modules/llm/types.ts
- Modified API route in app/routes/api.llmcall.ts
app/routes/api.llmcall.ts

@@ -3,7 +3,7 @@ import { streamText } from '~/lib/.server/llm/stream-text';
 import type { IProviderSetting, ProviderInfo } from '~/types/model';
 import { generateText } from 'ai';
 import { PROVIDER_LIST } from '~/utils/constants';
-import { MAX_TOKENS } from '~/lib/.server/llm/constants';
+import { MAX_TOKENS, PROVIDER_COMPLETION_LIMITS, isReasoningModel } from '~/lib/.server/llm/constants';
 import { LLMManager } from '~/lib/modules/llm/manager';
 import type { ModelInfo } from '~/lib/modules/llm/types';
 import { getApiKeysFromCookie, getProviderSettingsFromCookie } from '~/lib/api/cookies';
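The constants module itself is not shown in this commit view. Judging purely from how the two new imports are used in the hunks below (a per-provider number lookup and a model-name predicate), the additions to app/lib/.server/llm/constants.ts presumably look roughly like this sketch; the export names match the import, but the concrete limits and the detection heuristic are assumptions, not code from the repo.

```ts
// Hypothetical sketch of the new exports in app/lib/.server/llm/constants.ts.
// Shapes are inferred from their usage in api.llmcall.ts; values are assumptions.

// Per-provider completion-token ceilings, keyed by provider name (assumed keys and values).
export const PROVIDER_COMPLETION_LIMITS: Record<string, number> = {
  OpenAI: 16384,
  Anthropic: 8192,
  Google: 8192,
  Github: 16384,
};

// Heuristic for models that expect maxCompletionTokens instead of maxTokens
// (o1 / GPT-5 style reasoning models, per the route's own comments).
export function isReasoningModel(modelName: string): boolean {
  const name = modelName.toLowerCase();
  return name.startsWith('o1') || name.startsWith('gpt-5');
}
```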
@@ -24,6 +24,23 @@ async function getModelList(options: {
 
 const logger = createScopedLogger('api.llmcall');
 
+function getCompletionTokenLimit(modelDetails: ModelInfo): number {
+  // 1. If model specifies completion tokens, use that
+  if (modelDetails.maxCompletionTokens && modelDetails.maxCompletionTokens > 0) {
+    return modelDetails.maxCompletionTokens;
+  }
+
+  // 2. Use provider-specific default
+  const providerDefault = PROVIDER_COMPLETION_LIMITS[modelDetails.provider];
+
+  if (providerDefault) {
+    return providerDefault;
+  }
+
+  // 3. Final fallback to MAX_TOKENS, but cap at reasonable limit for safety
+  return Math.min(MAX_TOKENS, 16384);
+}
+
 async function llmCallAction({ context, request }: ActionFunctionArgs) {
   const { system, message, model, provider, streamOutput } = await request.json<{
     system: string;
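To make the three-step fallback in getCompletionTokenLimit concrete, here is a small self-contained illustration; the ModelInfo-like values, the provider limits, and MAX_TOKENS are all hypothetical, and only the resolution order mirrors the diff.

```ts
// Stand-ins for the real imports, so the example runs on its own.
type ModelInfoLike = { name: string; provider: string; maxCompletionTokens?: number };
const PROVIDER_COMPLETION_LIMITS: Record<string, number> = { OpenAI: 16384 }; // assumed
const MAX_TOKENS = 8000; // assumed

function getCompletionTokenLimit(m: ModelInfoLike): number {
  if (m.maxCompletionTokens && m.maxCompletionTokens > 0) {
    return m.maxCompletionTokens; // 1. explicit per-model limit wins
  }

  const providerDefault = PROVIDER_COMPLETION_LIMITS[m.provider];

  if (providerDefault) {
    return providerDefault; // 2. provider-wide default
  }

  return Math.min(MAX_TOKENS, 16384); // 3. capped global fallback
}

console.log(getCompletionTokenLimit({ name: 'gpt-5', provider: 'OpenAI', maxCompletionTokens: 32768 })); // 32768 (step 1)
console.log(getCompletionTokenLimit({ name: 'gpt-4o', provider: 'OpenAI' })); // 16384 (step 2)
console.log(getCompletionTokenLimit({ name: 'mystery', provider: 'Unknown' })); // 8000 (step 3: min of the assumed MAX_TOKENS and 16384)
```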
@@ -101,7 +118,7 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
     throw new Error('Model not found');
   }
 
-  const dynamicMaxTokens = modelDetails && modelDetails.maxTokenAllowed ? modelDetails.maxTokenAllowed : MAX_TOKENS;
+  const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);
 
   const providerInfo = PROVIDER_LIST.find((p) => p.name === provider.name);
 
@@ -111,11 +128,19 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
 
   logger.info(`Generating response Provider: ${provider.name}, Model: ${modelDetails.name}`);
 
-  const result = await generateText({
+  // DEBUG: Log reasoning model detection
+  const isReasoning = isReasoningModel(modelDetails.name);
+  logger.info(`DEBUG: Model "${modelDetails.name}" detected as reasoning model: ${isReasoning}`);
+
+  // Use maxCompletionTokens for reasoning models (o1, GPT-5), maxTokens for traditional models
+  const tokenParams = isReasoning ? { maxCompletionTokens: dynamicMaxTokens } : { maxTokens: dynamicMaxTokens };
+
+  // Filter out unsupported parameters for reasoning models
+  const baseParams = {
     system,
     messages: [
       {
-        role: 'user',
+        role: 'user' as const,
         content: `${message}`,
       },
     ],
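One TypeScript detail in this hunk: now that the message array is built in a standalone baseParams object instead of inline inside the generateText call, `role: 'user'` would be widened to plain string and stop satisfying the SDK's role union, hence the `as const`. A minimal sketch of the difference (the Role union here is illustrative, not the SDK's actual type):

```ts
// Why the hunk adds `as const` to role once the object lives outside the call site.
type Role = 'system' | 'user' | 'assistant';
type ChatMessage = { role: Role; content: string };

const widened = { role: 'user', content: 'hi' }; // inferred as { role: string; content: string }
// const broken: ChatMessage = widened; // error: Type 'string' is not assignable to type 'Role'

const narrowed = { role: 'user' as const, content: 'hi' }; // role keeps the literal type 'user'
const ok: ChatMessage = narrowed; // compiles

console.log(widened.role, ok.role);
```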
@@ -125,9 +150,36 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
       apiKeys,
       providerSettings,
     }),
-    maxTokens: dynamicMaxTokens,
-    toolChoice: 'none',
-  });
+    ...tokenParams,
+    toolChoice: 'none' as const,
+  };
+
+  // For reasoning models, set temperature to 1 (required by OpenAI API)
+  const finalParams = isReasoning
+    ? { ...baseParams, temperature: 1 } // Set to 1 for reasoning models (only supported value)
+    : { ...baseParams, temperature: 0 };
+
+  // DEBUG: Log final parameters
+  logger.info(
+    `DEBUG: Final params for model "${modelDetails.name}":`,
+    JSON.stringify(
+      {
+        isReasoning,
+        hasTemperature: 'temperature' in finalParams,
+        hasMaxTokens: 'maxTokens' in finalParams,
+        hasMaxCompletionTokens: 'maxCompletionTokens' in finalParams,
+        paramKeys: Object.keys(finalParams).filter((key) => !['model', 'messages', 'system'].includes(key)),
+        tokenParams,
+        finalParams: Object.fromEntries(
+          Object.entries(finalParams).filter(([key]) => !['model', 'messages', 'system'].includes(key)),
+        ),
+      },
+      null,
+      2,
+    ),
+  );
+
+  const result = await generateText(finalParams);
   logger.info(`Generated response`);
 
   return new Response(JSON.stringify(result), {
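Net effect of the two branches: the request differs in exactly two fields depending on the model class. A hedged sketch of the resulting shapes (model, messages, and system omitted; the numeric limit is an example value, not one from the repo):

```ts
// Illustrative finalParams shapes produced by the branches above.
const reasoningShape = {
  maxCompletionTokens: 16384, // from tokenParams when isReasoning is true
  toolChoice: 'none' as const,
  temperature: 1, // the only value reasoning models accept, per the comment in the diff
};

const traditionalShape = {
  maxTokens: 16384, // from tokenParams when isReasoning is false
  toolChoice: 'none' as const,
  temperature: 0,
};

console.log(reasoningShape, traditionalShape);
```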