Update LLM providers and constants (#1937)
- Updated constants in app/lib/.server/llm/constants.ts
- Modified stream-text functionality in app/lib/.server/llm/stream-text.ts
- Updated Anthropic provider in app/lib/modules/llm/providers/anthropic.ts
- Modified GitHub provider in app/lib/modules/llm/providers/github.ts
- Updated Google provider in app/lib/modules/llm/providers/google.ts
- Modified OpenAI provider in app/lib/modules/llm/providers/openai.ts
- Updated LLM types in app/lib/modules/llm/types.ts
- Modified API route in app/routes/api.llmcall.ts
app/lib/.server/llm/constants.ts
@@ -4,6 +4,44 @@
  */
 export const MAX_TOKENS = 32000;
 
+/*
+ * Provider-specific default completion token limits
+ * Used as fallbacks when model doesn't specify maxCompletionTokens
+ */
+export const PROVIDER_COMPLETION_LIMITS: Record<string, number> = {
+  OpenAI: 16384,
+  Github: 16384, // GitHub Models use OpenAI-compatible limits
+  Anthropic: 128000,
+  Google: 32768,
+  Cohere: 4000,
+  DeepSeek: 8192,
+  Groq: 8192,
+  HuggingFace: 4096,
+  Mistral: 8192,
+  Ollama: 8192,
+  OpenRouter: 8192,
+  Perplexity: 8192,
+  Together: 8192,
+  xAI: 8192,
+  LMStudio: 8192,
+  OpenAILike: 8192,
+  AmazonBedrock: 8192,
+  Hyperbolic: 8192,
+};
+
+/*
+ * Reasoning models that require maxCompletionTokens instead of maxTokens
+ * These models use internal reasoning tokens and have different API parameter requirements
+ */
+export function isReasoningModel(modelName: string): boolean {
+  const result = /^(o1|o3|gpt-5)/i.test(modelName);
+
+  // DEBUG: Test regex matching
+  console.log(`REGEX TEST: "${modelName}" matches reasoning pattern: ${result}`);
+
+  return result;
+}
+
 // limits the number of model responses that can be returned in a single request
 export const MAX_RESPONSE_SEGMENTS = 2;
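For context, a quick sketch of how the new exports behave; the model names are illustrative, and the relative import assumes the sketch sits next to app/lib/.server/llm/constants.ts:

import { PROVIDER_COMPLETION_LIMITS, isReasoningModel } from './constants';

// Detection is a case-insensitive prefix match on o1 / o3 / gpt-5
isReasoningModel('o1-mini'); // true
isReasoningModel('gpt-5'); // true
isReasoningModel('gpt-4o'); // false

// Completion-token fallbacks are keyed by provider name
PROVIDER_COMPLETION_LIMITS.Anthropic; // 128000
PROVIDER_COMPLETION_LIMITS.OpenAI; // 16384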
app/lib/.server/llm/stream-text.ts
@@ -1,5 +1,5 @@
 import { convertToCoreMessages, streamText as _streamText, type Message } from 'ai';
-import { MAX_TOKENS, type FileMap } from './constants';
+import { MAX_TOKENS, PROVIDER_COMPLETION_LIMITS, isReasoningModel, type FileMap } from './constants';
 import { getSystemPrompt } from '~/lib/common/prompts/prompts';
 import { DEFAULT_MODEL, DEFAULT_PROVIDER, MODIFICATIONS_TAG_NAME, PROVIDER_LIST, WORK_DIR } from '~/utils/constants';
 import type { IProviderSetting } from '~/types/model';
@@ -26,6 +26,23 @@ export interface StreamingOptions extends Omit<Parameters<typeof _streamText>[0]
 
 const logger = createScopedLogger('stream-text');
 
+function getCompletionTokenLimit(modelDetails: any): number {
+  // 1. If model specifies completion tokens, use that
+  if (modelDetails.maxCompletionTokens && modelDetails.maxCompletionTokens > 0) {
+    return modelDetails.maxCompletionTokens;
+  }
+
+  // 2. Use provider-specific default
+  const providerDefault = PROVIDER_COMPLETION_LIMITS[modelDetails.provider];
+
+  if (providerDefault) {
+    return providerDefault;
+  }
+
+  // 3. Final fallback to MAX_TOKENS, but cap at reasonable limit for safety
+  return Math.min(MAX_TOKENS, 16384);
+}
+
 function sanitizeText(text: string): string {
   let sanitized = text.replace(/<div class=\"__boltThought__\">.*?<\/div>/s, '');
   sanitized = sanitized.replace(/<think>.*?<\/think>/s, '');
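A rough illustration of the resolution order in getCompletionTokenLimit (the modelDetails objects below are hypothetical):

// 1. Model-specified limit wins
getCompletionTokenLimit({ name: 'claude-sonnet', provider: 'Anthropic', maxCompletionTokens: 8192 }); // 8192
// 2. Otherwise the provider default from PROVIDER_COMPLETION_LIMITS
getCompletionTokenLimit({ name: 'claude-sonnet', provider: 'Anthropic' }); // 128000
// 3. Otherwise Math.min(MAX_TOKENS, 16384)
getCompletionTokenLimit({ name: 'local-model', provider: 'SomeNewProvider' }); // 16384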
@@ -123,10 +140,10 @@ export async function streamText(props: {
     }
   }
 
-  const dynamicMaxTokens = modelDetails && modelDetails.maxTokenAllowed ? modelDetails.maxTokenAllowed : MAX_TOKENS;
+  const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);
 
-  // Ensure we never exceed reasonable token limits to prevent API errors
-  const safeMaxTokens = Math.min(dynamicMaxTokens, 100000); // Cap at 100k for safety
+  // Additional safety cap - should not be needed with proper completion limits, but kept for safety
+  const safeMaxTokens = Math.min(dynamicMaxTokens, 128000);
 
   logger.info(
     `Max tokens for model ${modelDetails.name} is ${safeMaxTokens} (capped from ${dynamicMaxTokens}) based on model limits`,
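The 128000 cap only bites when a model advertises an even larger completion limit; a hypothetical example:

const dynamicMaxTokens = getCompletionTokenLimit({ name: 'huge-output-model', provider: 'OpenAILike', maxCompletionTokens: 200000 }); // 200000
const safeMaxTokens = Math.min(dynamicMaxTokens, 128000); // 128000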
@@ -204,9 +221,52 @@ export async function streamText(props: {
 
   logger.info(`Sending llm call to ${provider.name} with model ${modelDetails.name}`);
 
+  // DEBUG: Log reasoning model detection
+  const isReasoning = isReasoningModel(modelDetails.name);
+  logger.info(`DEBUG STREAM: Model "${modelDetails.name}" detected as reasoning model: ${isReasoning}`);
+
   // console.log(systemPrompt, processedMessages);
 
-  return await _streamText({
+  // Use maxCompletionTokens for reasoning models (o1, GPT-5), maxTokens for traditional models
+  const tokenParams = isReasoning ? { maxCompletionTokens: safeMaxTokens } : { maxTokens: safeMaxTokens };
+
+  // Filter out unsupported parameters for reasoning models
+  const filteredOptions =
+    isReasoning && options
+      ? Object.fromEntries(
+          Object.entries(options).filter(
+            ([key]) =>
+              ![
+                'temperature',
+                'topP',
+                'presencePenalty',
+                'frequencyPenalty',
+                'logprobs',
+                'topLogprobs',
+                'logitBias',
+              ].includes(key),
+          ),
+        )
+      : options || {};
+
+  // DEBUG: Log filtered options
+  logger.info(
+    `DEBUG STREAM: Options filtering for model "${modelDetails.name}":`,
+    JSON.stringify(
+      {
+        isReasoning,
+        originalOptions: options || {},
+        filteredOptions,
+        originalOptionsKeys: options ? Object.keys(options) : [],
+        filteredOptionsKeys: Object.keys(filteredOptions),
+        removedParams: options ? Object.keys(options).filter((key) => !(key in filteredOptions)) : [],
+      },
+      null,
+      2,
+    ),
+  );
+
+  const streamParams = {
     model: provider.getModelInstance({
       model: modelDetails.name,
       serverEnv,
@@ -214,8 +274,31 @@ export async function streamText(props: {
       providerSettings,
     }),
     system: chatMode === 'build' ? systemPrompt : discussPrompt(),
-    maxTokens: safeMaxTokens,
+    ...tokenParams,
     messages: convertToCoreMessages(processedMessages as any),
-    ...options,
-  });
+    ...filteredOptions,
+
+    // Set temperature to 1 for reasoning models (required by OpenAI API)
+    ...(isReasoning ? { temperature: 1 } : {}),
+  };
+
+  // DEBUG: Log final streaming parameters
+  logger.info(
+    `DEBUG STREAM: Final streaming params for model "${modelDetails.name}":`,
+    JSON.stringify(
+      {
+        hasTemperature: 'temperature' in streamParams,
+        hasMaxTokens: 'maxTokens' in streamParams,
+        hasMaxCompletionTokens: 'maxCompletionTokens' in streamParams,
+        paramKeys: Object.keys(streamParams).filter((key) => !['model', 'messages', 'system'].includes(key)),
+        streamParams: Object.fromEntries(
+          Object.entries(streamParams).filter(([key]) => !['model', 'messages', 'system'].includes(key)),
+        ),
+      },
+      null,
+      2,
+    ),
+  );
+
+  return await _streamText(streamParams);
 }
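Putting the reasoning-model branches together for a hypothetical set of caller options (maxRetries stands in for any parameter that survives the filter):

const options = { temperature: 0.7, topP: 0.9, maxRetries: 2 };
const isReasoning = true; // e.g. for 'o1-mini'

const filteredOptions = Object.fromEntries(
  Object.entries(options).filter(
    ([key]) =>
      !['temperature', 'topP', 'presencePenalty', 'frequencyPenalty', 'logprobs', 'topLogprobs', 'logitBias'].includes(key),
  ),
); // { maxRetries: 2 }

// streamParams then spreads { maxCompletionTokens: safeMaxTokens } and filteredOptions,
// and forces temperature: 1 back in; a non-reasoning model keeps maxTokens and the original options.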