Update LLM providers and constants (#1937)
- Updated constants in app/lib/.server/llm/constants.ts
- Modified stream-text functionality in app/lib/.server/llm/stream-text.ts
- Updated Anthropic provider in app/lib/modules/llm/providers/anthropic.ts
- Modified GitHub provider in app/lib/modules/llm/providers/github.ts
- Updated Google provider in app/lib/modules/llm/providers/google.ts
- Modified OpenAI provider in app/lib/modules/llm/providers/openai.ts
- Updated LLM types in app/lib/modules/llm/types.ts
- Modified API route in app/routes/api.llmcall.ts
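At its core, the change resolves each model's completion-token budget in three steps: an explicit per-model maxCompletionTokens wins, otherwise the provider default in PROVIDER_COMPLETION_LIMITS applies, otherwise MAX_TOKENS capped at 16384 is used. A minimal standalone sketch of that chain, mirroring the getCompletionTokenLimit helper added below; the sample model objects are hypothetical:

type ModelLike = { provider: string; maxCompletionTokens?: number };

const PROVIDER_COMPLETION_LIMITS: Record<string, number> = { OpenAI: 16384, Anthropic: 128000 };
const MAX_TOKENS = 32000;

function getCompletionTokenLimit(model: ModelLike): number {
  // 1. Explicit per-model completion limit wins
  if (model.maxCompletionTokens && model.maxCompletionTokens > 0) {
    return model.maxCompletionTokens;
  }

  // 2. Otherwise fall back to the provider-wide default
  const providerDefault = PROVIDER_COMPLETION_LIMITS[model.provider];

  if (providerDefault) {
    return providerDefault;
  }

  // 3. Last resort: MAX_TOKENS, capped for safety
  return Math.min(MAX_TOKENS, 16384);
}

getCompletionTokenLimit({ provider: 'Anthropic', maxCompletionTokens: 8192 }); // 8192 (explicit)
getCompletionTokenLimit({ provider: 'Anthropic' });                            // 128000 (provider default)
getCompletionTokenLimit({ provider: 'SomeUnknownProvider' });                  // 16384 (capped MAX_TOKENS)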
--- a/app/lib/.server/llm/constants.ts
+++ b/app/lib/.server/llm/constants.ts
@@ -4,6 +4,44 @@
  */
 export const MAX_TOKENS = 32000;
 
+/*
+ * Provider-specific default completion token limits
+ * Used as fallbacks when model doesn't specify maxCompletionTokens
+ */
+export const PROVIDER_COMPLETION_LIMITS: Record<string, number> = {
+  OpenAI: 16384,
+  Github: 16384, // GitHub Models use OpenAI-compatible limits
+  Anthropic: 128000,
+  Google: 32768,
+  Cohere: 4000,
+  DeepSeek: 8192,
+  Groq: 8192,
+  HuggingFace: 4096,
+  Mistral: 8192,
+  Ollama: 8192,
+  OpenRouter: 8192,
+  Perplexity: 8192,
+  Together: 8192,
+  xAI: 8192,
+  LMStudio: 8192,
+  OpenAILike: 8192,
+  AmazonBedrock: 8192,
+  Hyperbolic: 8192,
+};
+
+/*
+ * Reasoning models that require maxCompletionTokens instead of maxTokens
+ * These models use internal reasoning tokens and have different API parameter requirements
+ */
+export function isReasoningModel(modelName: string): boolean {
+  const result = /^(o1|o3|gpt-5)/i.test(modelName);
+
+  // DEBUG: Test regex matching
+  console.log(`REGEX TEST: "${modelName}" matches reasoning pattern: ${result}`);
+
+  return result;
+}
+
 // limits the number of model responses that can be returned in a single request
 export const MAX_RESPONSE_SEGMENTS = 2;
 
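isReasoningModel above is a plain prefix test; for illustration, this is how it classifies a few representative model names (the names are examples, not an exhaustive list):

import { isReasoningModel } from '~/lib/.server/llm/constants';

isReasoningModel('o1-mini'); // true  - starts with "o1"
isReasoningModel('O3-mini'); // true  - the test is case-insensitive
isReasoningModel('gpt-5');   // true  - starts with "gpt-5"
isReasoningModel('gpt-4o');  // false - matches none of the reasoning prefixes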
--- a/app/lib/.server/llm/stream-text.ts
+++ b/app/lib/.server/llm/stream-text.ts
@@ -1,5 +1,5 @@
 import { convertToCoreMessages, streamText as _streamText, type Message } from 'ai';
-import { MAX_TOKENS, type FileMap } from './constants';
+import { MAX_TOKENS, PROVIDER_COMPLETION_LIMITS, isReasoningModel, type FileMap } from './constants';
 import { getSystemPrompt } from '~/lib/common/prompts/prompts';
 import { DEFAULT_MODEL, DEFAULT_PROVIDER, MODIFICATIONS_TAG_NAME, PROVIDER_LIST, WORK_DIR } from '~/utils/constants';
 import type { IProviderSetting } from '~/types/model';
@@ -26,6 +26,23 @@ export interface StreamingOptions extends Omit<Parameters<typeof _streamText>[0]
 const logger = createScopedLogger('stream-text');
 
+function getCompletionTokenLimit(modelDetails: any): number {
+  // 1. If model specifies completion tokens, use that
+  if (modelDetails.maxCompletionTokens && modelDetails.maxCompletionTokens > 0) {
+    return modelDetails.maxCompletionTokens;
+  }
+
+  // 2. Use provider-specific default
+  const providerDefault = PROVIDER_COMPLETION_LIMITS[modelDetails.provider];
+
+  if (providerDefault) {
+    return providerDefault;
+  }
+
+  // 3. Final fallback to MAX_TOKENS, but cap at reasonable limit for safety
+  return Math.min(MAX_TOKENS, 16384);
+}
+
 function sanitizeText(text: string): string {
   let sanitized = text.replace(/<div class=\"__boltThought__\">.*?<\/div>/s, '');
   sanitized = sanitized.replace(/<think>.*?<\/think>/s, '');
@@ -123,10 +140,10 @@ export async function streamText(props: {
     }
   }
 
-  const dynamicMaxTokens = modelDetails && modelDetails.maxTokenAllowed ? modelDetails.maxTokenAllowed : MAX_TOKENS;
+  const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);
 
-  // Ensure we never exceed reasonable token limits to prevent API errors
-  const safeMaxTokens = Math.min(dynamicMaxTokens, 100000); // Cap at 100k for safety
+  // Additional safety cap - should not be needed with proper completion limits, but kept for safety
+  const safeMaxTokens = Math.min(dynamicMaxTokens, 128000);
 
   logger.info(
     `Max tokens for model ${modelDetails.name} is ${safeMaxTokens} (capped from ${dynamicMaxTokens}) based on model limits`,
@@ -204,9 +221,52 @@ export async function streamText(props: {
   logger.info(`Sending llm call to ${provider.name} with model ${modelDetails.name}`);
 
+  // DEBUG: Log reasoning model detection
+  const isReasoning = isReasoningModel(modelDetails.name);
+  logger.info(`DEBUG STREAM: Model "${modelDetails.name}" detected as reasoning model: ${isReasoning}`);
+
   // console.log(systemPrompt, processedMessages);
 
-  return await _streamText({
+  // Use maxCompletionTokens for reasoning models (o1, GPT-5), maxTokens for traditional models
+  const tokenParams = isReasoning ? { maxCompletionTokens: safeMaxTokens } : { maxTokens: safeMaxTokens };
+
+  // Filter out unsupported parameters for reasoning models
+  const filteredOptions =
+    isReasoning && options
+      ? Object.fromEntries(
+          Object.entries(options).filter(
+            ([key]) =>
+              ![
+                'temperature',
+                'topP',
+                'presencePenalty',
+                'frequencyPenalty',
+                'logprobs',
+                'topLogprobs',
+                'logitBias',
+              ].includes(key),
+          ),
+        )
+      : options || {};
+
+  // DEBUG: Log filtered options
+  logger.info(
+    `DEBUG STREAM: Options filtering for model "${modelDetails.name}":`,
+    JSON.stringify(
+      {
+        isReasoning,
+        originalOptions: options || {},
+        filteredOptions,
+        originalOptionsKeys: options ? Object.keys(options) : [],
+        filteredOptionsKeys: Object.keys(filteredOptions),
+        removedParams: options ? Object.keys(options).filter((key) => !(key in filteredOptions)) : [],
+      },
+      null,
+      2,
+    ),
+  );
+
+  const streamParams = {
     model: provider.getModelInstance({
       model: modelDetails.name,
       serverEnv,
@@ -214,8 +274,31 @@ export async function streamText(props: {
       providerSettings,
     }),
     system: chatMode === 'build' ? systemPrompt : discussPrompt(),
-    maxTokens: safeMaxTokens,
+    ...tokenParams,
     messages: convertToCoreMessages(processedMessages as any),
-    ...options,
-  });
+    ...filteredOptions,
+
+    // Set temperature to 1 for reasoning models (required by OpenAI API)
+    ...(isReasoning ? { temperature: 1 } : {}),
+  };
+
+  // DEBUG: Log final streaming parameters
+  logger.info(
+    `DEBUG STREAM: Final streaming params for model "${modelDetails.name}":`,
+    JSON.stringify(
+      {
+        hasTemperature: 'temperature' in streamParams,
+        hasMaxTokens: 'maxTokens' in streamParams,
+        hasMaxCompletionTokens: 'maxCompletionTokens' in streamParams,
+        paramKeys: Object.keys(streamParams).filter((key) => !['model', 'messages', 'system'].includes(key)),
+        streamParams: Object.fromEntries(
+          Object.entries(streamParams).filter(([key]) => !['model', 'messages', 'system'].includes(key)),
+        ),
+      },
+      null,
+      2,
+    ),
+  );
+
+  return await _streamText(streamParams);
 }
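Taken together, the stream-text changes pick the token parameter (maxCompletionTokens vs. maxTokens) and strip sampling options that reasoning models do not accept before calling _streamText. A small self-contained sketch of the filtering step, with hypothetical option values:

import { isReasoningModel } from '~/lib/.server/llm/constants';

const options = { temperature: 0.7, topP: 0.9, maxRetries: 2 }; // hypothetical caller options
const modelName = 'o1-mini'; // illustrative model name

const isReasoning = isReasoningModel(modelName);

// Same filter as in the diff: drop parameters reasoning models reject.
const filteredOptions = isReasoning
  ? Object.fromEntries(
      Object.entries(options).filter(
        ([key]) =>
          !['temperature', 'topP', 'presencePenalty', 'frequencyPenalty', 'logprobs', 'topLogprobs', 'logitBias'].includes(key),
      ),
    )
  : options;

// For 'o1-mini' this leaves { maxRetries: 2 }; for e.g. 'gpt-4o' the options pass through unchanged.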
--- a/app/lib/modules/llm/providers/anthropic.ts
+++ b/app/lib/modules/llm/providers/anthropic.ts
@@ -22,6 +22,7 @@ export default class AnthropicProvider extends BaseProvider {
       label: 'Claude 3.5 Sonnet',
       provider: 'Anthropic',
       maxTokenAllowed: 200000,
+      maxCompletionTokens: 128000,
     },
 
     // Claude 3 Haiku: 200k context, fastest and most cost-effective
@@ -30,6 +31,7 @@ export default class AnthropicProvider extends BaseProvider {
       label: 'Claude 3 Haiku',
       provider: 'Anthropic',
       maxTokenAllowed: 200000,
+      maxCompletionTokens: 128000,
     },
   ];
 
@@ -84,6 +86,7 @@ export default class AnthropicProvider extends BaseProvider {
         label: `${m.display_name} (${Math.floor(contextWindow / 1000)}k context)`,
         provider: this.name,
         maxTokenAllowed: contextWindow,
+        maxCompletionTokens: 128000, // Claude models support up to 128k completion tokens
       };
     });
   }
--- a/app/lib/modules/llm/providers/github.ts
+++ b/app/lib/modules/llm/providers/github.ts
@@ -14,13 +14,31 @@ export default class GithubProvider extends BaseProvider {
 
   // find more in https://github.com/marketplace?type=models
   staticModels: ModelInfo[] = [
-    { name: 'gpt-4o', label: 'GPT-4o', provider: 'Github', maxTokenAllowed: 8000 },
-    { name: 'o1', label: 'o1-preview', provider: 'Github', maxTokenAllowed: 100000 },
-    { name: 'o1-mini', label: 'o1-mini', provider: 'Github', maxTokenAllowed: 8000 },
-    { name: 'gpt-4o-mini', label: 'GPT-4o Mini', provider: 'Github', maxTokenAllowed: 8000 },
-    { name: 'gpt-4-turbo', label: 'GPT-4 Turbo', provider: 'Github', maxTokenAllowed: 8000 },
-    { name: 'gpt-4', label: 'GPT-4', provider: 'Github', maxTokenAllowed: 8000 },
-    { name: 'gpt-3.5-turbo', label: 'GPT-3.5 Turbo', provider: 'Github', maxTokenAllowed: 8000 },
+    { name: 'gpt-4o', label: 'GPT-4o', provider: 'Github', maxTokenAllowed: 128000, maxCompletionTokens: 16384 },
+    { name: 'o1', label: 'o1-preview', provider: 'Github', maxTokenAllowed: 100000, maxCompletionTokens: 16384 },
+    { name: 'o1-mini', label: 'o1-mini', provider: 'Github', maxTokenAllowed: 65536, maxCompletionTokens: 8192 },
+    {
+      name: 'gpt-4o-mini',
+      label: 'GPT-4o Mini',
+      provider: 'Github',
+      maxTokenAllowed: 128000,
+      maxCompletionTokens: 16384,
+    },
+    {
+      name: 'gpt-4-turbo',
+      label: 'GPT-4 Turbo',
+      provider: 'Github',
+      maxTokenAllowed: 128000,
+      maxCompletionTokens: 8192,
+    },
+    { name: 'gpt-4', label: 'GPT-4', provider: 'Github', maxTokenAllowed: 8192, maxCompletionTokens: 8192 },
+    {
+      name: 'gpt-3.5-turbo',
+      label: 'GPT-3.5 Turbo',
+      provider: 'Github',
+      maxTokenAllowed: 16385,
+      maxCompletionTokens: 4096,
+    },
   ];
 
   getModelInstance(options: {
--- a/app/lib/modules/llm/providers/google.ts
+++ b/app/lib/modules/llm/providers/google.ts
@@ -17,10 +17,22 @@ export default class GoogleProvider extends BaseProvider {
      * Essential fallback models - only the most reliable/stable ones
      * Gemini 1.5 Pro: 2M context, excellent for complex reasoning and large codebases
      */
-    { name: 'gemini-1.5-pro', label: 'Gemini 1.5 Pro', provider: 'Google', maxTokenAllowed: 2000000 },
+    {
+      name: 'gemini-1.5-pro',
+      label: 'Gemini 1.5 Pro',
+      provider: 'Google',
+      maxTokenAllowed: 2000000,
+      maxCompletionTokens: 32768,
+    },
 
     // Gemini 1.5 Flash: 1M context, fast and cost-effective
-    { name: 'gemini-1.5-flash', label: 'Gemini 1.5 Flash', provider: 'Google', maxTokenAllowed: 1000000 },
+    {
+      name: 'gemini-1.5-flash',
+      label: 'Gemini 1.5 Flash',
+      provider: 'Google',
+      maxTokenAllowed: 1000000,
+      maxCompletionTokens: 32768,
+    },
   ];
 
   async getDynamicModels(
@@ -89,11 +101,19 @@ export default class GoogleProvider extends BaseProvider {
       const maxAllowed = 2000000; // 2M tokens max
       const finalContext = Math.min(contextWindow, maxAllowed);
 
+      // Get completion token limit from Google API
+      let completionTokens = 32768; // default fallback
+
+      if (m.outputTokenLimit && m.outputTokenLimit > 0) {
+        completionTokens = Math.min(m.outputTokenLimit, 128000); // Cap at reasonable limit
+      }
+
       return {
         name: modelName,
         label: `${m.displayName} (${finalContext >= 1000000 ? Math.floor(finalContext / 1000000) + 'M' : Math.floor(finalContext / 1000) + 'k'} context)`,
         provider: this.name,
         maxTokenAllowed: finalContext,
+        maxCompletionTokens: completionTokens,
       };
     });
   }
--- a/app/lib/modules/llm/providers/openai.ts
+++ b/app/lib/modules/llm/providers/openai.ts
@@ -17,10 +17,16 @@ export default class OpenAIProvider extends BaseProvider {
      * Essential fallback models - only the most stable/reliable ones
      * GPT-4o: 128k context, high performance, recommended for most tasks
      */
-    { name: 'gpt-4o', label: 'GPT-4o', provider: 'OpenAI', maxTokenAllowed: 128000 },
+    { name: 'gpt-4o', label: 'GPT-4o', provider: 'OpenAI', maxTokenAllowed: 128000, maxCompletionTokens: 16384 },
 
     // GPT-3.5-turbo: 16k context, fast and cost-effective
-    { name: 'gpt-3.5-turbo', label: 'GPT-3.5 Turbo', provider: 'OpenAI', maxTokenAllowed: 16000 },
+    {
+      name: 'gpt-3.5-turbo',
+      label: 'GPT-3.5 Turbo',
+      provider: 'OpenAI',
+      maxTokenAllowed: 16000,
+      maxCompletionTokens: 4096,
+    },
   ];
 
   async getDynamicModels(
--- a/app/lib/modules/llm/types.ts
+++ b/app/lib/modules/llm/types.ts
@@ -5,7 +5,12 @@ export interface ModelInfo {
   name: string;
   label: string;
   provider: string;
+
+  /** Maximum context window size (input tokens) - how many tokens the model can process */
   maxTokenAllowed: number;
+
+  /** Maximum completion/output tokens - how many tokens the model can generate. If not specified, falls back to provider defaults */
+  maxCompletionTokens?: number;
 }
 
 export interface ProviderInfo {
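With the widened ModelInfo interface, a provider entry can declare both its context window and its completion budget; a hypothetical entry for illustration (the model name and numbers are made up):

import type { ModelInfo } from '~/lib/modules/llm/types';

const exampleModel: ModelInfo = {
  name: 'example-model',
  label: 'Example Model (128k context)',
  provider: 'OpenAI',
  maxTokenAllowed: 128000, // input context window
  maxCompletionTokens: 16384, // output budget; optional - omit it to fall back to PROVIDER_COMPLETION_LIMITS
};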
--- a/app/routes/api.llmcall.ts
+++ b/app/routes/api.llmcall.ts
@@ -3,7 +3,7 @@ import { streamText } from '~/lib/.server/llm/stream-text';
 import type { IProviderSetting, ProviderInfo } from '~/types/model';
 import { generateText } from 'ai';
 import { PROVIDER_LIST } from '~/utils/constants';
-import { MAX_TOKENS } from '~/lib/.server/llm/constants';
+import { MAX_TOKENS, PROVIDER_COMPLETION_LIMITS, isReasoningModel } from '~/lib/.server/llm/constants';
 import { LLMManager } from '~/lib/modules/llm/manager';
 import type { ModelInfo } from '~/lib/modules/llm/types';
 import { getApiKeysFromCookie, getProviderSettingsFromCookie } from '~/lib/api/cookies';
@@ -24,6 +24,23 @@ async function getModelList(options: {
 
 const logger = createScopedLogger('api.llmcall');
 
+function getCompletionTokenLimit(modelDetails: ModelInfo): number {
+  // 1. If model specifies completion tokens, use that
+  if (modelDetails.maxCompletionTokens && modelDetails.maxCompletionTokens > 0) {
+    return modelDetails.maxCompletionTokens;
+  }
+
+  // 2. Use provider-specific default
+  const providerDefault = PROVIDER_COMPLETION_LIMITS[modelDetails.provider];
+
+  if (providerDefault) {
+    return providerDefault;
+  }
+
+  // 3. Final fallback to MAX_TOKENS, but cap at reasonable limit for safety
+  return Math.min(MAX_TOKENS, 16384);
+}
+
 async function llmCallAction({ context, request }: ActionFunctionArgs) {
   const { system, message, model, provider, streamOutput } = await request.json<{
     system: string;
@@ -101,7 +118,7 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
     throw new Error('Model not found');
   }
 
-  const dynamicMaxTokens = modelDetails && modelDetails.maxTokenAllowed ? modelDetails.maxTokenAllowed : MAX_TOKENS;
+  const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);
 
   const providerInfo = PROVIDER_LIST.find((p) => p.name === provider.name);
 
@@ -111,11 +128,19 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
 
   logger.info(`Generating response Provider: ${provider.name}, Model: ${modelDetails.name}`);
 
-  const result = await generateText({
+  // DEBUG: Log reasoning model detection
+  const isReasoning = isReasoningModel(modelDetails.name);
+  logger.info(`DEBUG: Model "${modelDetails.name}" detected as reasoning model: ${isReasoning}`);
+
+  // Use maxCompletionTokens for reasoning models (o1, GPT-5), maxTokens for traditional models
+  const tokenParams = isReasoning ? { maxCompletionTokens: dynamicMaxTokens } : { maxTokens: dynamicMaxTokens };
+
+  // Filter out unsupported parameters for reasoning models
+  const baseParams = {
     system,
     messages: [
       {
-        role: 'user',
+        role: 'user' as const,
         content: `${message}`,
       },
     ],
@@ -125,9 +150,36 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
       apiKeys,
       providerSettings,
     }),
-    maxTokens: dynamicMaxTokens,
-    toolChoice: 'none',
-  });
+    ...tokenParams,
+    toolChoice: 'none' as const,
+  };
+
+  // For reasoning models, set temperature to 1 (required by OpenAI API)
+  const finalParams = isReasoning
+    ? { ...baseParams, temperature: 1 } // Set to 1 for reasoning models (only supported value)
+    : { ...baseParams, temperature: 0 };
+
+  // DEBUG: Log final parameters
+  logger.info(
+    `DEBUG: Final params for model "${modelDetails.name}":`,
+    JSON.stringify(
+      {
+        isReasoning,
+        hasTemperature: 'temperature' in finalParams,
+        hasMaxTokens: 'maxTokens' in finalParams,
+        hasMaxCompletionTokens: 'maxCompletionTokens' in finalParams,
+        paramKeys: Object.keys(finalParams).filter((key) => !['model', 'messages', 'system'].includes(key)),
+        tokenParams,
+        finalParams: Object.fromEntries(
+          Object.entries(finalParams).filter(([key]) => !['model', 'messages', 'system'].includes(key)),
+        ),
+      },
+      null,
+      2,
+    ),
+  );
+
+  const result = await generateText(finalParams);
 
   logger.info(`Generated response`);
 
   return new Response(JSON.stringify(result), {