From ff8b0d7af14bf51c09aeef49bbf4be3265d902db Mon Sep 17 00:00:00 2001 From: Stijnus <72551117+Stijnus@users.noreply.github.com> Date: Fri, 29 Aug 2025 23:13:58 +0200 Subject: [PATCH] fix: maxCompletionTokens Implementation for All Providers (#1938) * Update LLM providers and constants - Updated constants in app/lib/.server/llm/constants.ts - Modified stream-text functionality in app/lib/.server/llm/stream-text.ts - Updated Anthropic provider in app/lib/modules/llm/providers/anthropic.ts - Modified GitHub provider in app/lib/modules/llm/providers/github.ts - Updated Google provider in app/lib/modules/llm/providers/google.ts - Modified OpenAI provider in app/lib/modules/llm/providers/openai.ts - Updated LLM types in app/lib/modules/llm/types.ts - Modified API route in app/routes/api.llmcall.ts * Fix maxCompletionTokens Implementation for All Providers - Cohere: Added maxCompletionTokens: 4000 to all 10 static models - DeepSeek: Added maxCompletionTokens: 8192 to all 3 static models - Groq: Added maxCompletionTokens: 8192 to both static models - Mistral: Added maxCompletionTokens: 8192 to all 9 static models - Together: Added maxCompletionTokens: 8192 to both static models - Groq: Fixed getDynamicModels to include maxCompletionTokens: 8192 - Together: Fixed getDynamicModels to include maxCompletionTokens: 8192 - OpenAI: Fixed getDynamicModels with proper logic for reasoning models (o1: 16384, o1-mini: 8192) and standard models --- app/lib/modules/llm/providers/cohere.ts | 68 +++++++++++++++++---- app/lib/modules/llm/providers/deepseek.ts | 24 +++++++- app/lib/modules/llm/providers/groq.ts | 10 +++- app/lib/modules/llm/providers/mistral.ts | 72 ++++++++++++++++++++--- app/lib/modules/llm/providers/openai.ts | 15 +++++ app/lib/modules/llm/providers/together.ts | 3 + 6 files changed, 169 insertions(+), 23 deletions(-) diff --git a/app/lib/modules/llm/providers/cohere.ts b/app/lib/modules/llm/providers/cohere.ts index 9233b96..7654dd7 100644 --- a/app/lib/modules/llm/providers/cohere.ts +++ b/app/lib/modules/llm/providers/cohere.ts @@ -13,16 +13,64 @@ export default class CohereProvider extends BaseProvider { }; staticModels: ModelInfo[] = [ - { name: 'command-r-plus-08-2024', label: 'Command R plus Latest', provider: 'Cohere', maxTokenAllowed: 4096 }, - { name: 'command-r-08-2024', label: 'Command R Latest', provider: 'Cohere', maxTokenAllowed: 4096 }, - { name: 'command-r-plus', label: 'Command R plus', provider: 'Cohere', maxTokenAllowed: 4096 }, - { name: 'command-r', label: 'Command R', provider: 'Cohere', maxTokenAllowed: 4096 }, - { name: 'command', label: 'Command', provider: 'Cohere', maxTokenAllowed: 4096 }, - { name: 'command-nightly', label: 'Command Nightly', provider: 'Cohere', maxTokenAllowed: 4096 }, - { name: 'command-light', label: 'Command Light', provider: 'Cohere', maxTokenAllowed: 4096 }, - { name: 'command-light-nightly', label: 'Command Light Nightly', provider: 'Cohere', maxTokenAllowed: 4096 }, - { name: 'c4ai-aya-expanse-8b', label: 'c4AI Aya Expanse 8b', provider: 'Cohere', maxTokenAllowed: 4096 }, - { name: 'c4ai-aya-expanse-32b', label: 'c4AI Aya Expanse 32b', provider: 'Cohere', maxTokenAllowed: 4096 }, + { + name: 'command-r-plus-08-2024', + label: 'Command R plus Latest', + provider: 'Cohere', + maxTokenAllowed: 4096, + maxCompletionTokens: 4000, + }, + { + name: 'command-r-08-2024', + label: 'Command R Latest', + provider: 'Cohere', + maxTokenAllowed: 4096, + maxCompletionTokens: 4000, + }, + { + name: 'command-r-plus', + label: 'Command R plus', + provider: 'Cohere', + maxTokenAllowed: 4096, + maxCompletionTokens: 4000, + }, + { name: 'command-r', label: 'Command R', provider: 'Cohere', maxTokenAllowed: 4096, maxCompletionTokens: 4000 }, + { name: 'command', label: 'Command', provider: 'Cohere', maxTokenAllowed: 4096, maxCompletionTokens: 4000 }, + { + name: 'command-nightly', + label: 'Command Nightly', + provider: 'Cohere', + maxTokenAllowed: 4096, + maxCompletionTokens: 4000, + }, + { + name: 'command-light', + label: 'Command Light', + provider: 'Cohere', + maxTokenAllowed: 4096, + maxCompletionTokens: 4000, + }, + { + name: 'command-light-nightly', + label: 'Command Light Nightly', + provider: 'Cohere', + maxTokenAllowed: 4096, + maxCompletionTokens: 4000, + }, + { + name: 'c4ai-aya-expanse-8b', + label: 'c4AI Aya Expanse 8b', + provider: 'Cohere', + maxTokenAllowed: 4096, + maxCompletionTokens: 4000, + }, + { + name: 'c4ai-aya-expanse-32b', + label: 'c4AI Aya Expanse 32b', + provider: 'Cohere', + maxTokenAllowed: 4096, + maxCompletionTokens: 4000, + }, ]; getModelInstance(options: { diff --git a/app/lib/modules/llm/providers/deepseek.ts b/app/lib/modules/llm/providers/deepseek.ts index e0e7257..7c9042d 100644 --- a/app/lib/modules/llm/providers/deepseek.ts +++ b/app/lib/modules/llm/providers/deepseek.ts @@ -13,9 +13,27 @@ export default class DeepseekProvider extends BaseProvider { }; staticModels: ModelInfo[] = [ - { name: 'deepseek-coder', label: 'Deepseek-Coder', provider: 'Deepseek', maxTokenAllowed: 8000 }, - { name: 'deepseek-chat', label: 'Deepseek-Chat', provider: 'Deepseek', maxTokenAllowed: 8000 }, - { name: 'deepseek-reasoner', label: 'Deepseek-Reasoner', provider: 'Deepseek', maxTokenAllowed: 8000 }, + { + name: 'deepseek-coder', + label: 'Deepseek-Coder', + provider: 'Deepseek', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, + { + name: 'deepseek-chat', + label: 'Deepseek-Chat', + provider: 'Deepseek', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, + { + name: 'deepseek-reasoner', + label: 'Deepseek-Reasoner', + provider: 'Deepseek', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, ]; getModelInstance(options: { diff --git a/app/lib/modules/llm/providers/groq.ts b/app/lib/modules/llm/providers/groq.ts index 9ac21c7..95edb14 100644 --- a/app/lib/modules/llm/providers/groq.ts +++ b/app/lib/modules/llm/providers/groq.ts @@ -17,7 +17,13 @@ export default class GroqProvider extends BaseProvider { * Essential fallback models - only the most stable/reliable ones * Llama 3.1 8B: 128k context, fast and efficient */ - { name: 'llama-3.1-8b-instant', label: 'Llama 3.1 8B', provider: 'Groq', maxTokenAllowed: 128000 }, + { + name: 'llama-3.1-8b-instant', + label: 'Llama 3.1 8B', + provider: 'Groq', + maxTokenAllowed: 128000, + maxCompletionTokens: 8192, + }, // Llama 3.3 70B: 128k context, most capable model { @@ -25,6 +31,7 @@ export default class GroqProvider extends BaseProvider { label: 'Llama 3.3 70B', provider: 'Groq', maxTokenAllowed: 128000, + maxCompletionTokens: 8192, }, ]; @@ -62,6 +69,7 @@ export default class GroqProvider extends BaseProvider { label: `${m.id} - context ${m.context_window ? Math.floor(m.context_window / 1000) + 'k' : 'N/A'} [ by ${m.owned_by}]`, provider: this.name, maxTokenAllowed: Math.min(m.context_window || 8192, 16384), + maxCompletionTokens: 8192, })); } diff --git a/app/lib/modules/llm/providers/mistral.ts b/app/lib/modules/llm/providers/mistral.ts index c5becee..72b6e34 100644 --- a/app/lib/modules/llm/providers/mistral.ts +++ b/app/lib/modules/llm/providers/mistral.ts @@ -13,15 +13,69 @@ export default class MistralProvider extends BaseProvider { }; staticModels: ModelInfo[] = [ - { name: 'open-mistral-7b', label: 'Mistral 7B', provider: 'Mistral', maxTokenAllowed: 8000 }, - { name: 'open-mixtral-8x7b', label: 'Mistral 8x7B', provider: 'Mistral', maxTokenAllowed: 8000 }, - { name: 'open-mixtral-8x22b', label: 'Mistral 8x22B', provider: 'Mistral', maxTokenAllowed: 8000 }, - { name: 'open-codestral-mamba', label: 'Codestral Mamba', provider: 'Mistral', maxTokenAllowed: 8000 }, - { name: 'open-mistral-nemo', label: 'Mistral Nemo', provider: 'Mistral', maxTokenAllowed: 8000 }, - { name: 'ministral-8b-latest', label: 'Mistral 8B', provider: 'Mistral', maxTokenAllowed: 8000 }, - { name: 'mistral-small-latest', label: 'Mistral Small', provider: 'Mistral', maxTokenAllowed: 8000 }, - { name: 'codestral-latest', label: 'Codestral', provider: 'Mistral', maxTokenAllowed: 8000 }, - { name: 'mistral-large-latest', label: 'Mistral Large Latest', provider: 'Mistral', maxTokenAllowed: 8000 }, + { + name: 'open-mistral-7b', + label: 'Mistral 7B', + provider: 'Mistral', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, + { + name: 'open-mixtral-8x7b', + label: 'Mistral 8x7B', + provider: 'Mistral', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, + { + name: 'open-mixtral-8x22b', + label: 'Mistral 8x22B', + provider: 'Mistral', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, + { + name: 'open-codestral-mamba', + label: 'Codestral Mamba', + provider: 'Mistral', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, + { + name: 'open-mistral-nemo', + label: 'Mistral Nemo', + provider: 'Mistral', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, + { + name: 'ministral-8b-latest', + label: 'Mistral 8B', + provider: 'Mistral', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, + { + name: 'mistral-small-latest', + label: 'Mistral Small', + provider: 'Mistral', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, + { + name: 'codestral-latest', + label: 'Codestral', + provider: 'Mistral', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, + { + name: 'mistral-large-latest', + label: 'Mistral Large Latest', + provider: 'Mistral', + maxTokenAllowed: 8000, + maxCompletionTokens: 8192, + }, ]; getModelInstance(options: { diff --git a/app/lib/modules/llm/providers/openai.ts b/app/lib/modules/llm/providers/openai.ts index 66e7142..5792090 100644 --- a/app/lib/modules/llm/providers/openai.ts +++ b/app/lib/modules/llm/providers/openai.ts @@ -79,11 +79,26 @@ export default class OpenAIProvider extends BaseProvider { contextWindow = 16385; // GPT-3.5-turbo has 16k context } + // Determine completion token limits based on model type + let maxCompletionTokens = 16384; // default for most models + + if (m.id?.startsWith('o1-preview') || m.id?.startsWith('o1-mini') || m.id?.startsWith('o1')) { + // Reasoning models have specific completion limits + maxCompletionTokens = m.id?.includes('mini') ? 8192 : 16384; + } else if (m.id?.includes('gpt-4o')) { + maxCompletionTokens = 16384; + } else if (m.id?.includes('gpt-4')) { + maxCompletionTokens = 8192; + } else if (m.id?.includes('gpt-3.5-turbo')) { + maxCompletionTokens = 4096; + } + return { name: m.id, label: `${m.id} (${Math.floor(contextWindow / 1000)}k context)`, provider: this.name, maxTokenAllowed: Math.min(contextWindow, 128000), // Cap at 128k for safety + maxCompletionTokens, }; }); } diff --git a/app/lib/modules/llm/providers/together.ts b/app/lib/modules/llm/providers/together.ts index 987ba03..ba4cc5e 100644 --- a/app/lib/modules/llm/providers/together.ts +++ b/app/lib/modules/llm/providers/together.ts @@ -22,6 +22,7 @@ export default class TogetherProvider extends BaseProvider { label: 'Llama 3.2 90B Vision', provider: 'Together', maxTokenAllowed: 128000, + maxCompletionTokens: 8192, }, // Mixtral 8x7B: 32k context, strong performance @@ -30,6 +31,7 @@ export default class TogetherProvider extends BaseProvider { label: 'Mixtral 8x7B Instruct', provider: 'Together', maxTokenAllowed: 32000, + maxCompletionTokens: 8192, }, ]; @@ -67,6 +69,7 @@ export default class TogetherProvider extends BaseProvider { label: `${m.display_name} - in:$${m.pricing.input.toFixed(2)} out:$${m.pricing.output.toFixed(2)} - context ${Math.floor(m.context_length / 1000)}k`, provider: this.name, maxTokenAllowed: 8000, + maxCompletionTokens: 8192, })); }