fix: maxCompletionTokens Implementation for All Providers (#1938)
* Update LLM providers and constants
  - Updated constants in app/lib/.server/llm/constants.ts
  - Modified stream-text functionality in app/lib/.server/llm/stream-text.ts
  - Updated Anthropic provider in app/lib/modules/llm/providers/anthropic.ts
  - Modified GitHub provider in app/lib/modules/llm/providers/github.ts
  - Updated Google provider in app/lib/modules/llm/providers/google.ts
  - Modified OpenAI provider in app/lib/modules/llm/providers/openai.ts
  - Updated LLM types in app/lib/modules/llm/types.ts
  - Modified API route in app/routes/api.llmcall.ts
* Fix maxCompletionTokens Implementation for All Providers
  - Cohere: Added maxCompletionTokens: 4000 to all 10 static models
  - DeepSeek: Added maxCompletionTokens: 8192 to all 3 static models
  - Groq: Added maxCompletionTokens: 8192 to both static models
  - Mistral: Added maxCompletionTokens: 8192 to all 9 static models
  - Together: Added maxCompletionTokens: 8192 to both static models
  - Groq: Fixed getDynamicModels to include maxCompletionTokens: 8192
  - Together: Fixed getDynamicModels to include maxCompletionTokens: 8192
  - OpenAI: Fixed getDynamicModels with proper logic for reasoning models (o1: 16384, o1-mini: 8192) and standard models
This commit is contained in:
@@ -13,16 +13,64 @@ export default class CohereProvider extends BaseProvider {
|
|||||||
};
|
};
|
||||||
|
|
||||||
staticModels: ModelInfo[] = [
|
staticModels: ModelInfo[] = [
|
||||||
{ name: 'command-r-plus-08-2024', label: 'Command R plus Latest', provider: 'Cohere', maxTokenAllowed: 4096 },
|
{
|
||||||
{ name: 'command-r-08-2024', label: 'Command R Latest', provider: 'Cohere', maxTokenAllowed: 4096 },
|
name: 'command-r-plus-08-2024',
|
||||||
{ name: 'command-r-plus', label: 'Command R plus', provider: 'Cohere', maxTokenAllowed: 4096 },
|
label: 'Command R plus Latest',
|
||||||
{ name: 'command-r', label: 'Command R', provider: 'Cohere', maxTokenAllowed: 4096 },
|
provider: 'Cohere',
|
||||||
{ name: 'command', label: 'Command', provider: 'Cohere', maxTokenAllowed: 4096 },
|
maxTokenAllowed: 4096,
|
||||||
{ name: 'command-nightly', label: 'Command Nightly', provider: 'Cohere', maxTokenAllowed: 4096 },
|
maxCompletionTokens: 4000,
|
||||||
{ name: 'command-light', label: 'Command Light', provider: 'Cohere', maxTokenAllowed: 4096 },
|
},
|
||||||
{ name: 'command-light-nightly', label: 'Command Light Nightly', provider: 'Cohere', maxTokenAllowed: 4096 },
|
{
|
||||||
{ name: 'c4ai-aya-expanse-8b', label: 'c4AI Aya Expanse 8b', provider: 'Cohere', maxTokenAllowed: 4096 },
|
name: 'command-r-08-2024',
|
||||||
{ name: 'c4ai-aya-expanse-32b', label: 'c4AI Aya Expanse 32b', provider: 'Cohere', maxTokenAllowed: 4096 },
|
label: 'Command R Latest',
|
||||||
|
provider: 'Cohere',
|
||||||
|
maxTokenAllowed: 4096,
|
||||||
|
maxCompletionTokens: 4000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'command-r-plus',
|
||||||
|
label: 'Command R plus',
|
||||||
|
provider: 'Cohere',
|
||||||
|
maxTokenAllowed: 4096,
|
||||||
|
maxCompletionTokens: 4000,
|
||||||
|
},
|
||||||
|
{ name: 'command-r', label: 'Command R', provider: 'Cohere', maxTokenAllowed: 4096, maxCompletionTokens: 4000 },
|
||||||
|
{ name: 'command', label: 'Command', provider: 'Cohere', maxTokenAllowed: 4096, maxCompletionTokens: 4000 },
|
||||||
|
{
|
||||||
|
name: 'command-nightly',
|
||||||
|
label: 'Command Nightly',
|
||||||
|
provider: 'Cohere',
|
||||||
|
maxTokenAllowed: 4096,
|
||||||
|
maxCompletionTokens: 4000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'command-light',
|
||||||
|
label: 'Command Light',
|
||||||
|
provider: 'Cohere',
|
||||||
|
maxTokenAllowed: 4096,
|
||||||
|
maxCompletionTokens: 4000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'command-light-nightly',
|
||||||
|
label: 'Command Light Nightly',
|
||||||
|
provider: 'Cohere',
|
||||||
|
maxTokenAllowed: 4096,
|
||||||
|
maxCompletionTokens: 4000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'c4ai-aya-expanse-8b',
|
||||||
|
label: 'c4AI Aya Expanse 8b',
|
||||||
|
provider: 'Cohere',
|
||||||
|
maxTokenAllowed: 4096,
|
||||||
|
maxCompletionTokens: 4000,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'c4ai-aya-expanse-32b',
|
||||||
|
label: 'c4AI Aya Expanse 32b',
|
||||||
|
provider: 'Cohere',
|
||||||
|
maxTokenAllowed: 4096,
|
||||||
|
maxCompletionTokens: 4000,
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
getModelInstance(options: {
|
getModelInstance(options: {
|
||||||
|
|||||||
@@ -13,9 +13,27 @@ export default class DeepseekProvider extends BaseProvider {
|
|||||||
};
|
};
|
||||||
|
|
||||||
staticModels: ModelInfo[] = [
|
staticModels: ModelInfo[] = [
|
||||||
{ name: 'deepseek-coder', label: 'Deepseek-Coder', provider: 'Deepseek', maxTokenAllowed: 8000 },
|
{
|
||||||
{ name: 'deepseek-chat', label: 'Deepseek-Chat', provider: 'Deepseek', maxTokenAllowed: 8000 },
|
name: 'deepseek-coder',
|
||||||
{ name: 'deepseek-reasoner', label: 'Deepseek-Reasoner', provider: 'Deepseek', maxTokenAllowed: 8000 },
|
label: 'Deepseek-Coder',
|
||||||
|
provider: 'Deepseek',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'deepseek-chat',
|
||||||
|
label: 'Deepseek-Chat',
|
||||||
|
provider: 'Deepseek',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'deepseek-reasoner',
|
||||||
|
label: 'Deepseek-Reasoner',
|
||||||
|
provider: 'Deepseek',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
getModelInstance(options: {
|
getModelInstance(options: {
|
||||||
|
|||||||
@@ -17,7 +17,13 @@ export default class GroqProvider extends BaseProvider {
|
|||||||
* Essential fallback models - only the most stable/reliable ones
|
* Essential fallback models - only the most stable/reliable ones
|
||||||
* Llama 3.1 8B: 128k context, fast and efficient
|
* Llama 3.1 8B: 128k context, fast and efficient
|
||||||
*/
|
*/
|
||||||
{ name: 'llama-3.1-8b-instant', label: 'Llama 3.1 8B', provider: 'Groq', maxTokenAllowed: 128000 },
|
{
|
||||||
|
name: 'llama-3.1-8b-instant',
|
||||||
|
label: 'Llama 3.1 8B',
|
||||||
|
provider: 'Groq',
|
||||||
|
maxTokenAllowed: 128000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
|
|
||||||
// Llama 3.3 70B: 128k context, most capable model
|
// Llama 3.3 70B: 128k context, most capable model
|
||||||
{
|
{
|
||||||
@@ -25,6 +31,7 @@ export default class GroqProvider extends BaseProvider {
|
|||||||
label: 'Llama 3.3 70B',
|
label: 'Llama 3.3 70B',
|
||||||
provider: 'Groq',
|
provider: 'Groq',
|
||||||
maxTokenAllowed: 128000,
|
maxTokenAllowed: 128000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -62,6 +69,7 @@ export default class GroqProvider extends BaseProvider {
|
|||||||
label: `${m.id} - context ${m.context_window ? Math.floor(m.context_window / 1000) + 'k' : 'N/A'} [ by ${m.owned_by}]`,
|
label: `${m.id} - context ${m.context_window ? Math.floor(m.context_window / 1000) + 'k' : 'N/A'} [ by ${m.owned_by}]`,
|
||||||
provider: this.name,
|
provider: this.name,
|
||||||
maxTokenAllowed: Math.min(m.context_window || 8192, 16384),
|
maxTokenAllowed: Math.min(m.context_window || 8192, 16384),
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,15 +13,69 @@ export default class MistralProvider extends BaseProvider {
|
|||||||
};
|
};
|
||||||
|
|
||||||
staticModels: ModelInfo[] = [
|
staticModels: ModelInfo[] = [
|
||||||
{ name: 'open-mistral-7b', label: 'Mistral 7B', provider: 'Mistral', maxTokenAllowed: 8000 },
|
{
|
||||||
{ name: 'open-mixtral-8x7b', label: 'Mistral 8x7B', provider: 'Mistral', maxTokenAllowed: 8000 },
|
name: 'open-mistral-7b',
|
||||||
{ name: 'open-mixtral-8x22b', label: 'Mistral 8x22B', provider: 'Mistral', maxTokenAllowed: 8000 },
|
label: 'Mistral 7B',
|
||||||
{ name: 'open-codestral-mamba', label: 'Codestral Mamba', provider: 'Mistral', maxTokenAllowed: 8000 },
|
provider: 'Mistral',
|
||||||
{ name: 'open-mistral-nemo', label: 'Mistral Nemo', provider: 'Mistral', maxTokenAllowed: 8000 },
|
maxTokenAllowed: 8000,
|
||||||
{ name: 'ministral-8b-latest', label: 'Mistral 8B', provider: 'Mistral', maxTokenAllowed: 8000 },
|
maxCompletionTokens: 8192,
|
||||||
{ name: 'mistral-small-latest', label: 'Mistral Small', provider: 'Mistral', maxTokenAllowed: 8000 },
|
},
|
||||||
{ name: 'codestral-latest', label: 'Codestral', provider: 'Mistral', maxTokenAllowed: 8000 },
|
{
|
||||||
{ name: 'mistral-large-latest', label: 'Mistral Large Latest', provider: 'Mistral', maxTokenAllowed: 8000 },
|
name: 'open-mixtral-8x7b',
|
||||||
|
label: 'Mistral 8x7B',
|
||||||
|
provider: 'Mistral',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'open-mixtral-8x22b',
|
||||||
|
label: 'Mistral 8x22B',
|
||||||
|
provider: 'Mistral',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'open-codestral-mamba',
|
||||||
|
label: 'Codestral Mamba',
|
||||||
|
provider: 'Mistral',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'open-mistral-nemo',
|
||||||
|
label: 'Mistral Nemo',
|
||||||
|
provider: 'Mistral',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'ministral-8b-latest',
|
||||||
|
label: 'Mistral 8B',
|
||||||
|
provider: 'Mistral',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'mistral-small-latest',
|
||||||
|
label: 'Mistral Small',
|
||||||
|
provider: 'Mistral',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'codestral-latest',
|
||||||
|
label: 'Codestral',
|
||||||
|
provider: 'Mistral',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'mistral-large-latest',
|
||||||
|
label: 'Mistral Large Latest',
|
||||||
|
provider: 'Mistral',
|
||||||
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
getModelInstance(options: {
|
getModelInstance(options: {
|
||||||
|
|||||||
@@ -79,11 +79,26 @@ export default class OpenAIProvider extends BaseProvider {
|
|||||||
contextWindow = 16385; // GPT-3.5-turbo has 16k context
|
contextWindow = 16385; // GPT-3.5-turbo has 16k context
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Determine completion token limits based on model type
|
||||||
|
let maxCompletionTokens = 16384; // default for most models
|
||||||
|
|
||||||
|
if (m.id?.startsWith('o1-preview') || m.id?.startsWith('o1-mini') || m.id?.startsWith('o1')) {
|
||||||
|
// Reasoning models have specific completion limits
|
||||||
|
maxCompletionTokens = m.id?.includes('mini') ? 8192 : 16384;
|
||||||
|
} else if (m.id?.includes('gpt-4o')) {
|
||||||
|
maxCompletionTokens = 16384;
|
||||||
|
} else if (m.id?.includes('gpt-4')) {
|
||||||
|
maxCompletionTokens = 8192;
|
||||||
|
} else if (m.id?.includes('gpt-3.5-turbo')) {
|
||||||
|
maxCompletionTokens = 4096;
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
name: m.id,
|
name: m.id,
|
||||||
label: `${m.id} (${Math.floor(contextWindow / 1000)}k context)`,
|
label: `${m.id} (${Math.floor(contextWindow / 1000)}k context)`,
|
||||||
provider: this.name,
|
provider: this.name,
|
||||||
maxTokenAllowed: Math.min(contextWindow, 128000), // Cap at 128k for safety
|
maxTokenAllowed: Math.min(contextWindow, 128000), // Cap at 128k for safety
|
||||||
|
maxCompletionTokens,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ export default class TogetherProvider extends BaseProvider {
|
|||||||
label: 'Llama 3.2 90B Vision',
|
label: 'Llama 3.2 90B Vision',
|
||||||
provider: 'Together',
|
provider: 'Together',
|
||||||
maxTokenAllowed: 128000,
|
maxTokenAllowed: 128000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
},
|
},
|
||||||
|
|
||||||
// Mixtral 8x7B: 32k context, strong performance
|
// Mixtral 8x7B: 32k context, strong performance
|
||||||
@@ -30,6 +31,7 @@ export default class TogetherProvider extends BaseProvider {
|
|||||||
label: 'Mixtral 8x7B Instruct',
|
label: 'Mixtral 8x7B Instruct',
|
||||||
provider: 'Together',
|
provider: 'Together',
|
||||||
maxTokenAllowed: 32000,
|
maxTokenAllowed: 32000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -67,6 +69,7 @@ export default class TogetherProvider extends BaseProvider {
|
|||||||
label: `${m.display_name} - in:$${m.pricing.input.toFixed(2)} out:$${m.pricing.output.toFixed(2)} - context ${Math.floor(m.context_length / 1000)}k`,
|
label: `${m.display_name} - in:$${m.pricing.input.toFixed(2)} out:$${m.pricing.output.toFixed(2)} - context ${Math.floor(m.context_length / 1000)}k`,
|
||||||
provider: this.name,
|
provider: this.name,
|
||||||
maxTokenAllowed: 8000,
|
maxTokenAllowed: 8000,
|
||||||
|
maxCompletionTokens: 8192,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user