feat: add Moonshot AI (Kimi) provider and update xAI Grok models (#1953)
- Add comprehensive Moonshot AI provider with 11 models, including:
  * Legacy moonshot-v1 series (8k, 32k, 128k context)
  * Latest Kimi K2 models (K2 Preview, Turbo, Thinking)
  * Vision-enabled models for multimodal capabilities
  * Auto-selecting model variants
- Update xAI provider with latest Grok models:
  * Add Grok 4 (256K context) and Grok 4 (07-09) variant
  * Add Grok 3 Mini Beta and Mini Fast Beta variants
  * Update context limits to match actual model capabilities
  * Remove outdated grok-beta and grok-2-1212 models
- Add MOONSHOT_API_KEY to environment configuration
- Register Moonshot provider in service status monitoring
- Full OpenAI-compatible API integration via api.moonshot.ai
- Fix TypeScript errors in GitHub provider

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude <noreply@anthropic.com>
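Because the commit describes the Moonshot integration as OpenAI-compatible, client code can reuse the standard OpenAI SDK by pointing it at the Moonshot endpoint. A minimal sketch of that pattern; the https://api.moonshot.ai/v1 base URL and the moonshot-v1-8k model id are assumptions taken from the commit description, not verified against this repo's provider code:

import OpenAI from 'openai';

// Assumed endpoint and model id based on the commit description; the
// provider registered in this repo may use different defaults.
const client = new OpenAI({
  apiKey: process.env.MOONSHOT_API_KEY, // added to environment configuration by this commit
  baseURL: 'https://api.moonshot.ai/v1', // OpenAI-compatible Moonshot endpoint
});

const completion = await client.chat.completions.create({
  model: 'moonshot-v1-8k', // one of the legacy moonshot-v1 series models
  messages: [{ role: 'user', content: 'Hello, Kimi!' }],
});

console.log(completion.choices[0].message.content);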
@@ -41,6 +41,29 @@ function getCompletionTokenLimit(modelDetails: ModelInfo): number {
   return Math.min(MAX_TOKENS, 16384);
 }
 
+function validateTokenLimits(modelDetails: ModelInfo, requestedTokens: number): { valid: boolean; error?: string } {
+  const modelMaxTokens = modelDetails.maxTokenAllowed || 128000;
+  const maxCompletionTokens = getCompletionTokenLimit(modelDetails);
+
+  // Check against model's context window
+  if (requestedTokens > modelMaxTokens) {
+    return {
+      valid: false,
+      error: `Requested tokens (${requestedTokens}) exceed model's context window (${modelMaxTokens}). Please reduce your request size.`,
+    };
+  }
+
+  // Check against completion token limits
+  if (requestedTokens > maxCompletionTokens) {
+    return {
+      valid: false,
+      error: `Requested tokens (${requestedTokens}) exceed model's completion limit (${maxCompletionTokens}). Consider using a model with higher token limits.`,
+    };
+  }
+
+  return { valid: true };
+}
+
 async function llmCallAction({ context, request }: ActionFunctionArgs) {
   const { system, message, model, provider, streamOutput } = await request.json<{
     system: string;
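For reference, validateTokenLimits returns a result object rather than throwing, so callers decide how to surface the failure. A quick sketch of its behavior; the ModelInfo literal is hypothetical, with just enough fields to exercise the check:

// Hypothetical ModelInfo value, for illustration only.
const modelDetails = { maxTokenAllowed: 8192 } as ModelInfo;

validateTokenLimits(modelDetails, 4096);
// => { valid: true }, assuming the completion limit is above 4096

validateTokenLimits(modelDetails, 32768);
// => { valid: false, error: "Requested tokens (32768) exceed model's context window (8192). ..." }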
@@ -104,6 +127,23 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
       });
     }
 
+    // Handle token limit errors with helpful messages
+    if (
+      error instanceof Error &&
+      (error.message?.includes('max_tokens') ||
+        error.message?.includes('token') ||
+        error.message?.includes('exceeds') ||
+        error.message?.includes('maximum'))
+    ) {
+      throw new Response(
+        `Token limit error: ${error.message}. Try reducing your request size or using a model with higher token limits.`,
+        {
+          status: 400,
+          statusText: 'Token Limit Exceeded',
+        },
+      );
+    }
+
     throw new Response(null, {
       status: 500,
       statusText: 'Internal Server Error',
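Note that this substring check appears twice in the file (here and in the later catch block), and matching on 'token' alone is broad enough to catch unrelated failures such as an invalid API token. If it were factored out, a shared predicate might look like the following; isTokenLimitError is a hypothetical helper, not part of the commit:

// Hypothetical helper; the commit inlines this check in both catch blocks.
function isTokenLimitError(error: unknown): error is Error {
  if (!(error instanceof Error)) {
    return false;
  }

  // Broad substring match mirroring the commit's checks. Note that 'token'
  // also matches auth-token errors, so this can over-trigger.
  return ['max_tokens', 'token', 'exceeds', 'maximum'].some((needle) => error.message?.includes(needle));
}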
@@ -120,6 +160,16 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
 
   const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);
 
+  // Validate token limits before making API request
+  const validation = validateTokenLimits(modelDetails, dynamicMaxTokens);
+
+  if (!validation.valid) {
+    throw new Response(validation.error, {
+      status: 400,
+      statusText: 'Token Limit Exceeded',
+    });
+  }
+
   const providerInfo = PROVIDER_LIST.find((p) => p.name === provider.name);
 
   if (!providerInfo) {
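One thing to watch in this hunk: the ternary on the dynamicMaxTokens line implies modelDetails can be undefined, yet it is passed straight into validateTokenLimits, which dereferences modelDetails.maxTokenAllowed. A guarded call, sketched under that assumption, would avoid a potential runtime error:

// Sketch only: skip validation when no model metadata is available,
// assuming modelDetails can be undefined as the ternary above suggests.
if (modelDetails) {
  const validation = validateTokenLimits(modelDetails, dynamicMaxTokens);

  if (!validation.valid) {
    throw new Response(validation.error, { status: 400, statusText: 'Token Limit Exceeded' });
  }
}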
@@ -215,6 +265,29 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
       );
     }
 
+    // Handle token limit errors with helpful messages
+    if (
+      error instanceof Error &&
+      (error.message?.includes('max_tokens') ||
+        error.message?.includes('token') ||
+        error.message?.includes('exceeds') ||
+        error.message?.includes('maximum'))
+    ) {
+      return new Response(
+        JSON.stringify({
+          ...errorResponse,
+          message: `Token limit error: ${error.message}. Try reducing your request size or using a model with higher token limits.`,
+          statusCode: 400,
+          isRetryable: false,
+        }),
+        {
+          status: 400,
+          headers: { 'Content-Type': 'application/json' },
+          statusText: 'Token Limit Exceeded',
+        },
+      );
+    }
+
     return new Response(JSON.stringify(errorResponse), {
       status: errorResponse.statusCode,
       headers: { 'Content-Type': 'application/json' },
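On the client side, this branch means a token-limit failure arrives as an HTTP 400 whose JSON body carries message, statusCode, and isRetryable: false, so callers know to stop retrying. A minimal consumer sketch; the /api/llmcall path and the request payload values are assumptions for illustration:

// Hypothetical client; endpoint path and payload values are assumed.
const payload = {
  model: 'moonshot-v1-8k',
  provider: { name: 'Moonshot' },
  message: 'Summarize this document.',
  system: 'You are a helpful assistant.',
  streamOutput: false,
};

const res = await fetch('/api/llmcall', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(payload),
});

if (!res.ok) {
  const err = (await res.json()) as { message: string; statusCode: number; isRetryable: boolean };

  if (res.status === 400 && !err.isRetryable) {
    // Token limit exceeded: do not retry; shrink the prompt or switch models.
    console.error(err.message);
  }
}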