feat: add Moonshot AI (Kimi) provider and update xAI Grok models (#1953)

- Add comprehensive Moonshot AI provider with 11 models including:
  * Legacy moonshot-v1 series (8k, 32k, 128k context)
  * Latest Kimi K2 models (K2 Preview, Turbo, Thinking)
  * Vision-enabled models for multimodal capabilities
  * Auto-selecting model variants

- Update xAI provider with latest Grok models:
  * Add Grok 4 (256K context) and Grok 4 (07-09) variant
  * Add Grok 3 Mini Beta and Mini Fast Beta variants
  * Update context limits to match actual model capabilities
  * Remove outdated grok-beta and grok-2-1212 models

- Add MOONSHOT_API_KEY to environment configuration
- Register Moonshot provider in service status monitoring
- Full OpenAI-compatible API integration via api.moonshot.ai
- Fix TypeScript errors in GitHub provider

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Stijnus
2025-08-31 18:54:14 +02:00
committed by GitHub
parent 56f5d6f68c
commit df242a7935
18 changed files with 810 additions and 192 deletions

View File

@@ -41,6 +41,29 @@ function getCompletionTokenLimit(modelDetails: ModelInfo): number {
return Math.min(MAX_TOKENS, 16384);
}
/**
 * Checks a requested token count against the limits of the given model.
 *
 * Two ceilings are compared, in this order:
 *  1. the model's context window (`modelDetails.maxTokenAllowed`, falling back
 *     to 128000 when that value is missing or otherwise falsy);
 *  2. the completion limit returned by `getCompletionTokenLimit(modelDetails)`.
 *
 * @param modelDetails - Model description whose limits are consulted.
 * @param requestedTokens - Number of tokens the caller intends to request.
 * @returns `{ valid: true }` when neither ceiling is exceeded; otherwise
 *   `{ valid: false, error }` where `error` describes the first ceiling that
 *   was exceeded.
 */
function validateTokenLimits(modelDetails: ModelInfo, requestedTokens: number): { valid: boolean; error?: string } {
  const contextWindow = modelDetails.maxTokenAllowed || 128000;
  const completionCap = getCompletionTokenLimit(modelDetails);

  // The context-window check takes precedence over the completion-limit check.
  let failure: string | undefined;

  if (requestedTokens > contextWindow) {
    failure = `Requested tokens (${requestedTokens}) exceed model's context window (${contextWindow}). Please reduce your request size.`;
  } else if (requestedTokens > completionCap) {
    failure = `Requested tokens (${requestedTokens}) exceed model's completion limit (${completionCap}). Consider using a model with higher token limits.`;
  }

  return failure === undefined ? { valid: true } : { valid: false, error: failure };
}
async function llmCallAction({ context, request }: ActionFunctionArgs) {
const { system, message, model, provider, streamOutput } = await request.json<{
system: string;
@@ -104,6 +127,23 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
});
}
// Handle token limit errors with helpful messages
if (
error instanceof Error &&
(error.message?.includes('max_tokens') ||
error.message?.includes('token') ||
error.message?.includes('exceeds') ||
error.message?.includes('maximum'))
) {
throw new Response(
`Token limit error: ${error.message}. Try reducing your request size or using a model with higher token limits.`,
{
status: 400,
statusText: 'Token Limit Exceeded',
},
);
}
throw new Response(null, {
status: 500,
statusText: 'Internal Server Error',
@@ -120,6 +160,16 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);
// Validate token limits before making API request
const validation = validateTokenLimits(modelDetails, dynamicMaxTokens);
if (!validation.valid) {
throw new Response(validation.error, {
status: 400,
statusText: 'Token Limit Exceeded',
});
}
const providerInfo = PROVIDER_LIST.find((p) => p.name === provider.name);
if (!providerInfo) {
@@ -215,6 +265,29 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
);
}
// Handle token limit errors with helpful messages
if (
error instanceof Error &&
(error.message?.includes('max_tokens') ||
error.message?.includes('token') ||
error.message?.includes('exceeds') ||
error.message?.includes('maximum'))
) {
return new Response(
JSON.stringify({
...errorResponse,
message: `Token limit error: ${error.message}. Try reducing your request size or using a model with higher token limits.`,
statusCode: 400,
isRetryable: false,
}),
{
status: 400,
headers: { 'Content-Type': 'application/json' },
statusText: 'Token Limit Exceeded',
},
);
}
return new Response(JSON.stringify(errorResponse), {
status: errorResponse.statusCode,
headers: { 'Content-Type': 'application/json' },