feat: add Moonshot AI (Kimi) provider and update xAI Grok models (#1953)
- Add comprehensive Moonshot AI provider with 11 models, including:
  * Legacy moonshot-v1 series (8k, 32k, 128k context)
  * Latest Kimi K2 models (K2 Preview, Turbo, Thinking)
  * Vision-enabled models for multimodal capabilities
  * Auto-selecting model variants
- Update xAI provider with latest Grok models:
  * Add Grok 4 (256K context) and Grok 4 (07-09) variant
  * Add Grok 3 Mini Beta and Mini Fast Beta variants
  * Update context limits to match actual model capabilities
  * Remove outdated grok-beta and grok-2-1212 models
- Add MOONSHOT_API_KEY to environment configuration
- Register Moonshot provider in service status monitoring
- Full OpenAI-compatible API integration via api.moonshot.ai
- Fix TypeScript errors in GitHub provider

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude <noreply@anthropic.com>
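Because the commit describes the Moonshot integration as OpenAI-compatible, client code can reuse the standard OpenAI SDK by pointing it at the Moonshot endpoint. A minimal sketch of that pattern; the https://api.moonshot.ai/v1 base URL and the moonshot-v1-8k model id are assumptions taken from the commit description, not verified against this repo's provider code:

import OpenAI from 'openai';

// Assumed endpoint and model id based on the commit description; the
// provider registered in this repo may use different defaults.
const client = new OpenAI({
  apiKey: process.env.MOONSHOT_API_KEY, // added to environment configuration by this commit
  baseURL: 'https://api.moonshot.ai/v1', // OpenAI-compatible Moonshot endpoint
});

const completion = await client.chat.completions.create({
  model: 'moonshot-v1-8k', // one of the legacy moonshot-v1 series models
  messages: [{ role: 'user', content: 'Hello, Kimi!' }],
});

console.log(completion.choices[0].message.content);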
@@ -41,6 +41,29 @@ function getCompletionTokenLimit(modelDetails: ModelInfo): number {
   return Math.min(MAX_TOKENS, 16384);
 }
 
+function validateTokenLimits(modelDetails: ModelInfo, requestedTokens: number): { valid: boolean; error?: string } {
+  const modelMaxTokens = modelDetails.maxTokenAllowed || 128000;
+  const maxCompletionTokens = getCompletionTokenLimit(modelDetails);
+
+  // Check against model's context window
+  if (requestedTokens > modelMaxTokens) {
+    return {
+      valid: false,
+      error: `Requested tokens (${requestedTokens}) exceed model's context window (${modelMaxTokens}). Please reduce your request size.`,
+    };
+  }
+
+  // Check against completion token limits
+  if (requestedTokens > maxCompletionTokens) {
+    return {
+      valid: false,
+      error: `Requested tokens (${requestedTokens}) exceed model's completion limit (${maxCompletionTokens}). Consider using a model with higher token limits.`,
+    };
+  }
+
+  return { valid: true };
+}
+
 async function llmCallAction({ context, request }: ActionFunctionArgs) {
   const { system, message, model, provider, streamOutput } = await request.json<{
     system: string;
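For reference, validateTokenLimits returns a result object rather than throwing, so callers decide how to surface the failure. A quick sketch of its behavior; the ModelInfo literal is hypothetical, with just enough fields to exercise the check:

// Hypothetical ModelInfo value, for illustration only.
const modelDetails = { maxTokenAllowed: 8192 } as ModelInfo;

validateTokenLimits(modelDetails, 4096);
// => { valid: true }, assuming the completion limit is above 4096

validateTokenLimits(modelDetails, 32768);
// => { valid: false, error: "Requested tokens (32768) exceed model's context window (8192). ..." }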
@@ -104,6 +127,23 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
       });
     }
 
+    // Handle token limit errors with helpful messages
+    if (
+      error instanceof Error &&
+      (error.message?.includes('max_tokens') ||
+        error.message?.includes('token') ||
+        error.message?.includes('exceeds') ||
+        error.message?.includes('maximum'))
+    ) {
+      throw new Response(
+        `Token limit error: ${error.message}. Try reducing your request size or using a model with higher token limits.`,
+        {
+          status: 400,
+          statusText: 'Token Limit Exceeded',
+        },
+      );
+    }
+
     throw new Response(null, {
       status: 500,
       statusText: 'Internal Server Error',
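Note that this substring check appears twice in the file (here and in the later catch block), and matching on 'token' alone is broad enough to catch unrelated failures such as an invalid API token. If it were factored out, a shared predicate might look like the following; isTokenLimitError is a hypothetical helper, not part of the commit:

// Hypothetical helper; the commit inlines this check in both catch blocks.
function isTokenLimitError(error: unknown): error is Error {
  if (!(error instanceof Error)) {
    return false;
  }

  // Broad substring match mirroring the commit's checks. Note that 'token'
  // also matches auth-token errors, so this can over-trigger.
  return ['max_tokens', 'token', 'exceeds', 'maximum'].some((needle) => error.message?.includes(needle));
}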
@@ -120,6 +160,16 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
 
   const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);
 
+  // Validate token limits before making API request
+  const validation = validateTokenLimits(modelDetails, dynamicMaxTokens);
+
+  if (!validation.valid) {
+    throw new Response(validation.error, {
+      status: 400,
+      statusText: 'Token Limit Exceeded',
+    });
+  }
+
   const providerInfo = PROVIDER_LIST.find((p) => p.name === provider.name);
 
   if (!providerInfo) {
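One thing to watch in this hunk: the ternary on the dynamicMaxTokens line implies modelDetails can be undefined, yet it is passed straight into validateTokenLimits, which dereferences modelDetails.maxTokenAllowed. A guarded call, sketched under that assumption, would avoid a potential runtime error:

// Sketch only: skip validation when no model metadata is available,
// assuming modelDetails can be undefined as the ternary above suggests.
if (modelDetails) {
  const validation = validateTokenLimits(modelDetails, dynamicMaxTokens);

  if (!validation.valid) {
    throw new Response(validation.error, { status: 400, statusText: 'Token Limit Exceeded' });
  }
}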
@@ -215,6 +265,29 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) {
       );
     }
 
+    // Handle token limit errors with helpful messages
+    if (
+      error instanceof Error &&
+      (error.message?.includes('max_tokens') ||
+        error.message?.includes('token') ||
+        error.message?.includes('exceeds') ||
+        error.message?.includes('maximum'))
+    ) {
+      return new Response(
+        JSON.stringify({
+          ...errorResponse,
+          message: `Token limit error: ${error.message}. Try reducing your request size or using a model with higher token limits.`,
+          statusCode: 400,
+          isRetryable: false,
+        }),
+        {
+          status: 400,
+          headers: { 'Content-Type': 'application/json' },
+          statusText: 'Token Limit Exceeded',
+        },
+      );
+    }
+
     return new Response(JSON.stringify(errorResponse), {
       status: errorResponse.statusCode,
       headers: { 'Content-Type': 'application/json' },
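On the client side, this branch means a token-limit failure arrives as an HTTP 400 whose JSON body carries message, statusCode, and isRetryable: false, so callers know to stop retrying. A minimal consumer sketch; the /api/llmcall path and the request payload values are assumptions for illustration:

// Hypothetical client; endpoint path and payload values are assumed.
const payload = {
  model: 'moonshot-v1-8k',
  provider: { name: 'Moonshot' },
  message: 'Summarize this document.',
  system: 'You are a helpful assistant.',
  streamOutput: false,
};

const res = await fetch('/api/llmcall', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(payload),
});

if (!res.ok) {
  const err = (await res.json()) as { message: string; statusCode: number; isRetryable: boolean };

  if (res.status === 400 && !err.isRetryable) {
    // Token limit exceeded: do not retry; shrink the prompt or switch models.
    console.error(err.message);
  }
}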