feat: add Moonshot AI (Kimi) provider and update xAI Grok models (#1953)

- Add comprehensive Moonshot AI provider with 11 models including:
  * Legacy moonshot-v1 series (8k, 32k, 128k context)
  * Latest Kimi K2 models (K2 Preview, Turbo, Thinking)
  * Vision-enabled models for multimodal capabilities
  * Auto-selecting model variants

- Update xAI provider with latest Grok models:
  * Add Grok 4 (256K context) and Grok 4 (07-09) variant
  * Add Grok 3 Mini Beta and Mini Fast Beta variants
  * Update context limits to match actual model capabilities
  * Remove outdated grok-beta and grok-2-1212 models

- Add MOONSHOT_API_KEY to environment configuration
- Register Moonshot provider in service status monitoring
- Full OpenAI-compatible API integration via api.moonshot.ai
- Fix TypeScript errors in GitHub provider

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-authored-by: Claude <noreply@anthropic.com>
Authored by Stijnus on 2025-08-31 18:54:14 +02:00; committed by GitHub
parent 56f5d6f68c
commit df242a7935
18 changed files with 810 additions and 192 deletions
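As context for the "Full OpenAI-compatible API integration" bullet above, here is a minimal sketch of how a client could talk to the new provider, assuming the `openai` npm package. Only the MOONSHOT_API_KEY variable and the api.moonshot.ai host come from this commit; the '/v1' path, model name, and setup code are illustrative assumptions, not the repo's actual provider class.

// Hypothetical sketch: Moonshot exposes an OpenAI-compatible API, so a
// standard OpenAI client pointed at api.moonshot.ai should work.
import OpenAI from 'openai';

const moonshot = new OpenAI({
  apiKey: process.env.MOONSHOT_API_KEY, // env var added by this commit
  baseURL: 'https://api.moonshot.ai/v1', // '/v1' path is an assumption
});

const completion = await moonshot.chat.completions.create({
  model: 'moonshot-v1-8k', // legacy moonshot-v1 series (8k/32k/128k context)
  messages: [{ role: 'user', content: 'Hello, Kimi!' }],
});
console.log(completion.choices[0].message.content);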


@@ -1,18 +1,19 @@
 /*
- * Maximum tokens for response generation (conservative default for older models)
- * Modern models can handle much higher limits - specific limits are set per model
+ * Maximum tokens for response generation (updated for modern model capabilities)
+ * This serves as a fallback when model-specific limits are unavailable
+ * Modern models like Claude 3.5, GPT-4o, and Gemini Pro support 128k+ tokens
 */
-export const MAX_TOKENS = 32000;
+export const MAX_TOKENS = 128000;
 
 /*
  * Provider-specific default completion token limits
  * Used as fallbacks when model doesn't specify maxCompletionTokens
  */
 export const PROVIDER_COMPLETION_LIMITS: Record<string, number> = {
-  OpenAI: 16384,
-  Github: 16384, // GitHub Models use OpenAI-compatible limits
-  Anthropic: 128000,
-  Google: 32768,
+  OpenAI: 4096, // Standard GPT models (o1 models have much higher limits)
+  Github: 4096, // GitHub Models use OpenAI-compatible limits
+  Anthropic: 64000, // Conservative limit for Claude 4 models (Opus: 32k, Sonnet: 64k)
+  Google: 8192, // Gemini 1.5 Pro/Flash standard limit
   Cohere: 4000,
   DeepSeek: 8192,
   Groq: 8192,
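The streamText hunk below calls getCompletionTokenLimit(modelDetails), whose implementation is not part of this excerpt. A plausible fallback chain implied by the constants above, with a ModelInfo shape inferred from fields referenced later in the diff (the real interface may differ):

// Sketch of the fallback order the comments above describe: per-model limit
// first, then the per-provider default, then the global MAX_TOKENS.
interface ModelInfo {
  name: string;
  provider: string;
  maxTokenAllowed?: number;
  maxCompletionTokens?: number;
}

function getCompletionTokenLimit(model: ModelInfo): number {
  return (
    model.maxCompletionTokens ?? // per-model limit, when the catalog defines one
    PROVIDER_COMPLETION_LIMITS[model.provider] ?? // per-provider fallback
    MAX_TOKENS // global fallback (128000 after this change)
  );
}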


@@ -142,11 +142,11 @@ export async function streamText(props: {
   const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);
 
-  // Additional safety cap - should not be needed with proper completion limits, but kept for safety
-  const safeMaxTokens = Math.min(dynamicMaxTokens, 128000);
+  // Use model-specific limits directly - no artificial cap needed
+  const safeMaxTokens = dynamicMaxTokens;
 
   logger.info(
-    `Max tokens for model ${modelDetails.name} is ${safeMaxTokens} (capped from ${dynamicMaxTokens}) based on model limits`,
+    `Token limits for model ${modelDetails.name}: maxTokens=${safeMaxTokens}, maxTokenAllowed=${modelDetails.maxTokenAllowed}, maxCompletionTokens=${modelDetails.maxCompletionTokens}`,
   );
 
   let systemPrompt =
@@ -221,11 +221,18 @@ export async function streamText(props: {
   logger.info(`Sending llm call to ${provider.name} with model ${modelDetails.name}`);
 
-  // DEBUG: Log reasoning model detection
+  // Log reasoning model detection and token parameters
   const isReasoning = isReasoningModel(modelDetails.name);
-  logger.info(`DEBUG STREAM: Model "${modelDetails.name}" detected as reasoning model: ${isReasoning}`);
+  logger.info(
+    `Model "${modelDetails.name}" is reasoning model: ${isReasoning}, using ${isReasoning ? 'maxCompletionTokens' : 'maxTokens'}: ${safeMaxTokens}`,
+  );
 
-  // console.log(systemPrompt, processedMessages);
+  // Validate token limits before API call
+  if (safeMaxTokens > (modelDetails.maxTokenAllowed || 128000)) {
+    logger.warn(
+      `Token limit warning: requesting ${safeMaxTokens} tokens but model supports max ${modelDetails.maxTokenAllowed || 128000}`,
+    );
+  }
 
+  // Use maxCompletionTokens for reasoning models (o1, GPT-5), maxTokens for traditional models
   const tokenParams = isReasoning ? { maxCompletionTokens: safeMaxTokens } : { maxTokens: safeMaxTokens };
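isReasoningModel is used in this hunk but not defined in the excerpt. A hedged sketch of a name-based check covering only the families the comment names (o1, GPT-5); the project's actual detection logic is not shown and may cover more models:

// Assumed implementation, not taken from the repo: detect reasoning-model
// families by model-name prefix.
function isReasoningModel(modelName: string): boolean {
  const name = modelName.toLowerCase();
  return name.startsWith('o1') || name.startsWith('gpt-5');
}

// Usage mirrors the tokenParams selection above: reasoning models receive
// maxCompletionTokens, traditional models receive maxTokens.
const params = isReasoningModel('o1-mini')
  ? { maxCompletionTokens: 8192 }
  : { maxTokens: 8192 };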