From df242a79356756b15dc0d734cff3b064523bb4ae Mon Sep 17 00:00:00 2001 From: Stijnus <72551117+Stijnus@users.noreply.github.com> Date: Sun, 31 Aug 2025 18:54:14 +0200 Subject: [PATCH] feat: add Moonshot AI (Kimi) provider and update xAI Grok models (#1953) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add comprehensive Moonshot AI provider with 11 models including: * Legacy moonshot-v1 series (8k, 32k, 128k context) * Latest Kimi K2 models (K2 Preview, Turbo, Thinking) * Vision-enabled models for multimodal capabilities * Auto-selecting model variants - Update xAI provider with latest Grok models: * Add Grok 4 (256K context) and Grok 4 (07-09) variant * Add Grok 3 Mini Beta and Mini Fast Beta variants * Update context limits to match actual model capabilities * Remove outdated grok-beta and grok-2-1212 models - Add MOONSHOT_API_KEY to environment configuration - Register Moonshot provider in service status monitoring - Full OpenAI-compatible API integration via api.moonshot.ai - Fix TypeScript errors in GitHub provider 🤖 Generated with [Claude Code](https://claude.ai/code) Co-authored-by: Claude --- .env.example | 219 ++++++------ .../providers/cloud/CloudProvidersTab.tsx | 5 +- .../service-status/provider-factory.ts | 9 + .../service-status/providers/moonshot.ts | 37 ++ .../tabs/providers/service-status/types.ts | 1 + app/components/chat/ModelSelector.tsx | 332 ++++++++++++++++-- app/lib/.server/llm/constants.ts | 15 +- app/lib/.server/llm/stream-text.ts | 19 +- app/lib/modules/llm/providers/anthropic.ts | 22 +- app/lib/modules/llm/providers/github.ts | 116 +++++- app/lib/modules/llm/providers/google.ts | 12 +- app/lib/modules/llm/providers/moonshot.ts | 71 ++++ app/lib/modules/llm/providers/openai.ts | 46 ++- app/lib/modules/llm/providers/perplexity.ts | 12 +- app/lib/modules/llm/providers/xai.ts | 8 +- app/lib/modules/llm/registry.ts | 2 + app/routes/api.llmcall.ts | 73 ++++ vite.config.ts | 3 + 18 files changed, 810 insertions(+), 192 deletions(-) create mode 100644 app/components/@settings/tabs/providers/service-status/providers/moonshot.ts create mode 100644 app/lib/modules/llm/providers/moonshot.ts diff --git a/.env.example b/.env.example index 4b333be..35fe67e 100644 --- a/.env.example +++ b/.env.example @@ -1,131 +1,142 @@ -# Rename this file to .env once you have filled in the below environment variables! 
+# ====================================== +# Environment Variables for Bolt.diy +# ====================================== +# Copy this file to .env.local and fill in your API keys +# See README.md for setup instructions -# Get your GROQ API Key here - -# https://console.groq.com/keys -# You only need this environment variable set if you want to use Groq models -GROQ_API_KEY= +# ====================================== +# AI PROVIDER API KEYS +# ====================================== -# Get your HuggingFace API Key here - -# https://huggingface.co/settings/tokens -# You only need this environment variable set if you want to use HuggingFace models -HuggingFace_API_KEY= +# Anthropic Claude +# Get your API key from: https://console.anthropic.com/ +ANTHROPIC_API_KEY=your_anthropic_api_key_here +# OpenAI GPT models +# Get your API key from: https://platform.openai.com/api-keys +OPENAI_API_KEY=your_openai_api_key_here -# Get your Open AI API Key by following these instructions - -# https://help.openai.com/en/articles/4936850-where-do-i-find-my-openai-api-key -# You only need this environment variable set if you want to use GPT models -OPENAI_API_KEY= +# GitHub Models (OpenAI models hosted by GitHub) +# Get your Personal Access Token from: https://github.com/settings/tokens +# - Select "Fine-grained tokens" +# - Set repository access to "All repositories" +# - Enable "GitHub Models" permission +GITHUB_API_KEY=github_pat_your_personal_access_token_here -# Get your Anthropic API Key in your account settings - -# https://console.anthropic.com/settings/keys -# You only need this environment variable set if you want to use Claude models -ANTHROPIC_API_KEY= +# Perplexity AI (Search-augmented models) +# Get your API key from: https://www.perplexity.ai/settings/api +PERPLEXITY_API_KEY=your_perplexity_api_key_here -# Get your OpenRouter API Key in your account settings - -# https://openrouter.ai/settings/keys -# You only need this environment variable set if you want to use OpenRouter models -OPEN_ROUTER_API_KEY= +# DeepSeek +# Get your API key from: https://platform.deepseek.com/api_keys +DEEPSEEK_API_KEY=your_deepseek_api_key_here -# Get your Google Generative AI API Key by following these instructions - -# https://console.cloud.google.com/apis/credentials -# You only need this environment variable set if you want to use Google Generative AI models -GOOGLE_GENERATIVE_AI_API_KEY= +# Google Gemini +# Get your API key from: https://makersuite.google.com/app/apikey +GOOGLE_GENERATIVE_AI_API_KEY=your_google_gemini_api_key_here -# You only need this environment variable set if you want to use oLLAMA models -# DONT USE http://localhost:11434 due to IPV6 issues -# USE EXAMPLE http://127.0.0.1:11434 -OLLAMA_API_BASE_URL= +# Cohere +# Get your API key from: https://dashboard.cohere.ai/api-keys +COHERE_API_KEY=your_cohere_api_key_here -# You only need this environment variable set if you want to use OpenAI Like models -OPENAI_LIKE_API_BASE_URL= +# Groq (Fast inference) +# Get your API key from: https://console.groq.com/keys +GROQ_API_KEY=your_groq_api_key_here -# You only need this environment variable set if you want to use Together AI models -TOGETHER_API_BASE_URL= +# Mistral +# Get your API key from: https://console.mistral.ai/api-keys/ +MISTRAL_API_KEY=your_mistral_api_key_here -# You only need this environment variable set if you want to use DeepSeek models through their API -DEEPSEEK_API_KEY= +# Together AI +# Get your API key from: https://api.together.xyz/settings/api-keys 
+TOGETHER_API_KEY=your_together_api_key_here -# Get your OpenAI Like API Key -OPENAI_LIKE_API_KEY= +# X.AI (Elon Musk's company) +# Get your API key from: https://console.x.ai/ +XAI_API_KEY=your_xai_api_key_here -# Get your Together API Key -TOGETHER_API_KEY= +# Moonshot AI (Kimi models) +# Get your API key from: https://platform.moonshot.ai/console/api-keys +MOONSHOT_API_KEY=your_moonshot_api_key_here -# You only need this environment variable set if you want to use Hyperbolic models -#Get your Hyperbolics API Key at https://app.hyperbolic.xyz/settings -#baseURL="https://api.hyperbolic.xyz/v1/chat/completions" -HYPERBOLIC_API_KEY= -HYPERBOLIC_API_BASE_URL= +# Hugging Face +# Get your API key from: https://huggingface.co/settings/tokens +HuggingFace_API_KEY=your_huggingface_api_key_here -# Get your Mistral API Key by following these instructions - -# https://console.mistral.ai/api-keys/ -# You only need this environment variable set if you want to use Mistral models -MISTRAL_API_KEY= +# Hyperbolic +# Get your API key from: https://app.hyperbolic.xyz/settings +HYPERBOLIC_API_KEY=your_hyperbolic_api_key_here -# Get the Cohere Api key by following these instructions - -# https://dashboard.cohere.com/api-keys -# You only need this environment variable set if you want to use Cohere models -COHERE_API_KEY= +# OpenRouter (Meta routing for multiple providers) +# Get your API key from: https://openrouter.ai/keys +OPEN_ROUTER_API_KEY=your_openrouter_api_key_here -# Get LMStudio Base URL from LM Studio Developer Console -# Make sure to enable CORS -# DONT USE http://localhost:1234 due to IPV6 issues -# Example: http://127.0.0.1:1234 -LMSTUDIO_API_BASE_URL= +# ====================================== +# CUSTOM PROVIDER BASE URLS (Optional) +# ====================================== -# Get your xAI API key -# https://x.ai/api -# You only need this environment variable set if you want to use xAI models -XAI_API_KEY= +# Ollama (Local models) +# DON'T USE http://localhost:11434 due to IPv6 issues +# USE: http://127.0.0.1:11434 +OLLAMA_API_BASE_URL=http://127.0.0.1:11434 -# Get your Perplexity API Key here - -# https://www.perplexity.ai/settings/api -# You only need this environment variable set if you want to use Perplexity models -PERPLEXITY_API_KEY= +# OpenAI-like API (Compatible providers) +OPENAI_LIKE_API_BASE_URL=your_openai_like_base_url_here +OPENAI_LIKE_API_KEY=your_openai_like_api_key_here -# Get your AWS configuration -# https://console.aws.amazon.com/iam/home -# The JSON should include the following keys: -# - region: The AWS region where Bedrock is available. -# - accessKeyId: Your AWS access key ID. -# - secretAccessKey: Your AWS secret access key. -# - sessionToken (optional): Temporary session token if using an IAM role or temporary credentials. -# Example JSON: -# {"region": "us-east-1", "accessKeyId": "yourAccessKeyId", "secretAccessKey": "yourSecretAccessKey", "sessionToken": "yourSessionToken"} -AWS_BEDROCK_CONFIG= +# Together AI Base URL +TOGETHER_API_BASE_URL=your_together_base_url_here -# Include this environment variable if you want more logging for debugging locally -VITE_LOG_LEVEL=debug +# Hyperbolic Base URL +HYPERBOLIC_API_BASE_URL=https://api.hyperbolic.xyz/v1/chat/completions -# Get your GitHub Personal Access Token here - -# https://github.com/settings/tokens -# This token is used for: -# 1. Importing/cloning GitHub repositories without rate limiting -# 2. Accessing private repositories -# 3. 
Automatic GitHub authentication (no need to manually connect in the UI) -# -# For classic tokens, ensure it has these scopes: repo, read:org, read:user -# For fine-grained tokens, ensure it has Repository and Organization access -VITE_GITHUB_ACCESS_TOKEN= +# LMStudio (Local models) +# Make sure to enable CORS in LMStudio +# DON'T USE http://localhost:1234 due to IPv6 issues +# USE: http://127.0.0.1:1234 +LMSTUDIO_API_BASE_URL=http://127.0.0.1:1234 -# Specify the type of GitHub token you're using -# Can be 'classic' or 'fine-grained' -# Classic tokens are recommended for broader access +# ====================================== +# CLOUD SERVICES CONFIGURATION +# ====================================== + +# AWS Bedrock Configuration (JSON format) +# Get your credentials from: https://console.aws.amazon.com/iam/home +# Example: {"region": "us-east-1", "accessKeyId": "yourAccessKeyId", "secretAccessKey": "yourSecretAccessKey"} +AWS_BEDROCK_CONFIG=your_aws_bedrock_config_json_here + +# ====================================== +# GITHUB INTEGRATION +# ====================================== + +# GitHub Personal Access Token +# Get from: https://github.com/settings/tokens +# Used for importing/cloning repositories and accessing private repos +VITE_GITHUB_ACCESS_TOKEN=your_github_personal_access_token_here + +# GitHub Token Type ('classic' or 'fine-grained') VITE_GITHUB_TOKEN_TYPE=classic -# Bug Report Configuration (Server-side only) -# GitHub token for creating bug reports - requires 'public_repo' scope -# This token should be configured on the server/deployment environment -# GITHUB_BUG_REPORT_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +# ====================================== +# DEVELOPMENT SETTINGS +# ====================================== -# Repository where bug reports will be created -# Format: "owner/repository" -# BUG_REPORT_REPO=stackblitz-labs/bolt.diy +# Development Mode +NODE_ENV=development -# Example Context Values for qwen2.5-coder:32b -# -# DEFAULT_NUM_CTX=32768 # Consumes 36GB of VRAM -# DEFAULT_NUM_CTX=24576 # Consumes 32GB of VRAM -# DEFAULT_NUM_CTX=12288 # Consumes 26GB of VRAM -# DEFAULT_NUM_CTX=6144 # Consumes 24GB of VRAM -DEFAULT_NUM_CTX= +# Application Port (optional, defaults to 3000) +PORT=3000 + +# Logging Level (debug, info, warn, error) +VITE_LOG_LEVEL=debug + +# Default Context Window Size (for local models) +DEFAULT_NUM_CTX=32768 + +# ====================================== +# INSTRUCTIONS +# ====================================== +# 1. Copy this file to .env.local: cp .env.example .env.local +# 2. Fill in the API keys you want to use +# 3. Restart your development server: npm run dev +# 4. 
Go to Settings > Providers to enable/configure providers \ No newline at end of file diff --git a/app/components/@settings/tabs/providers/cloud/CloudProvidersTab.tsx b/app/components/@settings/tabs/providers/cloud/CloudProvidersTab.tsx index 9f85b76..7311851 100644 --- a/app/components/@settings/tabs/providers/cloud/CloudProvidersTab.tsx +++ b/app/components/@settings/tabs/providers/cloud/CloudProvidersTab.tsx @@ -8,7 +8,7 @@ import { motion } from 'framer-motion'; import { classNames } from '~/utils/classNames'; import { toast } from 'react-toastify'; import { providerBaseUrlEnvKeys } from '~/utils/constants'; -import { SiAmazon, SiGoogle, SiHuggingface, SiPerplexity, SiOpenai } from 'react-icons/si'; +import { SiAmazon, SiGoogle, SiGithub, SiHuggingface, SiPerplexity, SiOpenai } from 'react-icons/si'; import { BsRobot, BsCloud } from 'react-icons/bs'; import { TbBrain, TbCloudComputing } from 'react-icons/tb'; import { BiCodeBlock, BiChip } from 'react-icons/bi'; @@ -21,6 +21,7 @@ type ProviderName = | 'Anthropic' | 'Cohere' | 'Deepseek' + | 'Github' | 'Google' | 'Groq' | 'HuggingFace' @@ -38,6 +39,7 @@ const PROVIDER_ICONS: Record = { Anthropic: FaBrain, Cohere: BiChip, Deepseek: BiCodeBlock, + Github: SiGithub, Google: SiGoogle, Groq: BsCloud, HuggingFace: SiHuggingface, @@ -53,6 +55,7 @@ const PROVIDER_ICONS: Record = { // Update PROVIDER_DESCRIPTIONS to use the same type const PROVIDER_DESCRIPTIONS: Partial> = { Anthropic: 'Access Claude and other Anthropic models', + Github: 'Use OpenAI models hosted through GitHub infrastructure', OpenAI: 'Use GPT-4, GPT-3.5, and other OpenAI models', }; diff --git a/app/components/@settings/tabs/providers/service-status/provider-factory.ts b/app/components/@settings/tabs/providers/service-status/provider-factory.ts index 3887781..d9f627d 100644 --- a/app/components/@settings/tabs/providers/service-status/provider-factory.ts +++ b/app/components/@settings/tabs/providers/service-status/provider-factory.ts @@ -13,6 +13,7 @@ import { OpenRouterStatusChecker } from './providers/openrouter'; import { PerplexityStatusChecker } from './providers/perplexity'; import { TogetherStatusChecker } from './providers/together'; import { XAIStatusChecker } from './providers/xai'; +import { MoonshotStatusChecker } from './providers/moonshot'; export class ProviderStatusCheckerFactory { private static _providerConfigs: Record = { @@ -82,6 +83,12 @@ export class ProviderStatusCheckerFactory { headers: {}, testModel: 'mistralai/Mixtral-8x7B-Instruct-v0.1', }, + Moonshot: { + statusUrl: 'https://status.moonshot.ai/', + apiUrl: 'https://api.moonshot.ai/v1/models', + headers: {}, + testModel: 'moonshot-v1-8k', + }, XAI: { statusUrl: 'https://status.x.ai/', apiUrl: 'https://api.x.ai/v1/models', @@ -120,6 +127,8 @@ export class ProviderStatusCheckerFactory { return new PerplexityStatusChecker(config); case 'Together': return new TogetherStatusChecker(config); + case 'Moonshot': + return new MoonshotStatusChecker(config); case 'XAI': return new XAIStatusChecker(config); default: diff --git a/app/components/@settings/tabs/providers/service-status/providers/moonshot.ts b/app/components/@settings/tabs/providers/service-status/providers/moonshot.ts new file mode 100644 index 0000000..718d755 --- /dev/null +++ b/app/components/@settings/tabs/providers/service-status/providers/moonshot.ts @@ -0,0 +1,37 @@ +import { BaseProviderChecker } from '~/components/@settings/tabs/providers/service-status/base-provider'; +import type { StatusCheckResult } from 
'~/components/@settings/tabs/providers/service-status/types'; + +export class MoonshotStatusChecker extends BaseProviderChecker { + async checkStatus(): Promise { + try { + // Check Moonshot API endpoint + const apiEndpoint = 'https://api.moonshot.ai/v1/models'; + const apiStatus = await this.checkEndpoint(apiEndpoint); + + // Check their main website + const websiteStatus = await this.checkEndpoint('https://www.moonshot.ai'); + + let status: StatusCheckResult['status'] = 'operational'; + let message = 'All systems operational'; + + if (apiStatus !== 'reachable' || websiteStatus !== 'reachable') { + status = apiStatus !== 'reachable' ? 'down' : 'degraded'; + message = apiStatus !== 'reachable' ? 'API appears to be down' : 'Service may be experiencing issues'; + } + + return { + status, + message, + incidents: [], // No public incident tracking available yet + }; + } catch (error) { + console.error('Error checking Moonshot status:', error); + + return { + status: 'degraded', + message: 'Unable to determine service status', + incidents: ['Note: Limited status information available'], + }; + } + } +} diff --git a/app/components/@settings/tabs/providers/service-status/types.ts b/app/components/@settings/tabs/providers/service-status/types.ts index 188a474..d09a865 100644 --- a/app/components/@settings/tabs/providers/service-status/types.ts +++ b/app/components/@settings/tabs/providers/service-status/types.ts @@ -9,6 +9,7 @@ export type ProviderName = | 'HuggingFace' | 'Hyperbolic' | 'Mistral' + | 'Moonshot' | 'OpenRouter' | 'Perplexity' | 'Together' diff --git a/app/components/chat/ModelSelector.tsx b/app/components/chat/ModelSelector.tsx index a15bce4..2ccb9a5 100644 --- a/app/components/chat/ModelSelector.tsx +++ b/app/components/chat/ModelSelector.tsx @@ -1,9 +1,84 @@ import type { ProviderInfo } from '~/types/model'; -import { useEffect, useState, useRef } from 'react'; +import { useEffect, useState, useRef, useMemo, useCallback } from 'react'; import type { KeyboardEvent } from 'react'; import type { ModelInfo } from '~/lib/modules/llm/types'; import { classNames } from '~/utils/classNames'; +// Fuzzy search utilities +const levenshteinDistance = (str1: string, str2: string): number => { + const matrix = []; + + for (let i = 0; i <= str2.length; i++) { + matrix[i] = [i]; + } + + for (let j = 0; j <= str1.length; j++) { + matrix[0][j] = j; + } + + for (let i = 1; i <= str2.length; i++) { + for (let j = 1; j <= str1.length; j++) { + if (str2.charAt(i - 1) === str1.charAt(j - 1)) { + matrix[i][j] = matrix[i - 1][j - 1]; + } else { + matrix[i][j] = Math.min(matrix[i - 1][j - 1] + 1, matrix[i][j - 1] + 1, matrix[i - 1][j] + 1); + } + } + } + + return matrix[str2.length][str1.length]; +}; + +const fuzzyMatch = (query: string, text: string): { score: number; matches: boolean } => { + if (!query) { + return { score: 0, matches: true }; + } + + if (!text) { + return { score: 0, matches: false }; + } + + const queryLower = query.toLowerCase(); + const textLower = text.toLowerCase(); + + // Exact substring match gets highest score + if (textLower.includes(queryLower)) { + return { score: 100 - (textLower.indexOf(queryLower) / textLower.length) * 20, matches: true }; + } + + // Fuzzy match with reasonable threshold + const distance = levenshteinDistance(queryLower, textLower); + const maxLen = Math.max(queryLower.length, textLower.length); + const similarity = 1 - distance / maxLen; + + return { + score: similarity > 0.6 ? 
similarity * 80 : 0, + matches: similarity > 0.6, + }; +}; + +const highlightText = (text: string, query: string): string => { + if (!query) { + return text; + } + + const regex = new RegExp(`(${query.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')})`, 'gi'); + + return text.replace(regex, '$1'); +}; + +const formatContextSize = (tokens: number): string => { + if (tokens >= 1000000) { + return `${(tokens / 1000000).toFixed(1)}M`; + } + + if (tokens >= 1000) { + return `${(tokens / 1000).toFixed(0)}K`; + } + + return tokens.toString(); +}; + interface ModelSelectorProps { model?: string; setModel?: (model: string) => void; @@ -40,12 +115,14 @@ export const ModelSelector = ({ modelLoading, }: ModelSelectorProps) => { const [modelSearchQuery, setModelSearchQuery] = useState(''); + const [debouncedModelSearchQuery, setDebouncedModelSearchQuery] = useState(''); const [isModelDropdownOpen, setIsModelDropdownOpen] = useState(false); const [focusedModelIndex, setFocusedModelIndex] = useState(-1); const modelSearchInputRef = useRef(null); const modelOptionsRef = useRef<(HTMLDivElement | null)[]>([]); const modelDropdownRef = useRef(null); const [providerSearchQuery, setProviderSearchQuery] = useState(''); + const [debouncedProviderSearchQuery, setDebouncedProviderSearchQuery] = useState(''); const [isProviderDropdownOpen, setIsProviderDropdownOpen] = useState(false); const [focusedProviderIndex, setFocusedProviderIndex] = useState(-1); const providerSearchInputRef = useRef(null); @@ -53,6 +130,23 @@ export const ModelSelector = ({ const providerDropdownRef = useRef(null); const [showFreeModelsOnly, setShowFreeModelsOnly] = useState(false); + // Debounce search queries + useEffect(() => { + const timer = setTimeout(() => { + setDebouncedModelSearchQuery(modelSearchQuery); + }, 150); + + return () => clearTimeout(timer); + }, [modelSearchQuery]); + + useEffect(() => { + const timer = setTimeout(() => { + setDebouncedProviderSearchQuery(providerSearchQuery); + }, 150); + + return () => clearTimeout(timer); + }, [providerSearchQuery]); + useEffect(() => { const handleClickOutside = (event: MouseEvent) => { if (modelDropdownRef.current && !modelDropdownRef.current.contains(event.target as Node)) { @@ -71,24 +165,64 @@ export const ModelSelector = ({ return () => document.removeEventListener('mousedown', handleClickOutside); }, []); - const filteredModels = [...modelList] - .filter((e) => e.provider === provider?.name && e.name) - .filter((model) => { - // Apply free models filter - if (showFreeModelsOnly && !isModelLikelyFree(model, provider?.name)) { - return false; - } + const filteredModels = useMemo(() => { + const baseModels = [...modelList].filter((e) => e.provider === provider?.name && e.name); - // Apply search filter - return ( - model.label.toLowerCase().includes(modelSearchQuery.toLowerCase()) || - model.name.toLowerCase().includes(modelSearchQuery.toLowerCase()) - ); - }); + return baseModels + .filter((model) => { + // Apply free models filter + if (showFreeModelsOnly && !isModelLikelyFree(model, provider?.name)) { + return false; + } - const filteredProviders = providerList.filter((p) => - p.name.toLowerCase().includes(providerSearchQuery.toLowerCase()), - ); + return true; + }) + .map((model) => { + // Calculate search scores for fuzzy matching + const labelMatch = fuzzyMatch(debouncedModelSearchQuery, model.label); + const nameMatch = fuzzyMatch(debouncedModelSearchQuery, model.name); + const contextMatch = fuzzyMatch(debouncedModelSearchQuery, formatContextSize(model.maxTokenAllowed)); + + const 
bestScore = Math.max(labelMatch.score, nameMatch.score, contextMatch.score); + const matches = labelMatch.matches || nameMatch.matches || contextMatch.matches || !debouncedModelSearchQuery; // Show all if no query + + return { + ...model, + searchScore: bestScore, + searchMatches: matches, + highlightedLabel: highlightText(model.label, debouncedModelSearchQuery), + highlightedName: highlightText(model.name, debouncedModelSearchQuery), + }; + }) + .filter((model) => model.searchMatches) + .sort((a, b) => { + // Sort by search score (highest first), then by label + if (debouncedModelSearchQuery) { + return b.searchScore - a.searchScore; + } + + return a.label.localeCompare(b.label); + }); + }, [modelList, provider?.name, showFreeModelsOnly, debouncedModelSearchQuery]); + + const filteredProviders = useMemo(() => { + if (!debouncedProviderSearchQuery) { + return providerList; + } + + return providerList + .map((provider) => { + const match = fuzzyMatch(debouncedProviderSearchQuery, provider.name); + return { + ...provider, + searchScore: match.score, + searchMatches: match.matches, + highlightedName: highlightText(provider.name, debouncedProviderSearchQuery), + }; + }) + .filter((provider) => provider.searchMatches) + .sort((a, b) => b.searchScore - a.searchScore); + }, [providerList, debouncedProviderSearchQuery]); // Reset free models filter when provider changes useEffect(() => { @@ -97,11 +231,30 @@ export const ModelSelector = ({ useEffect(() => { setFocusedModelIndex(-1); - }, [modelSearchQuery, isModelDropdownOpen, showFreeModelsOnly]); + }, [debouncedModelSearchQuery, isModelDropdownOpen, showFreeModelsOnly]); useEffect(() => { setFocusedProviderIndex(-1); - }, [providerSearchQuery, isProviderDropdownOpen]); + }, [debouncedProviderSearchQuery, isProviderDropdownOpen]); + + // Clear search functions + const clearModelSearch = useCallback(() => { + setModelSearchQuery(''); + setDebouncedModelSearchQuery(''); + + if (modelSearchInputRef.current) { + modelSearchInputRef.current.focus(); + } + }, []); + + const clearProviderSearch = useCallback(() => { + setProviderSearchQuery(''); + setDebouncedProviderSearchQuery(''); + + if (providerSearchInputRef.current) { + providerSearchInputRef.current.focus(); + } + }, []); useEffect(() => { if (isModelDropdownOpen && modelSearchInputRef.current) { @@ -137,6 +290,7 @@ export const ModelSelector = ({ setModel?.(selectedModel.name); setIsModelDropdownOpen(false); setModelSearchQuery(''); + setDebouncedModelSearchQuery(''); } break; @@ -144,12 +298,20 @@ export const ModelSelector = ({ e.preventDefault(); setIsModelDropdownOpen(false); setModelSearchQuery(''); + setDebouncedModelSearchQuery(''); break; case 'Tab': if (!e.shiftKey && focusedModelIndex === filteredModels.length - 1) { setIsModelDropdownOpen(false); } + break; + case 'k': + if (e.ctrlKey || e.metaKey) { + e.preventDefault(); + clearModelSearch(); + } + break; } }; @@ -186,6 +348,7 @@ export const ModelSelector = ({ setIsProviderDropdownOpen(false); setProviderSearchQuery(''); + setDebouncedProviderSearchQuery(''); } break; @@ -193,12 +356,20 @@ export const ModelSelector = ({ e.preventDefault(); setIsProviderDropdownOpen(false); setProviderSearchQuery(''); + setDebouncedProviderSearchQuery(''); break; case 'Tab': if (!e.shiftKey && focusedProviderIndex === filteredProviders.length - 1) { setIsProviderDropdownOpen(false); } + break; + case 'k': + if (e.ctrlKey || e.metaKey) { + e.preventDefault(); + clearProviderSearch(); + } + break; } }; @@ -292,9 +463,9 @@ export const ModelSelector = 
({ type="text" value={providerSearchQuery} onChange={(e) => setProviderSearchQuery(e.target.value)} - placeholder="Search providers..." + placeholder="Search providers... (⌘K to clear)" className={classNames( - 'w-full pl-2 py-1.5 rounded-md text-sm', + 'w-full pl-8 pr-8 py-1.5 rounded-md text-sm', 'bg-bolt-elements-background-depth-2 border border-bolt-elements-borderColor', 'text-bolt-elements-textPrimary placeholder:text-bolt-elements-textTertiary', 'focus:outline-none focus:ring-2 focus:ring-bolt-elements-focus', @@ -307,6 +478,19 @@ export const ModelSelector = ({
+ {providerSearchQuery && ( + + )} @@ -327,7 +511,18 @@ export const ModelSelector = ({ )} > {filteredProviders.length === 0 ? ( -
No providers found
+
+
+ {debouncedProviderSearchQuery + ? `No providers match "${debouncedProviderSearchQuery}"` + : 'No providers found'} +
+ {debouncedProviderSearchQuery && ( +
+ Try searching for provider names like "OpenAI", "Anthropic", or "Google" +
+ )} +
) : ( filteredProviders.map((providerOption, index) => (
- {providerOption.name} +
)) )} @@ -441,6 +641,14 @@ export const ModelSelector = ({
)} + {/* Search Result Count */} + {debouncedModelSearchQuery && filteredModels.length > 0 && ( +
+ {filteredModels.length} model{filteredModels.length !== 1 ? 's' : ''} found + {filteredModels.length > 5 && ' (showing best matches)'} +
+ )} + {/* Search Input */}
setModelSearchQuery(e.target.value)} - placeholder="Search models..." + placeholder="Search models... (⌘K to clear)" className={classNames( - 'w-full pl-2 py-1.5 rounded-md text-sm', + 'w-full pl-8 pr-8 py-1.5 rounded-md text-sm', 'bg-bolt-elements-background-depth-2 border border-bolt-elements-borderColor', 'text-bolt-elements-textPrimary placeholder:text-bolt-elements-textTertiary', 'focus:outline-none focus:ring-2 focus:ring-bolt-elements-focus', @@ -463,6 +671,19 @@ export const ModelSelector = ({
+ {modelSearchQuery && ( + + )}
@@ -483,16 +704,37 @@ export const ModelSelector = ({ )} > {modelLoading === 'all' || modelLoading === provider?.name ? ( -
Loading...
+
+
+ + Loading models... +
+
) : filteredModels.length === 0 ? ( -
- {showFreeModelsOnly ? 'No free models found' : 'No models found'} +
+
+ {debouncedModelSearchQuery + ? `No models match "${debouncedModelSearchQuery}"${showFreeModelsOnly ? ' (free only)' : ''}` + : showFreeModelsOnly + ? 'No free models available' + : 'No models available'} +
+ {debouncedModelSearchQuery && ( +
+ Try searching for model names, context sizes (e.g., "128k", "1M"), or capabilities +
+ )} + {showFreeModelsOnly && !debouncedModelSearchQuery && ( +
+ Try disabling the "Free models only" filter to see all available models +
+ )}
) : ( filteredModels.map((modelOption, index) => (
(modelOptionsRef.current[index] = el)} - key={index} // Consider using modelOption.name if unique + key={modelOption.name} role="option" aria-selected={model === modelOption.name} className={classNames( @@ -510,14 +752,38 @@ export const ModelSelector = ({ setModel?.(modelOption.name); setIsModelDropdownOpen(false); setModelSearchQuery(''); + setDebouncedModelSearchQuery(''); }} tabIndex={focusedModelIndex === index ? 0 : -1} >
- {modelOption.label} - {isModelLikelyFree(modelOption, provider?.name) && ( - - )} +
+
+ +
+
+ + {formatContextSize(modelOption.maxTokenAllowed)} tokens + + {debouncedModelSearchQuery && (modelOption as any).searchScore > 70 && ( + + {(modelOption as any).searchScore.toFixed(0)}% match + + )} +
+
+
+ {isModelLikelyFree(modelOption, provider?.name) && ( + + )} + {model === modelOption.name && ( + + )} +
)) diff --git a/app/lib/.server/llm/constants.ts b/app/lib/.server/llm/constants.ts index 6840816..a78b33b 100644 --- a/app/lib/.server/llm/constants.ts +++ b/app/lib/.server/llm/constants.ts @@ -1,18 +1,19 @@ /* - * Maximum tokens for response generation (conservative default for older models) - * Modern models can handle much higher limits - specific limits are set per model + * Maximum tokens for response generation (updated for modern model capabilities) + * This serves as a fallback when model-specific limits are unavailable + * Modern models like Claude 3.5, GPT-4o, and Gemini Pro support 128k+ tokens */ -export const MAX_TOKENS = 32000; +export const MAX_TOKENS = 128000; /* * Provider-specific default completion token limits * Used as fallbacks when model doesn't specify maxCompletionTokens */ export const PROVIDER_COMPLETION_LIMITS: Record = { - OpenAI: 16384, - Github: 16384, // GitHub Models use OpenAI-compatible limits - Anthropic: 128000, - Google: 32768, + OpenAI: 4096, // Standard GPT models (o1 models have much higher limits) + Github: 4096, // GitHub Models use OpenAI-compatible limits + Anthropic: 64000, // Conservative limit for Claude 4 models (Opus: 32k, Sonnet: 64k) + Google: 8192, // Gemini 1.5 Pro/Flash standard limit Cohere: 4000, DeepSeek: 8192, Groq: 8192, diff --git a/app/lib/.server/llm/stream-text.ts b/app/lib/.server/llm/stream-text.ts index c458c89..40774a8 100644 --- a/app/lib/.server/llm/stream-text.ts +++ b/app/lib/.server/llm/stream-text.ts @@ -142,11 +142,11 @@ export async function streamText(props: { const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384); - // Additional safety cap - should not be needed with proper completion limits, but kept for safety - const safeMaxTokens = Math.min(dynamicMaxTokens, 128000); + // Use model-specific limits directly - no artificial cap needed + const safeMaxTokens = dynamicMaxTokens; logger.info( - `Max tokens for model ${modelDetails.name} is ${safeMaxTokens} (capped from ${dynamicMaxTokens}) based on model limits`, + `Token limits for model ${modelDetails.name}: maxTokens=${safeMaxTokens}, maxTokenAllowed=${modelDetails.maxTokenAllowed}, maxCompletionTokens=${modelDetails.maxCompletionTokens}`, ); let systemPrompt = @@ -221,11 +221,18 @@ export async function streamText(props: { logger.info(`Sending llm call to ${provider.name} with model ${modelDetails.name}`); - // DEBUG: Log reasoning model detection + // Log reasoning model detection and token parameters const isReasoning = isReasoningModel(modelDetails.name); - logger.info(`DEBUG STREAM: Model "${modelDetails.name}" detected as reasoning model: ${isReasoning}`); + logger.info( + `Model "${modelDetails.name}" is reasoning model: ${isReasoning}, using ${isReasoning ? 'maxCompletionTokens' : 'maxTokens'}: ${safeMaxTokens}`, + ); - // console.log(systemPrompt, processedMessages); + // Validate token limits before API call + if (safeMaxTokens > (modelDetails.maxTokenAllowed || 128000)) { + logger.warn( + `Token limit warning: requesting ${safeMaxTokens} tokens but model supports max ${modelDetails.maxTokenAllowed || 128000}`, + ); + } // Use maxCompletionTokens for reasoning models (o1, GPT-5), maxTokens for traditional models const tokenParams = isReasoning ? 
{ maxCompletionTokens: safeMaxTokens } : { maxTokens: safeMaxTokens }; diff --git a/app/lib/modules/llm/providers/anthropic.ts b/app/lib/modules/llm/providers/anthropic.ts index 4529d40..56899e0 100644 --- a/app/lib/modules/llm/providers/anthropic.ts +++ b/app/lib/modules/llm/providers/anthropic.ts @@ -33,6 +33,15 @@ export default class AnthropicProvider extends BaseProvider { maxTokenAllowed: 200000, maxCompletionTokens: 128000, }, + + // Claude Opus 4: 200k context, 32k output limit (latest flagship model) + { + name: 'claude-opus-4-20250514', + label: 'Claude 4 Opus', + provider: 'Anthropic', + maxTokenAllowed: 200000, + maxCompletionTokens: 32000, + }, ]; async getDynamicModels( @@ -81,12 +90,23 @@ export default class AnthropicProvider extends BaseProvider { contextWindow = 200000; // Claude 3 Sonnet has 200k context } + // Determine completion token limits based on specific model + let maxCompletionTokens = 128000; // default for older Claude 3 models + + if (m.id?.includes('claude-opus-4')) { + maxCompletionTokens = 32000; // Claude 4 Opus: 32K output limit + } else if (m.id?.includes('claude-sonnet-4')) { + maxCompletionTokens = 64000; // Claude 4 Sonnet: 64K output limit + } else if (m.id?.includes('claude-4')) { + maxCompletionTokens = 32000; // Other Claude 4 models: conservative 32K limit + } + return { name: m.id, label: `${m.display_name} (${Math.floor(contextWindow / 1000)}k context)`, provider: this.name, maxTokenAllowed: contextWindow, - maxCompletionTokens: 128000, // Claude models support up to 128k completion tokens + maxCompletionTokens, }; }); } diff --git a/app/lib/modules/llm/providers/github.ts b/app/lib/modules/llm/providers/github.ts index 322be6b..b54bb26 100644 --- a/app/lib/modules/llm/providers/github.ts +++ b/app/lib/modules/llm/providers/github.ts @@ -12,35 +12,114 @@ export default class GithubProvider extends BaseProvider { apiTokenKey: 'GITHUB_API_KEY', }; - // find more in https://github.com/marketplace?type=models + /* + * GitHub Models - Available models through GitHub's native API + * Updated for the new GitHub Models API at https://models.github.ai + * Model IDs use the format: publisher/model-name + */ staticModels: ModelInfo[] = [ - { name: 'gpt-4o', label: 'GPT-4o', provider: 'Github', maxTokenAllowed: 128000, maxCompletionTokens: 16384 }, - { name: 'o1', label: 'o1-preview', provider: 'Github', maxTokenAllowed: 100000, maxCompletionTokens: 16384 }, - { name: 'o1-mini', label: 'o1-mini', provider: 'Github', maxTokenAllowed: 65536, maxCompletionTokens: 8192 }, + { name: 'openai/gpt-4o', label: 'GPT-4o', provider: 'Github', maxTokenAllowed: 131072, maxCompletionTokens: 4096 }, { - name: 'gpt-4o-mini', + name: 'openai/gpt-4o-mini', label: 'GPT-4o Mini', provider: 'Github', - maxTokenAllowed: 128000, - maxCompletionTokens: 16384, + maxTokenAllowed: 131072, + maxCompletionTokens: 4096, }, { - name: 'gpt-4-turbo', - label: 'GPT-4 Turbo', + name: 'openai/o1-preview', + label: 'o1-preview', provider: 'Github', maxTokenAllowed: 128000, - maxCompletionTokens: 8192, + maxCompletionTokens: 32000, }, - { name: 'gpt-4', label: 'GPT-4', provider: 'Github', maxTokenAllowed: 8192, maxCompletionTokens: 8192 }, { - name: 'gpt-3.5-turbo', - label: 'GPT-3.5 Turbo', + name: 'openai/o1-mini', + label: 'o1-mini', provider: 'Github', - maxTokenAllowed: 16385, + maxTokenAllowed: 128000, + maxCompletionTokens: 65000, + }, + { name: 'openai/o1', label: 'o1', provider: 'Github', maxTokenAllowed: 200000, maxCompletionTokens: 100000 }, + { + name: 'openai/gpt-4.1', + label: 
'GPT-4.1', + provider: 'Github', + maxTokenAllowed: 1048576, + maxCompletionTokens: 32768, + }, + { + name: 'openai/gpt-4.1-mini', + label: 'GPT-4.1-mini', + provider: 'Github', + maxTokenAllowed: 1048576, + maxCompletionTokens: 32768, + }, + { + name: 'deepseek/deepseek-r1', + label: 'DeepSeek-R1', + provider: 'Github', + maxTokenAllowed: 128000, maxCompletionTokens: 4096, }, ]; + async getDynamicModels( + apiKeys?: Record, + settings?: IProviderSetting, + serverEnv?: Record, + ): Promise { + const { apiKey } = this.getProviderBaseUrlAndKey({ + apiKeys, + providerSettings: settings, + serverEnv: serverEnv as any, + defaultBaseUrlKey: '', + defaultApiTokenKey: 'GITHUB_API_KEY', + }); + + if (!apiKey) { + console.log('GitHub: No API key found. Make sure GITHUB_API_KEY is set in your .env.local file'); + + // Return static models if no API key is available + return this.staticModels; + } + + console.log('GitHub: API key found, attempting to fetch dynamic models...'); + + try { + // Try to fetch dynamic models from GitHub API + const response = await fetch('https://models.github.ai/v1/models', { + headers: { + Authorization: `Bearer ${apiKey}`, + }, + }); + + if (response.ok) { + const data = (await response.json()) as { data?: any[] }; + console.log('GitHub: Successfully fetched models from API'); + + if (data.data && Array.isArray(data.data)) { + return data.data.map((model: any) => ({ + name: model.id, + label: model.name || model.id.split('/').pop() || model.id, + provider: 'Github', + maxTokenAllowed: model.limits?.max_input_tokens || 128000, + maxCompletionTokens: model.limits?.max_output_tokens || 16384, + })); + } + } else { + console.warn('GitHub: API request failed with status:', response.status, response.statusText); + } + } catch (error) { + console.warn('GitHub: Failed to fetch models, using static models:', error); + } + + // Fallback to static models + console.log('GitHub: Using static models as fallback'); + + return this.staticModels; + } + getModelInstance(options: { model: string; serverEnv: Env; @@ -49,6 +128,8 @@ export default class GithubProvider extends BaseProvider { }): LanguageModelV1 { const { model, serverEnv, apiKeys, providerSettings } = options; + console.log(`GitHub: Creating model instance for ${model}`); + const { apiKey } = this.getProviderBaseUrlAndKey({ apiKeys, providerSettings: providerSettings?.[this.name], @@ -58,14 +139,19 @@ export default class GithubProvider extends BaseProvider { }); if (!apiKey) { + console.error('GitHub: No API key found'); throw new Error(`Missing API key for ${this.name} provider`); } + console.log(`GitHub: Using API key (first 8 chars): ${apiKey.substring(0, 8)}...`); + const openai = createOpenAI({ - baseURL: 'https://models.inference.ai.azure.com', + baseURL: 'https://models.github.ai/inference', apiKey, }); + console.log(`GitHub: Created OpenAI client, requesting model: ${model}`); + return openai(model); } } diff --git a/app/lib/modules/llm/providers/google.ts b/app/lib/modules/llm/providers/google.ts index c24c087..16e9de1 100644 --- a/app/lib/modules/llm/providers/google.ts +++ b/app/lib/modules/llm/providers/google.ts @@ -15,23 +15,23 @@ export default class GoogleProvider extends BaseProvider { staticModels: ModelInfo[] = [ /* * Essential fallback models - only the most reliable/stable ones - * Gemini 1.5 Pro: 2M context, excellent for complex reasoning and large codebases + * Gemini 1.5 Pro: 2M context, 8K output limit (verified from API docs) */ { name: 'gemini-1.5-pro', label: 'Gemini 1.5 Pro', provider: 'Google', 
maxTokenAllowed: 2000000, - maxCompletionTokens: 32768, + maxCompletionTokens: 8192, }, - // Gemini 1.5 Flash: 1M context, fast and cost-effective + // Gemini 1.5 Flash: 1M context, 8K output limit, fast and cost-effective { name: 'gemini-1.5-flash', label: 'Gemini 1.5 Flash', provider: 'Google', maxTokenAllowed: 1000000, - maxCompletionTokens: 32768, + maxCompletionTokens: 8192, }, ]; @@ -102,10 +102,10 @@ export default class GoogleProvider extends BaseProvider { const finalContext = Math.min(contextWindow, maxAllowed); // Get completion token limit from Google API - let completionTokens = 32768; // default fallback + let completionTokens = 8192; // default fallback (Gemini 1.5 standard limit) if (m.outputTokenLimit && m.outputTokenLimit > 0) { - completionTokens = Math.min(m.outputTokenLimit, 128000); // Cap at reasonable limit + completionTokens = Math.min(m.outputTokenLimit, 128000); // Use API value, cap at reasonable limit } return { diff --git a/app/lib/modules/llm/providers/moonshot.ts b/app/lib/modules/llm/providers/moonshot.ts new file mode 100644 index 0000000..a59f80e --- /dev/null +++ b/app/lib/modules/llm/providers/moonshot.ts @@ -0,0 +1,71 @@ +import { BaseProvider } from '~/lib/modules/llm/base-provider'; +import type { ModelInfo } from '~/lib/modules/llm/types'; +import type { IProviderSetting } from '~/types/model'; +import type { LanguageModelV1 } from 'ai'; +import { createOpenAI } from '@ai-sdk/openai'; + +export default class MoonshotProvider extends BaseProvider { + name = 'Moonshot'; + getApiKeyLink = 'https://platform.moonshot.ai/console/api-keys'; + + config = { + apiTokenKey: 'MOONSHOT_API_KEY', + }; + + staticModels: ModelInfo[] = [ + { name: 'moonshot-v1-8k', label: 'Moonshot v1 8K', provider: 'Moonshot', maxTokenAllowed: 8000 }, + { name: 'moonshot-v1-32k', label: 'Moonshot v1 32K', provider: 'Moonshot', maxTokenAllowed: 32000 }, + { name: 'moonshot-v1-128k', label: 'Moonshot v1 128K', provider: 'Moonshot', maxTokenAllowed: 128000 }, + { name: 'moonshot-v1-auto', label: 'Moonshot v1 Auto', provider: 'Moonshot', maxTokenAllowed: 128000 }, + { + name: 'moonshot-v1-8k-vision-preview', + label: 'Moonshot v1 8K Vision', + provider: 'Moonshot', + maxTokenAllowed: 8000, + }, + { + name: 'moonshot-v1-32k-vision-preview', + label: 'Moonshot v1 32K Vision', + provider: 'Moonshot', + maxTokenAllowed: 32000, + }, + { + name: 'moonshot-v1-128k-vision-preview', + label: 'Moonshot v1 128K Vision', + provider: 'Moonshot', + maxTokenAllowed: 128000, + }, + { name: 'kimi-latest', label: 'Kimi Latest', provider: 'Moonshot', maxTokenAllowed: 128000 }, + { name: 'kimi-k2-0711-preview', label: 'Kimi K2 Preview', provider: 'Moonshot', maxTokenAllowed: 128000 }, + { name: 'kimi-k2-turbo-preview', label: 'Kimi K2 Turbo', provider: 'Moonshot', maxTokenAllowed: 128000 }, + { name: 'kimi-thinking-preview', label: 'Kimi Thinking', provider: 'Moonshot', maxTokenAllowed: 128000 }, + ]; + + getModelInstance(options: { + model: string; + serverEnv: Env; + apiKeys?: Record; + providerSettings?: Record; + }): LanguageModelV1 { + const { model, serverEnv, apiKeys, providerSettings } = options; + + const { apiKey } = this.getProviderBaseUrlAndKey({ + apiKeys, + providerSettings: providerSettings?.[this.name], + serverEnv: serverEnv as any, + defaultBaseUrlKey: '', + defaultApiTokenKey: 'MOONSHOT_API_KEY', + }); + + if (!apiKey) { + throw new Error(`Missing API key for ${this.name} provider`); + } + + const openai = createOpenAI({ + baseURL: 'https://api.moonshot.ai/v1', + apiKey, + }); + + 
return openai(model); + } +} diff --git a/app/lib/modules/llm/providers/openai.ts b/app/lib/modules/llm/providers/openai.ts index 5792090..fef7294 100644 --- a/app/lib/modules/llm/providers/openai.ts +++ b/app/lib/modules/llm/providers/openai.ts @@ -15,9 +15,18 @@ export default class OpenAIProvider extends BaseProvider { staticModels: ModelInfo[] = [ /* * Essential fallback models - only the most stable/reliable ones - * GPT-4o: 128k context, high performance, recommended for most tasks + * GPT-4o: 128k context, 4k standard output (64k with long output mode) */ - { name: 'gpt-4o', label: 'GPT-4o', provider: 'OpenAI', maxTokenAllowed: 128000, maxCompletionTokens: 16384 }, + { name: 'gpt-4o', label: 'GPT-4o', provider: 'OpenAI', maxTokenAllowed: 128000, maxCompletionTokens: 4096 }, + + // GPT-4o Mini: 128k context, cost-effective alternative + { + name: 'gpt-4o-mini', + label: 'GPT-4o Mini', + provider: 'OpenAI', + maxTokenAllowed: 128000, + maxCompletionTokens: 4096, + }, // GPT-3.5-turbo: 16k context, fast and cost-effective { @@ -27,6 +36,18 @@ export default class OpenAIProvider extends BaseProvider { maxTokenAllowed: 16000, maxCompletionTokens: 4096, }, + + // o1-preview: 128k context, 32k output limit (reasoning model) + { + name: 'o1-preview', + label: 'o1-preview', + provider: 'OpenAI', + maxTokenAllowed: 128000, + maxCompletionTokens: 32000, + }, + + // o1-mini: 128k context, 65k output limit (reasoning model) + { name: 'o1-mini', label: 'o1-mini', provider: 'OpenAI', maxTokenAllowed: 128000, maxCompletionTokens: 65000 }, ]; async getDynamicModels( @@ -79,18 +100,23 @@ export default class OpenAIProvider extends BaseProvider { contextWindow = 16385; // GPT-3.5-turbo has 16k context } - // Determine completion token limits based on model type - let maxCompletionTokens = 16384; // default for most models + // Determine completion token limits based on model type (accurate 2025 limits) + let maxCompletionTokens = 4096; // default for most models - if (m.id?.startsWith('o1-preview') || m.id?.startsWith('o1-mini') || m.id?.startsWith('o1')) { - // Reasoning models have specific completion limits - maxCompletionTokens = m.id?.includes('mini') ? 
8192 : 16384; + if (m.id?.startsWith('o1-preview')) { + maxCompletionTokens = 32000; // o1-preview: 32K output limit + } else if (m.id?.startsWith('o1-mini')) { + maxCompletionTokens = 65000; // o1-mini: 65K output limit + } else if (m.id?.startsWith('o1')) { + maxCompletionTokens = 32000; // Other o1 models: 32K limit + } else if (m.id?.includes('o3') || m.id?.includes('o4')) { + maxCompletionTokens = 100000; // o3/o4 models: 100K output limit } else if (m.id?.includes('gpt-4o')) { - maxCompletionTokens = 16384; + maxCompletionTokens = 4096; // GPT-4o standard: 4K (64K with long output mode) } else if (m.id?.includes('gpt-4')) { - maxCompletionTokens = 8192; + maxCompletionTokens = 8192; // Standard GPT-4: 8K output limit } else if (m.id?.includes('gpt-3.5-turbo')) { - maxCompletionTokens = 4096; + maxCompletionTokens = 4096; // GPT-3.5-turbo: 4K output limit } return { diff --git a/app/lib/modules/llm/providers/perplexity.ts b/app/lib/modules/llm/providers/perplexity.ts index eb58d74..8d98aff 100644 --- a/app/lib/modules/llm/providers/perplexity.ts +++ b/app/lib/modules/llm/providers/perplexity.ts @@ -14,20 +14,20 @@ export default class PerplexityProvider extends BaseProvider { staticModels: ModelInfo[] = [ { - name: 'llama-3.1-sonar-small-128k-online', - label: 'Sonar Small Online', + name: 'sonar', + label: 'Sonar', provider: 'Perplexity', maxTokenAllowed: 8192, }, { - name: 'llama-3.1-sonar-large-128k-online', - label: 'Sonar Large Online', + name: 'sonar-pro', + label: 'Sonar Pro', provider: 'Perplexity', maxTokenAllowed: 8192, }, { - name: 'llama-3.1-sonar-huge-128k-online', - label: 'Sonar Huge Online', + name: 'sonar-reasoning-pro', + label: 'Sonar Reasoning Pro', provider: 'Perplexity', maxTokenAllowed: 8192, }, diff --git a/app/lib/modules/llm/providers/xai.ts b/app/lib/modules/llm/providers/xai.ts index 64191d2..6785bdd 100644 --- a/app/lib/modules/llm/providers/xai.ts +++ b/app/lib/modules/llm/providers/xai.ts @@ -13,9 +13,11 @@ export default class XAIProvider extends BaseProvider { }; staticModels: ModelInfo[] = [ - { name: 'grok-3-beta', label: 'xAI Grok 3 Beta', provider: 'xAI', maxTokenAllowed: 8000 }, - { name: 'grok-beta', label: 'xAI Grok Beta', provider: 'xAI', maxTokenAllowed: 8000 }, - { name: 'grok-2-1212', label: 'xAI Grok2 1212', provider: 'xAI', maxTokenAllowed: 8000 }, + { name: 'grok-4', label: 'xAI Grok 4', provider: 'xAI', maxTokenAllowed: 256000 }, + { name: 'grok-4-07-09', label: 'xAI Grok 4 (07-09)', provider: 'xAI', maxTokenAllowed: 256000 }, + { name: 'grok-3-beta', label: 'xAI Grok 3 Beta', provider: 'xAI', maxTokenAllowed: 131000 }, + { name: 'grok-3-mini-beta', label: 'xAI Grok 3 Mini Beta', provider: 'xAI', maxTokenAllowed: 131000 }, + { name: 'grok-3-mini-fast-beta', label: 'xAI Grok 3 Mini Fast Beta', provider: 'xAI', maxTokenAllowed: 131000 }, ]; getModelInstance(options: { diff --git a/app/lib/modules/llm/registry.ts b/app/lib/modules/llm/registry.ts index 6edba6d..a28e4f9 100644 --- a/app/lib/modules/llm/registry.ts +++ b/app/lib/modules/llm/registry.ts @@ -16,6 +16,7 @@ import XAIProvider from './providers/xai'; import HyperbolicProvider from './providers/hyperbolic'; import AmazonBedrockProvider from './providers/amazon-bedrock'; import GithubProvider from './providers/github'; +import MoonshotProvider from './providers/moonshot'; export { AnthropicProvider, @@ -26,6 +27,7 @@ export { HuggingFaceProvider, HyperbolicProvider, MistralProvider, + MoonshotProvider, OllamaProvider, OpenAIProvider, OpenRouterProvider, diff --git 
a/app/routes/api.llmcall.ts b/app/routes/api.llmcall.ts index 64f4d6c..fe5e89b 100644 --- a/app/routes/api.llmcall.ts +++ b/app/routes/api.llmcall.ts @@ -41,6 +41,29 @@ function getCompletionTokenLimit(modelDetails: ModelInfo): number { return Math.min(MAX_TOKENS, 16384); } +function validateTokenLimits(modelDetails: ModelInfo, requestedTokens: number): { valid: boolean; error?: string } { + const modelMaxTokens = modelDetails.maxTokenAllowed || 128000; + const maxCompletionTokens = getCompletionTokenLimit(modelDetails); + + // Check against model's context window + if (requestedTokens > modelMaxTokens) { + return { + valid: false, + error: `Requested tokens (${requestedTokens}) exceed model's context window (${modelMaxTokens}). Please reduce your request size.`, + }; + } + + // Check against completion token limits + if (requestedTokens > maxCompletionTokens) { + return { + valid: false, + error: `Requested tokens (${requestedTokens}) exceed model's completion limit (${maxCompletionTokens}). Consider using a model with higher token limits.`, + }; + } + + return { valid: true }; +} + async function llmCallAction({ context, request }: ActionFunctionArgs) { const { system, message, model, provider, streamOutput } = await request.json<{ system: string; @@ -104,6 +127,23 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) { }); } + // Handle token limit errors with helpful messages + if ( + error instanceof Error && + (error.message?.includes('max_tokens') || + error.message?.includes('token') || + error.message?.includes('exceeds') || + error.message?.includes('maximum')) + ) { + throw new Response( + `Token limit error: ${error.message}. Try reducing your request size or using a model with higher token limits.`, + { + status: 400, + statusText: 'Token Limit Exceeded', + }, + ); + } + throw new Response(null, { status: 500, statusText: 'Internal Server Error', @@ -120,6 +160,16 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) { const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384); + // Validate token limits before making API request + const validation = validateTokenLimits(modelDetails, dynamicMaxTokens); + + if (!validation.valid) { + throw new Response(validation.error, { + status: 400, + statusText: 'Token Limit Exceeded', + }); + } + const providerInfo = PROVIDER_LIST.find((p) => p.name === provider.name); if (!providerInfo) { @@ -215,6 +265,29 @@ async function llmCallAction({ context, request }: ActionFunctionArgs) { ); } + // Handle token limit errors with helpful messages + if ( + error instanceof Error && + (error.message?.includes('max_tokens') || + error.message?.includes('token') || + error.message?.includes('exceeds') || + error.message?.includes('maximum')) + ) { + return new Response( + JSON.stringify({ + ...errorResponse, + message: `Token limit error: ${error.message}. 
Try reducing your request size or using a model with higher token limits.`, + statusCode: 400, + isRetryable: false, + }), + { + status: 400, + headers: { 'Content-Type': 'application/json' }, + statusText: 'Token Limit Exceeded', + }, + ); + } + return new Response(JSON.stringify(errorResponse), { status: errorResponse.statusCode, headers: { 'Content-Type': 'application/json' }, diff --git a/vite.config.ts b/vite.config.ts index 1db5872..7847a17 100644 --- a/vite.config.ts +++ b/vite.config.ts @@ -6,6 +6,9 @@ import { optimizeCssModules } from 'vite-plugin-optimize-css-modules'; import tsconfigPaths from 'vite-tsconfig-paths'; import * as dotenv from 'dotenv'; +// Load environment variables from multiple files +dotenv.config({ path: '.env.local' }); +dotenv.config({ path: '.env' }); dotenv.config(); export default defineConfig((config) => {
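
A minimal sketch (not part of the diff above) for sanity-checking the new Moonshot integration: it lists the models exposed by the OpenAI-compatible endpoint that both the provider (baseURL https://api.moonshot.ai/v1) and the status checker point at. It assumes Node 18+ (global fetch), that MOONSHOT_API_KEY is exported in the shell, and that the endpoint returns an OpenAI-style { data: [...] } payload.

// Sanity check for the new Moonshot provider: list models from the OpenAI-compatible API.
// Assumptions: Node 18+ (global fetch), MOONSHOT_API_KEY set in the environment.
async function listMoonshotModels(): Promise<void> {
  const apiKey = process.env.MOONSHOT_API_KEY;

  if (!apiKey) {
    throw new Error('MOONSHOT_API_KEY is not set');
  }

  // Same endpoint the provider and status checker use in the patch.
  const response = await fetch('https://api.moonshot.ai/v1/models', {
    headers: { Authorization: `Bearer ${apiKey}` },
  });

  if (!response.ok) {
    throw new Error(`Moonshot API returned ${response.status} ${response.statusText}`);
  }

  const { data } = (await response.json()) as { data?: Array<{ id: string }> };

  // Expect ids such as moonshot-v1-8k, kimi-k2-0711-preview, kimi-thinking-preview, ...
  console.log((data ?? []).map((m) => m.id).join('\n'));
}

listMoonshotModels().catch((error) => {
  console.error(error);
  process.exit(1);
});

The model-picker changes rely on the fuzzyMatch and formatContextSize helpers added to ModelSelector.tsx. They are module-private in the patch; the snippet below treats them as if exported (a hypothetical import, purely to illustrate the scoring and the context-size labels the dropdown now shows).

// Hypothetical import: fuzzyMatch/formatContextSize are not exported in the patch.
import { fuzzyMatch, formatContextSize } from '~/components/chat/ModelSelector';

console.log(formatContextSize(131072));   // "131K" — shown next to each model in the dropdown
console.log(formatContextSize(1048576));  // "1.0M"

// Exact substring hits score near 100 and always match.
console.log(fuzzyMatch('grok', 'xAI Grok 4'));       // { score: 92, matches: true }

// Otherwise a Levenshtein-based similarity must exceed 0.6 to count as a match.
console.log(fuzzyMatch('kimi k3', 'Kimi K2 Turbo')); // { score: 0, matches: false } (similarity ≈ 0.46)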