bolt-diy/app/lib/.server/llm/stream-text.ts
Stijnus b5d9055851 🔧 Fix Token Limits & Invalid JSON Response Errors (#1934)
ISSUES FIXED:
- Invalid JSON response errors during streaming
- Incorrect token limits causing API rejections
- Outdated hardcoded model configurations
- Poor error messages for API failures

SOLUTIONS IMPLEMENTED:

🎯 ACCURATE TOKEN LIMITS & CONTEXT SIZES
- OpenAI GPT-4o: 128k context (was 8k)
- OpenAI GPT-3.5-turbo: 16k context (was 8k)
- Anthropic Claude 3.5 Sonnet: 200k context (was 8k)
- Anthropic Claude 3 Haiku: 200k context (was 8k)
- Google Gemini 1.5 Pro: 2M context (was 8k)
- Google Gemini 1.5 Flash: 1M context (was 8k)
- Groq Llama models: 128k context (was 8k)
- Together models: Updated with accurate limits (see the illustrative lookup sketch below)
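
For reference, the corrected limits above amount to a lookup table along these lines. This is only a sketch: the constant name, shape, and exact model IDs are illustrative, not the actual bolt.diy constants.

// Illustrative sketch only: context window sizes from the list above, keyed by model ID.
// KNOWN_CONTEXT_WINDOWS and the model IDs are assumptions, not the real constants file.
const KNOWN_CONTEXT_WINDOWS: Record<string, number> = {
  'gpt-4o': 128_000,
  'gpt-3.5-turbo': 16_000,
  'claude-3-5-sonnet-latest': 200_000,
  'claude-3-haiku-20240307': 200_000,
  'gemini-1.5-pro': 2_000_000,
  'gemini-1.5-flash': 1_000_000,
  'llama-3.3-70b-versatile': 128_000, // Groq-hosted Llama
};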

DYNAMIC MODEL FETCHING ENHANCED
- Smart context detection from provider APIs
- Automatic fallback to known limits when API unavailable
- Safety caps to prevent token overflow (100k max; sketched below)
- Intelligent model filtering and deduplication
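
A minimal sketch of the detect-then-fall-back-then-cap idea, assuming a caller-supplied fetch function and a table of known limits (both hypothetical names); the real logic lives in the provider modules and LLMManager.

// Sketch: prefer the context size reported by the provider API, fall back to a known
// static limit when the API is unavailable, and always apply the 100k safety cap.
const SAFETY_CAP = 100_000;

async function resolveContextWindow(
  modelId: string,
  fetchRemoteContext: (id: string) => Promise<number | undefined>,
  knownLimits: Record<string, number>,
): Promise<number> {
  let remote: number | undefined;

  try {
    remote = await fetchRemoteContext(modelId); // dynamic detection from the provider API
  } catch {
    remote = undefined; // API unavailable: fall back to the known limit below
  }

  const fallback = knownLimits[modelId] ?? 8_000; // conservative default when nothing is known

  return Math.min(remote ?? fallback, SAFETY_CAP);
}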

🛡️ IMPROVED ERROR HANDLING
- Specific error messages for Invalid JSON responses
- Token limit exceeded warnings with solutions
- API key validation with clear guidance
- Rate limiting detection and user guidance
- Network timeout handling (the error mapping is sketched below)
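
The kind of categorization described above could look like this sketch; the helper name, regexes, and message wording are assumptions rather than the actual implementation.

// Hypothetical sketch: map raw provider failures to actionable messages.
function describeApiError(err: unknown): string {
  const msg = err instanceof Error ? err.message : String(err);

  if (/unexpected token|invalid json/i.test(msg)) {
    return 'The provider returned an invalid JSON response. Retry, or switch models/providers.';
  }

  if (/context length|token limit|too many tokens/i.test(msg)) {
    return 'Token limit exceeded. Shorten the prompt or pick a model with a larger context window.';
  }

  if (/401|invalid api key|unauthorized/i.test(msg)) {
    return 'API key rejected. Check that the key for this provider is set and still valid.';
  }

  if (/429|rate limit/i.test(msg)) {
    return 'Rate limited by the provider. Wait a moment before retrying.';
  }

  if (/timeout|etimedout|network/i.test(msg)) {
    return 'Network timeout while contacting the provider. Check connectivity and try again.';
  }

  return `Unexpected provider error: ${msg}`;
}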

PERFORMANCE OPTIMIZATIONS
- Reduced static models from 40+ to 12 essential models
- Enhanced streaming error detection
- Better API response validation
- Improved context window display (shows M/k units; formatting sketched below)
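
The M/k display could be produced by a helper along these lines (the name formatContextSize is illustrative, not the actual code):

// Illustrative helper: 2_000_000 -> '2M', 1_000_000 -> '1M', 128_000 -> '128k'.
function formatContextSize(tokens: number): string {
  if (tokens >= 1_000_000) {
    return `${Math.round(tokens / 100_000) / 10}M`; // keeps one decimal (e.g. 1.5M) when needed
  }

  if (tokens >= 1_000) {
    return `${Math.round(tokens / 1_000)}k`;
  }

  return String(tokens);
}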

🔧 TECHNICAL IMPROVEMENTS
- Dynamic model context detection from APIs
- Enhanced streaming reliability
- Better token limit enforcement
- Comprehensive error categorization
- Smart model validation before API calls (condensed sketch below; the full logic is in stream-text.ts)
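
A condensed sketch of the validation idea; the full version, including the Google-specific hint and the fallback warning, is in stream-text.ts below. ModelInfo and pickModel here are simplified stand-ins, not the real types.

// Condensed sketch: resolve the requested model against the provider's list
// before calling the API, instead of sending an unknown model name upstream.
interface ModelInfo {
  name: string;
  maxTokenAllowed?: number;
}

function pickModel(requested: string, available: ModelInfo[]): ModelInfo {
  if (available.length === 0) {
    throw new Error('No models found for this provider');
  }

  const match = available.find((m) => m.name === requested);

  // Fall back to the first known model rather than failing the request outright.
  return match ?? available[0];
}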

IMPACT:
- Eliminates Invalid JSON response errors
- Prevents token limit API rejections
- Provides accurate model capabilities
- Improves user experience with clear errors
- Enables full utilization of modern LLM context windows
2025-08-29 20:53:57 +02:00

222 lines
7.2 KiB
TypeScript

import { convertToCoreMessages, streamText as _streamText, type Message } from 'ai';
import { MAX_TOKENS, type FileMap } from './constants';
import { getSystemPrompt } from '~/lib/common/prompts/prompts';
import { DEFAULT_MODEL, DEFAULT_PROVIDER, MODIFICATIONS_TAG_NAME, PROVIDER_LIST, WORK_DIR } from '~/utils/constants';
import type { IProviderSetting } from '~/types/model';
import { PromptLibrary } from '~/lib/common/prompt-library';
import { allowedHTMLElements } from '~/utils/markdown';
import { LLMManager } from '~/lib/modules/llm/manager';
import { createScopedLogger } from '~/utils/logger';
import { createFilesContext, extractPropertiesFromMessage } from './utils';
import { discussPrompt } from '~/lib/common/prompts/discuss-prompt';
import type { DesignScheme } from '~/types/design-scheme';

export type Messages = Message[];

export interface StreamingOptions extends Omit<Parameters<typeof _streamText>[0], 'model'> {
  supabaseConnection?: {
    isConnected: boolean;
    hasSelectedProject: boolean;
    credentials?: {
      anonKey?: string;
      supabaseUrl?: string;
    };
  };
}

const logger = createScopedLogger('stream-text');

function sanitizeText(text: string): string {
  let sanitized = text.replace(/<div class=\\"__boltThought__\\">.*?<\/div>/s, '');
  sanitized = sanitized.replace(/<think>.*?<\/think>/s, '');
  sanitized = sanitized.replace(/<boltAction type="file" filePath="package-lock\.json">[\s\S]*?<\/boltAction>/g, '');

  return sanitized.trim();
}
export async function streamText(props: {
  messages: Omit<Message, 'id'>[];
  env?: Env;
  options?: StreamingOptions;
  apiKeys?: Record<string, string>;
  files?: FileMap;
  providerSettings?: Record<string, IProviderSetting>;
  promptId?: string;
  contextOptimization?: boolean;
  contextFiles?: FileMap;
  summary?: string;
  messageSliceId?: number;
  chatMode?: 'discuss' | 'build';
  designScheme?: DesignScheme;
}) {
  const {
    messages,
    env: serverEnv,
    options,
    apiKeys,
    files,
    providerSettings,
    promptId,
    contextOptimization,
    contextFiles,
    summary,
    chatMode,
    designScheme,
  } = props;

  let currentModel = DEFAULT_MODEL;
  let currentProvider = DEFAULT_PROVIDER.name;

  let processedMessages = messages.map((message) => {
    const newMessage = { ...message };

    if (message.role === 'user') {
      const { model, provider, content } = extractPropertiesFromMessage(message);
      currentModel = model;
      currentProvider = provider;
      newMessage.content = sanitizeText(content);
    } else if (message.role === 'assistant') {
      newMessage.content = sanitizeText(message.content);
    }

    // Sanitize all text parts in the parts array, if present
    if (Array.isArray(message.parts)) {
      newMessage.parts = message.parts.map((part) =>
        part.type === 'text' ? { ...part, text: sanitizeText(part.text) } : part,
      );
    }

    return newMessage;
  });
  const provider = PROVIDER_LIST.find((p) => p.name === currentProvider) || DEFAULT_PROVIDER;
  const staticModels = LLMManager.getInstance().getStaticModelListFromProvider(provider);
  let modelDetails = staticModels.find((m) => m.name === currentModel);

  if (!modelDetails) {
    const modelsList = [
      ...(provider.staticModels || []),
      ...(await LLMManager.getInstance().getModelListFromProvider(provider, {
        apiKeys,
        providerSettings,
        serverEnv: serverEnv as any,
      })),
    ];

    if (!modelsList.length) {
      throw new Error(`No models found for provider ${provider.name}`);
    }

    modelDetails = modelsList.find((m) => m.name === currentModel);

    if (!modelDetails) {
      // Check if it's a Google provider and the model name looks like it might be incorrect
      if (provider.name === 'Google' && currentModel.includes('2.5')) {
        throw new Error(
          `Model "${currentModel}" not found. Gemini 2.5 Pro doesn't exist. Available Gemini models include: gemini-1.5-pro, gemini-2.0-flash, gemini-1.5-flash. Please select a valid model.`,
        );
      }

      // Fall back to the first available model with a warning
      logger.warn(
        `MODEL [${currentModel}] not found in provider [${provider.name}]. Falling back to first model: ${modelsList[0].name}`,
      );
      modelDetails = modelsList[0];
    }
  }

  const dynamicMaxTokens = modelDetails && modelDetails.maxTokenAllowed ? modelDetails.maxTokenAllowed : MAX_TOKENS;

  // Ensure we never exceed reasonable token limits to prevent API errors
  const safeMaxTokens = Math.min(dynamicMaxTokens, 100000); // Cap at 100k for safety

  logger.info(
    `Max tokens for model ${modelDetails.name} is ${safeMaxTokens} (capped from ${dynamicMaxTokens}) based on model limits`,
  );
  let systemPrompt =
    PromptLibrary.getPropmtFromLibrary(promptId || 'default', {
      cwd: WORK_DIR,
      allowedHtmlElements: allowedHTMLElements,
      modificationTagName: MODIFICATIONS_TAG_NAME,
      designScheme,
      supabase: {
        isConnected: options?.supabaseConnection?.isConnected || false,
        hasSelectedProject: options?.supabaseConnection?.hasSelectedProject || false,
        credentials: options?.supabaseConnection?.credentials || undefined,
      },
    }) ?? getSystemPrompt();

  if (chatMode === 'build' && contextFiles && contextOptimization) {
    const codeContext = createFilesContext(contextFiles, true);

    systemPrompt = `${systemPrompt}
Below is the artifact containing the context loaded into the context buffer for you to have knowledge of and that might need changes to fulfill the current user request.
CONTEXT BUFFER:
---
${codeContext}
---
`;

    if (summary) {
      systemPrompt = `${systemPrompt}
below is the chat history till now
CHAT SUMMARY:
---
${props.summary}
---
`;

      if (props.messageSliceId) {
        processedMessages = processedMessages.slice(props.messageSliceId);
      } else {
        const lastMessage = processedMessages.pop();

        if (lastMessage) {
          processedMessages = [lastMessage];
        }
      }
    }
  }
  const effectiveLockedFilePaths = new Set<string>();

  if (files) {
    for (const [filePath, fileDetails] of Object.entries(files)) {
      if (fileDetails?.isLocked) {
        effectiveLockedFilePaths.add(filePath);
      }
    }
  }

  if (effectiveLockedFilePaths.size > 0) {
    const lockedFilesListString = Array.from(effectiveLockedFilePaths)
      .map((filePath) => `- ${filePath}`)
      .join('\n');
    systemPrompt = `${systemPrompt}
IMPORTANT: The following files are locked and MUST NOT be modified in any way. Do not suggest or make any changes to these files. You can proceed with the request but DO NOT make any changes to these files specifically:
${lockedFilesListString}
---
`;
  } else {
    console.log('No locked files found from any source for prompt.');
  }

  logger.info(`Sending llm call to ${provider.name} with model ${modelDetails.name}`);

  // console.log(systemPrompt, processedMessages);

  return await _streamText({
    model: provider.getModelInstance({
      model: modelDetails.name,
      serverEnv,
      apiKeys,
      providerSettings,
    }),
    system: chatMode === 'build' ? systemPrompt : discussPrompt(),
    maxTokens: safeMaxTokens,
    messages: convertToCoreMessages(processedMessages as any),
    ...options,
  });
}