ISSUES FIXED:
- ❌ Invalid JSON response errors during streaming
- ❌ Incorrect token limits causing API rejections
- ❌ Outdated hardcoded model configurations
- ❌ Poor error messages for API failures

SOLUTIONS IMPLEMENTED:

🎯 ACCURATE TOKEN LIMITS & CONTEXT SIZES
- OpenAI GPT-4o: 128k context (was 8k)
- OpenAI GPT-3.5-turbo: 16k context (was 8k)
- Anthropic Claude 3.5 Sonnet: 200k context (was 8k)
- Anthropic Claude 3 Haiku: 200k context (was 8k)
- Google Gemini 1.5 Pro: 2M context (was 8k)
- Google Gemini 1.5 Flash: 1M context (was 8k)
- Groq Llama models: 128k context (was 8k)
- Together models: updated with accurate limits

🔄 DYNAMIC MODEL FETCHING ENHANCED
- Smart context detection from provider APIs
- Automatic fallback to known limits when the API is unavailable (see the sketch after this list)
- Safety caps to prevent token overflow (100k max)
- Intelligent model filtering and deduplication

🛡️ IMPROVED ERROR HANDLING
- Specific error messages for invalid JSON responses
- Token-limit-exceeded warnings with solutions
- API key validation with clear guidance
- Rate-limit detection and user guidance
- Network timeout handling

⚡ PERFORMANCE OPTIMIZATIONS
- Reduced static models from 40+ to 12 essential models
- Enhanced streaming error detection
- Better API response validation
- Improved context-window display (shows M/k units; see the sketch after this list)

🔧 TECHNICAL IMPROVEMENTS
- Dynamic model context detection from APIs
- Enhanced streaming reliability
- Better token limit enforcement
- Comprehensive error categorization
- Smart model validation before API calls

IMPACT:
✅ Eliminates invalid JSON response errors
✅ Prevents token-limit API rejections
✅ Provides accurate model capabilities
✅ Improves the user experience with clear errors
✅ Enables full utilization of modern LLM context windows
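For illustration, the known-limits fallback and M/k display described above could look something like the sketch below. This is a minimal sketch, not the actual implementation in this codebase: the names `KNOWN_CONTEXT_LIMITS`, `resolveContextWindow`, and `formatContextSize` are hypothetical, and the table simply mirrors the limits listed above.

```ts
// Hypothetical fallback table mirroring the context limits listed above.
const KNOWN_CONTEXT_LIMITS: Record<string, number> = {
  'gpt-4o': 128_000,
  'gpt-3.5-turbo': 16_000,
  'claude-3-5-sonnet': 200_000,
  'claude-3-haiku': 200_000,
  'gemini-1.5-pro': 2_000_000,
  'gemini-1.5-flash': 1_000_000,
};

const DEFAULT_CONTEXT = 8_000; // last-resort default for unknown models

// Prefer the size reported by the provider API; otherwise fall back to the
// known limit, then to the conservative default.
function resolveContextWindow(model: string, reportedSize?: number): number {
  return reportedSize ?? KNOWN_CONTEXT_LIMITS[model] ?? DEFAULT_CONTEXT;
}

// Render a context size in M/k units, e.g. 2_000_000 -> "2M", 16_000 -> "16k".
function formatContextSize(tokens: number): string {
  if (tokens >= 1_000_000) {
    return `${tokens / 1_000_000}M`;
  }

  if (tokens >= 1_000) {
    return `${Math.round(tokens / 1_000)}k`;
  }

  return `${tokens}`;
}

console.log(formatContextSize(resolveContextWindow('gemini-1.5-pro'))); // "2M"
```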
import { type ActionFunctionArgs } from '@remix-run/cloudflare';
import { createDataStream, generateId } from 'ai';
import { MAX_RESPONSE_SEGMENTS, MAX_TOKENS, type FileMap } from '~/lib/.server/llm/constants';
import { CONTINUE_PROMPT } from '~/lib/common/prompts/prompts';
import { streamText, type Messages, type StreamingOptions } from '~/lib/.server/llm/stream-text';
import SwitchableStream from '~/lib/.server/llm/switchable-stream';
import type { IProviderSetting } from '~/types/model';
import { createScopedLogger } from '~/utils/logger';
import { getFilePaths, selectContext } from '~/lib/.server/llm/select-context';
import type { ContextAnnotation, ProgressAnnotation } from '~/types/context';
import { WORK_DIR } from '~/utils/constants';
import { createSummary } from '~/lib/.server/llm/create-summary';
import { extractPropertiesFromMessage } from '~/lib/.server/llm/utils';
import type { DesignScheme } from '~/types/design-scheme';
import { MCPService } from '~/lib/services/mcpService';

export async function action(args: ActionFunctionArgs) {
  return chatAction(args);
}

const logger = createScopedLogger('api.chat');

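// Parse the raw Cookie header into a map of decoded cookie names to values.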
function parseCookies(cookieHeader: string): Record<string, string> {
  const cookies: Record<string, string> = {};

  const items = cookieHeader.split(';').map((cookie) => cookie.trim());

  items.forEach((item) => {
    const [name, ...rest] = item.split('=');

    // `rest` is always an array, so check its length rather than its truthiness.
    if (name && rest.length > 0) {
      const decodedName = decodeURIComponent(name.trim());
      const decodedValue = decodeURIComponent(rest.join('=').trim());
      cookies[decodedName] = decodedValue;
    }
  });

  return cookies;
}

async function chatAction({ context, request }: ActionFunctionArgs) {
  const { messages, files, promptId, contextOptimization, supabase, chatMode, designScheme, maxLLMSteps } =
    await request.json<{
      messages: Messages;
      files: any;
      promptId?: string;
      contextOptimization: boolean;
      chatMode: 'discuss' | 'build';
      designScheme?: DesignScheme;
      supabase?: {
        isConnected: boolean;
        hasSelectedProject: boolean;
        credentials?: {
          anonKey?: string;
          supabaseUrl?: string;
        };
      };
      maxLLMSteps: number;
    }>();

  const cookieHeader = request.headers.get('Cookie');
  const apiKeys = JSON.parse(parseCookies(cookieHeader || '').apiKeys || '{}');
  const providerSettings: Record<string, IProviderSetting> = JSON.parse(
    parseCookies(cookieHeader || '').providers || '{}',
  );

  const stream = new SwitchableStream();

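  // Accumulate token usage across the summary, context-selection, and response calls.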
  const cumulativeUsage = {
    completionTokens: 0,
    promptTokens: 0,
    totalTokens: 0,
  };
  const encoder: TextEncoder = new TextEncoder();
  let progressCounter: number = 1;

  try {
    const mcpService = MCPService.getInstance();
    const totalMessageContent = messages.reduce((acc, message) => acc + message.content, '');
    logger.debug(`Total message length: ${totalMessageContent.split(' ').length} words`);

    let lastChunk: string | undefined = undefined;

    const dataStream = createDataStream({
      async execute(dataStream) {
        const filePaths = getFilePaths(files || {});
        let filteredFiles: FileMap | undefined = undefined;
        let summary: string | undefined = undefined;
        let messageSliceId = 0;

        const processedMessages = await mcpService.processToolInvocations(messages, dataStream);

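        // Remember where the last three messages begin; downstream calls can treat earlier history as sliceable.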
        if (processedMessages.length > 3) {
          messageSliceId = processedMessages.length - 3;
        }

        if (filePaths.length > 0 && contextOptimization) {
          logger.debug('Generating Chat Summary');
          dataStream.writeData({
            type: 'progress',
            label: 'summary',
            status: 'in-progress',
            order: progressCounter++,
            message: 'Analysing Request',
          } satisfies ProgressAnnotation);

          // Create a summary of the chat
          logger.debug(`Messages count: ${processedMessages.length}`);

          summary = await createSummary({
            messages: [...processedMessages],
            env: context.cloudflare?.env,
            apiKeys,
            providerSettings,
            promptId,
            contextOptimization,
            onFinish(resp) {
              if (resp.usage) {
                logger.debug('createSummary token usage', JSON.stringify(resp.usage));
                cumulativeUsage.completionTokens += resp.usage.completionTokens || 0;
                cumulativeUsage.promptTokens += resp.usage.promptTokens || 0;
                cumulativeUsage.totalTokens += resp.usage.totalTokens || 0;
              }
            },
          });
          dataStream.writeData({
            type: 'progress',
            label: 'summary',
            status: 'complete',
            order: progressCounter++,
            message: 'Analysis Complete',
          } satisfies ProgressAnnotation);

          dataStream.writeMessageAnnotation({
            type: 'chatSummary',
            summary,
            chatId: processedMessages.slice(-1)?.[0]?.id,
          } as ContextAnnotation);

          // Update context buffer
          logger.debug('Updating Context Buffer');
          dataStream.writeData({
            type: 'progress',
            label: 'context',
            status: 'in-progress',
            order: progressCounter++,
            message: 'Determining Files to Read',
          } satisfies ProgressAnnotation);

          // Select context files
          logger.debug(`Messages count: ${processedMessages.length}`);
          filteredFiles = await selectContext({
            messages: [...processedMessages],
            env: context.cloudflare?.env,
            apiKeys,
            files,
            providerSettings,
            promptId,
            contextOptimization,
            summary,
            onFinish(resp) {
              if (resp.usage) {
                logger.debug('selectContext token usage', JSON.stringify(resp.usage));
                cumulativeUsage.completionTokens += resp.usage.completionTokens || 0;
                cumulativeUsage.promptTokens += resp.usage.promptTokens || 0;
                cumulativeUsage.totalTokens += resp.usage.totalTokens || 0;
              }
            },
          });

          if (filteredFiles) {
            logger.debug(`files in context: ${JSON.stringify(Object.keys(filteredFiles))}`);
          }

          dataStream.writeMessageAnnotation({
            type: 'codeContext',
            files: Object.keys(filteredFiles).map((key) => {
              let path = key;

              if (path.startsWith(WORK_DIR)) {
                path = path.replace(WORK_DIR, '');
              }

              return path;
            }),
          } as ContextAnnotation);

          dataStream.writeData({
            type: 'progress',
            label: 'context',
            status: 'complete',
            order: progressCounter++,
            message: 'Code Files Selected',
          } satisfies ProgressAnnotation);
        }

        const options: StreamingOptions = {
          supabaseConnection: supabase,
          toolChoice: 'auto',
          tools: mcpService.toolsWithoutExecute,
          maxSteps: maxLLMSteps,
          onStepFinish: ({ toolCalls }) => {
            // add tool call annotations for frontend processing
            toolCalls.forEach((toolCall) => {
              mcpService.processToolCall(toolCall, dataStream);
            });
          },
          onFinish: async ({ text: content, finishReason, usage }) => {
            logger.debug('usage', JSON.stringify(usage));

            if (usage) {
              cumulativeUsage.completionTokens += usage.completionTokens || 0;
              cumulativeUsage.promptTokens += usage.promptTokens || 0;
              cumulativeUsage.totalTokens += usage.totalTokens || 0;
            }

            if (finishReason !== 'length') {
              dataStream.writeMessageAnnotation({
                type: 'usage',
                value: {
                  completionTokens: cumulativeUsage.completionTokens,
                  promptTokens: cumulativeUsage.promptTokens,
                  totalTokens: cumulativeUsage.totalTokens,
                },
              });
              dataStream.writeData({
                type: 'progress',
                label: 'response',
                status: 'complete',
                order: progressCounter++,
                message: 'Response Generated',
              } satisfies ProgressAnnotation);
              await new Promise((resolve) => setTimeout(resolve, 0));

              // stream.close();
              return;
            }

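            // The model stopped because it hit the output token limit: continue in a new segment.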
            if (stream.switches >= MAX_RESPONSE_SEGMENTS) {
              throw Error('Cannot continue message: Maximum segments reached');
            }

            const switchesLeft = MAX_RESPONSE_SEGMENTS - stream.switches;

            logger.info(`Reached max token limit (${MAX_TOKENS}): Continuing message (${switchesLeft} switches left)`);

            const lastUserMessage = processedMessages.filter((x) => x.role === 'user').slice(-1)[0];
            const { model, provider } = extractPropertiesFromMessage(lastUserMessage);
            processedMessages.push({ id: generateId(), role: 'assistant', content });
            processedMessages.push({
              id: generateId(),
              role: 'user',
              content: `[Model: ${model}]\n\n[Provider: ${provider}]\n\n${CONTINUE_PROMPT}`,
            });

            const result = await streamText({
              messages: [...processedMessages],
              env: context.cloudflare?.env,
              options,
              apiKeys,
              files,
              providerSettings,
              promptId,
              contextOptimization,
              contextFiles: filteredFiles,
              chatMode,
              designScheme,
              summary,
              messageSliceId,
            });

            result.mergeIntoDataStream(dataStream);

            (async () => {
              for await (const part of result.fullStream) {
                if (part.type === 'error') {
                  const error: any = part.error;
                  logger.error(`${error}`);

                  return;
                }
              }
            })();

            return;
          },
        };

        dataStream.writeData({
          type: 'progress',
          label: 'response',
          status: 'in-progress',
          order: progressCounter++,
          message: 'Generating Response',
        } satisfies ProgressAnnotation);

        const result = await streamText({
          messages: [...processedMessages],
          env: context.cloudflare?.env,
          options,
          apiKeys,
          files,
          providerSettings,
          promptId,
          contextOptimization,
          contextFiles: filteredFiles,
          chatMode,
          designScheme,
          summary,
          messageSliceId,
        });

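        // Watch the full stream for error parts so common failure modes are logged with a useful category.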
        (async () => {
          for await (const part of result.fullStream) {
            if (part.type === 'error') {
              const error: any = part.error;
              logger.error('Streaming error:', error);

              // Enhanced error handling for common streaming issues
              if (error.message?.includes('Invalid JSON response')) {
                logger.error('Invalid JSON response detected - likely malformed API response');
              } else if (error.message?.includes('token')) {
                logger.error('Token-related error detected - possible token limit exceeded');
              }

              return;
            }
          }
        })();
        result.mergeIntoDataStream(dataStream);
      },
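      // Map low-level provider errors to actionable, user-facing messages.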
      onError: (error: any) => {
        // Provide more specific error messages for common issues
        const errorMessage = error.message || 'Unknown error';

        if (errorMessage.includes('model') && errorMessage.includes('not found')) {
          return 'Custom error: Invalid model selected. Please check that the model name is correct and available.';
        }

        if (errorMessage.includes('Invalid JSON response')) {
          return 'Custom error: The AI service returned an invalid response. This may be due to an invalid model name, API rate limiting, or server issues. Try selecting a different model or check your API key.';
        }

        if (
          errorMessage.includes('API key') ||
          errorMessage.includes('unauthorized') ||
          errorMessage.includes('authentication')
        ) {
          return 'Custom error: Invalid or missing API key. Please check your API key configuration.';
        }

        if (errorMessage.includes('token') && errorMessage.includes('limit')) {
          return 'Custom error: Token limit exceeded. The conversation is too long for the selected model. Try using a model with a larger context window or start a new conversation.';
        }

        if (errorMessage.includes('rate limit') || errorMessage.includes('429')) {
          return 'Custom error: API rate limit exceeded. Please wait a moment before trying again.';
        }

        if (errorMessage.includes('network') || errorMessage.includes('timeout')) {
          return 'Custom error: Network error. Please check your internet connection and try again.';
        }

        return `Custom error: ${errorMessage}`;
      },
    }).pipeThrough(
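      // Wrap reasoning chunks ('g:' parts) in a __boltThought__ div, re-emit them as text ('0:') parts, and encode everything as bytes.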
      new TransformStream({
        transform: (chunk, controller) => {
          if (!lastChunk) {
            lastChunk = ' ';
          }

          if (typeof chunk === 'string') {
            if (chunk.startsWith('g') && !lastChunk.startsWith('g')) {
              controller.enqueue(encoder.encode(`0: "<div class=\\"__boltThought__\\">"\n`));
            }

            if (lastChunk.startsWith('g') && !chunk.startsWith('g')) {
              controller.enqueue(encoder.encode(`0: "</div>\\n"\n`));
            }
          }

          lastChunk = chunk;

          let transformedChunk = chunk;

          if (typeof chunk === 'string' && chunk.startsWith('g')) {
            let content = chunk.split(':').slice(1).join(':');

            if (content.endsWith('\n')) {
              content = content.slice(0, content.length - 1);
            }

            transformedChunk = `0:${content}\n`;
          }

          // Convert the string stream to a byte stream
          const str = typeof transformedChunk === 'string' ? transformedChunk : JSON.stringify(transformedChunk);
          controller.enqueue(encoder.encode(str));
        },
      }),
    );

    return new Response(dataStream, {
      status: 200,
      headers: {
        'Content-Type': 'text/event-stream; charset=utf-8',
        Connection: 'keep-alive',
        'Cache-Control': 'no-cache',
      },
    });
  } catch (error: any) {
    logger.error(error);

    const errorResponse = {
      error: true,
      message: error.message || 'An unexpected error occurred',
      statusCode: error.statusCode || 500,
      isRetryable: error.isRetryable !== false, // Default to retryable unless explicitly false
      provider: error.provider || 'unknown',
    };

    if (error.message?.includes('API key')) {
      return new Response(
        JSON.stringify({
          ...errorResponse,
          message: 'Invalid or missing API key',
          statusCode: 401,
          isRetryable: false,
        }),
        {
          status: 401,
          headers: { 'Content-Type': 'application/json' },
          statusText: 'Unauthorized',
        },
      );
    }

    return new Response(JSON.stringify(errorResponse), {
      status: errorResponse.statusCode,
      headers: { 'Content-Type': 'application/json' },
      statusText: 'Error',
    });
  }
}