Revert "fix: resolve chat conversation hanging and stream interruption issues (#1971)"

This reverts commit e68593f22d.
author Stijnus
date 2025-09-07 00:14:13 +02:00
committed by Stijnus
parent e68593f22d
commit 37217a5c7b

61 changed files with 1432 additions and 8811 deletions

@@ -11,65 +11,6 @@ import { createFilesContext, extractPropertiesFromMessage } from './utils';
 import { discussPrompt } from '~/lib/common/prompts/discuss-prompt';
 import type { DesignScheme } from '~/types/design-scheme';
-
-function getSmartAISystemPrompt(basePrompt: string): string {
-  const smartAIEnhancement = `
-## SmartAI Mode - Enhanced Conversational Coding Assistant
-
-You are operating in SmartAI mode, a premium Bolt.gives feature that provides detailed, educational feedback throughout the coding process.
-
-### Your Communication Style:
-- Be conversational and friendly, as if pair programming with a colleague
-- Explain your thought process clearly and educationally
-- Use natural language, not technical jargon unless necessary
-- Keep responses visible and engaging
-
-### What to Communicate:
-
-**When Starting Tasks:**
-✨ "I see you want [task description]. Let me [approach explanation]..."
-✨ Explain your understanding and planned approach
-✨ Share why you're choosing specific solutions
-
-**During Implementation:**
-📝 "Now I'm creating/updating [file] to [purpose]..."
-📝 Explain what each code section does
-📝 Share the patterns and best practices you're using
-📝 Discuss any trade-offs or alternatives considered
-
-**When Problem-Solving:**
-🔍 "I noticed [issue]. This is likely because [reasoning]..."
-🔍 Share your debugging thought process
-🔍 Explain how you're identifying and fixing issues
-🔍 Describe why your solution will work
-
-**After Completing Work:**
-✅ "I've successfully [what was done]. The key changes include..."
-✅ Summarize what was accomplished
-✅ Highlight important decisions made
-✅ Suggest potential improvements or next steps
-
-### Example Responses:
-
-Instead of silence:
-"I understand you need a contact form. Let me create a modern, accessible form with proper validation. I'll start by setting up the form structure with semantic HTML..."
-
-While coding:
-"I'm now adding email validation to ensure users enter valid email addresses. I'll use a regex pattern that covers most common email formats while keeping it user-friendly..."
-
-When debugging:
-"I see the button isn't aligning properly with the other elements. This looks like a flexbox issue. Let me adjust the container's display properties to fix the alignment..."
-
-### Remember:
-- Users chose SmartAI to learn from your process
-- Make every action visible and understandable
-- Be their coding companion, not just a silent worker
-- Keep the conversation flowing naturally
-
-${basePrompt}`;
-
-  return smartAIEnhancement;
-}
 
 export type Messages = Message[];
 
 export interface StreamingOptions extends Omit<Parameters<typeof _streamText>[0], 'model'> {
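
The deleted function above follows a simple prompt-layering pattern: the SmartAI instructions come first and the unmodified base prompt is appended at the end of the template string. Restated as a generic helper (a sketch; withOverlay is an illustrative name, not one from the codebase):

function withOverlay(basePrompt: string, overlay: string, enabled: boolean): string {
  // Overlay first, base prompt last, matching the ordering getSmartAISystemPrompt used.
  return enabled ? `${overlay}\n\n${basePrompt}` : basePrompt;
}
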
@@ -141,19 +82,13 @@ export async function streamText(props: {
   } = props;
 
   let currentModel = DEFAULT_MODEL;
   let currentProvider = DEFAULT_PROVIDER.name;
-  let smartAIEnabled = false;
-
   let processedMessages = messages.map((message) => {
     const newMessage = { ...message };
     if (message.role === 'user') {
-      const { model, provider, content, smartAI } = extractPropertiesFromMessage(message);
+      const { model, provider, content } = extractPropertiesFromMessage(message);
       currentModel = model;
       currentProvider = provider;
-      if (smartAI !== undefined) {
-        smartAIEnabled = smartAI;
-      }
-
       newMessage.content = sanitizeText(content);
     } else if (message.role == 'assistant') {
       newMessage.content = sanitizeText(message.content);
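
extractPropertiesFromMessage is imported from './utils' and its body is not part of this diff. A minimal sketch of what it plausibly does, assuming the client prepends bracketed annotations to the user's text; the regex shapes and the fallback values are assumptions:

// Hypothetical stand-in for the Message type from the 'ai' package.
interface MessageLike {
  role: string;
  content: string;
}

function extractPropertiesFromMessageSketch(message: MessageLike) {
  // Annotations such as "[Model: gpt-4o]" or "[Provider: OpenAI]" ride along
  // at the front of the user's message; capture them here.
  const modelMatch = message.content.match(/\[Model: (.*?)\]/);
  const providerMatch = message.content.match(/\[Provider: (.*?)\]/);

  // Strip the annotations so the LLM never sees them as user prose.
  const content = message.content
    .replace(/\[Model: (.*?)\]/, '')
    .replace(/\[Provider: (.*?)\]/, '')
    .trim();

  return {
    model: modelMatch ? modelMatch[1] : 'default-model', // fallback is an assumption
    provider: providerMatch ? providerMatch[1] : 'default-provider', // fallback is an assumption
    content,
  };
}

The pre-revert version presumably parsed a SmartAI flag the same way, which is why the removed destructuring carries the extra smartAI property.
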
@@ -207,39 +142,13 @@ export async function streamText(props: {
 
   const dynamicMaxTokens = modelDetails ? getCompletionTokenLimit(modelDetails) : Math.min(MAX_TOKENS, 16384);
 
-  // Additional safety cap - respect model-specific limits
-  let safeMaxTokens = dynamicMaxTokens;
-
-  // Apply model-specific caps for Anthropic models
-  if (modelDetails?.provider === 'Anthropic') {
-    if (modelDetails.name.includes('claude-sonnet-4') || modelDetails.name.includes('claude-opus-4')) {
-      safeMaxTokens = Math.min(dynamicMaxTokens, 64000);
-    } else if (modelDetails.name.includes('claude-3-7-sonnet')) {
-      safeMaxTokens = Math.min(dynamicMaxTokens, 64000);
-    } else if (modelDetails.name.includes('claude-3-5-sonnet')) {
-      safeMaxTokens = Math.min(dynamicMaxTokens, 8192);
-    } else {
-      safeMaxTokens = Math.min(dynamicMaxTokens, 4096);
-    }
-  } else {
-    // General safety cap for other providers
-    safeMaxTokens = Math.min(dynamicMaxTokens, 128000);
-  }
+  // Use model-specific limits directly - no artificial cap needed
+  const safeMaxTokens = dynamicMaxTokens;
 
   logger.info(
-    `Max tokens for model ${modelDetails.name} is ${safeMaxTokens} (capped from ${dynamicMaxTokens}) based on model limits`,
+    `Token limits for model ${modelDetails.name}: maxTokens=${safeMaxTokens}, maxTokenAllowed=${modelDetails.maxTokenAllowed}, maxCompletionTokens=${modelDetails.maxCompletionTokens}`,
   );
 
-  /*
-   * Check if SmartAI is enabled for supported models
-   * SmartAI is enabled if either:
-   * 1. The model itself has isSmartAIEnabled flag (for models with SmartAI in name)
-   * 2. The user explicitly enabled it via message flag
-   */
-  const isSmartAISupported =
-    modelDetails?.supportsSmartAI && (provider.name === 'Anthropic' || provider.name === 'OpenAI');
-  const useSmartAI = (modelDetails?.isSmartAIEnabled || smartAIEnabled) && isSmartAISupported;
-
   let systemPrompt =
     PromptLibrary.getPropmtFromLibrary(promptId || 'default', {
       cwd: WORK_DIR,
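
For reference, the cap table deleted above, restated as a standalone helper. The numeric ceilings are copied from the removed lines; the function name and signature are illustrative:

function capCompletionTokens(providerName: string, modelName: string, requested: number): number {
  if (providerName === 'Anthropic') {
    // Claude 4 and Claude 3.7 Sonnet models were capped at 64k output tokens.
    if (
      modelName.includes('claude-sonnet-4') ||
      modelName.includes('claude-opus-4') ||
      modelName.includes('claude-3-7-sonnet')
    ) {
      return Math.min(requested, 64000);
    }

    // Claude 3.5 Sonnet was capped at 8192 output tokens.
    if (modelName.includes('claude-3-5-sonnet')) {
      return Math.min(requested, 8192);
    }

    // Conservative default for any other Anthropic model.
    return Math.min(requested, 4096);
  }

  // General safety cap for every other provider.
  return Math.min(requested, 128000);
}

The revert drops this hard-coded table and trusts whatever getCompletionTokenLimit(modelDetails) derives from the model metadata, which keeps the limit from going stale when providers raise their ceilings.
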
@@ -253,11 +162,6 @@ export async function streamText(props: {
       },
     }) ?? getSystemPrompt();
 
-  // Enhance system prompt for SmartAI if enabled and supported
-  if (useSmartAI) {
-    systemPrompt = getSmartAISystemPrompt(systemPrompt);
-  }
-
   if (chatMode === 'build' && contextFiles && contextOptimization) {
     const codeContext = createFilesContext(contextFiles, true);
 
@@ -317,11 +221,18 @@ export async function streamText(props: {
   logger.info(`Sending llm call to ${provider.name} with model ${modelDetails.name}`);
 
-  // DEBUG: Log reasoning model detection
+  // Log reasoning model detection and token parameters
   const isReasoning = isReasoningModel(modelDetails.name);
-  logger.info(`DEBUG STREAM: Model "${modelDetails.name}" detected as reasoning model: ${isReasoning}`);
+  logger.info(
+    `Model "${modelDetails.name}" is reasoning model: ${isReasoning}, using ${isReasoning ? 'maxCompletionTokens' : 'maxTokens'}: ${safeMaxTokens}`,
+  );
 
   // console.log(systemPrompt, processedMessages);
 
+  // Validate token limits before API call
+  if (safeMaxTokens > (modelDetails.maxTokenAllowed || 128000)) {
+    logger.warn(
+      `Token limit warning: requesting ${safeMaxTokens} tokens but model supports max ${modelDetails.maxTokenAllowed || 128000}`,
+    );
+  }
+
   // Use maxCompletionTokens for reasoning models (o1, GPT-5), maxTokens for traditional models
   const tokenParams = isReasoning ? { maxCompletionTokens: safeMaxTokens } : { maxTokens: safeMaxTokens };
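
isReasoningModel is imported elsewhere in the file and its body is not shown in this diff. A minimal sketch of the pattern, assuming name-based detection; the family list is an assumption prompted by the o1/GPT-5 comment in the restored line:

function isReasoningModelSketch(modelName: string): boolean {
  const name = modelName.toLowerCase();

  // Reasoning-first families reject the classic maxTokens parameter and
  // require maxCompletionTokens instead.
  return ['o1', 'o3', 'gpt-5'].some((family) => name === family || name.startsWith(`${family}-`));
}

// Mirrors the restored tokenParams line: choose the parameter name the API expects.
function tokenParamsFor(modelName: string, safeMaxTokens: number) {
  return isReasoningModelSketch(modelName)
    ? { maxCompletionTokens: safeMaxTokens }
    : { maxTokens: safeMaxTokens };
}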