feat: enhance context handling by adding code context selection and implementing summary generation (#1091) #release

* feat: add context annotation types and enhance file handling in LLM processing

* feat: enhance context handling by adding chatId to annotations and implementing summary generation

* removed useless changes

* feat: updated token counts to include optimization requests

* prompt fix

* logging added

* useless logs removed
Anirban Kar
2025-01-22 22:48:13 +05:30
committed by GitHub
parent 2ae897aae7
commit 3c56346e83
16 changed files with 1153 additions and 222 deletions


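For orientation before the diff: a minimal sketch of how the reworked streamText entry point might be invoked once context selection and summarization have run. The caller shown here is assumed (the orchestration lives in other files touched by this commit), the surrounding variables are supplied by that caller, and selectedFiles / chatSummary are placeholder names; only the option names come from the props interface in the diff below.

// Illustrative caller, not part of the changed file.
const result = await streamText({
  messages,                    // Omit<Message, 'id'>[] built from the chat request
  env: serverEnv,              // optional now that the prop is typed env?: Env
  apiKeys,
  providerSettings,
  files,                       // full FileMap of the workspace
  contextOptimization: true,
  contextFiles: selectedFiles, // placeholder name: pre-selected subset of `files`
  summary: chatSummary,        // placeholder name: running summary of the conversation
});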
@@ -1,162 +1,48 @@
import { convertToCoreMessages, streamText as _streamText } from 'ai';
import { MAX_TOKENS } from './constants';
import { convertToCoreMessages, streamText as _streamText, type Message } from 'ai';
import { MAX_TOKENS, type FileMap } from './constants';
import { getSystemPrompt } from '~/lib/common/prompts/prompts';
import {
DEFAULT_MODEL,
DEFAULT_PROVIDER,
MODEL_REGEX,
MODIFICATIONS_TAG_NAME,
PROVIDER_LIST,
PROVIDER_REGEX,
WORK_DIR,
} from '~/utils/constants';
import ignore from 'ignore';
import { DEFAULT_MODEL, DEFAULT_PROVIDER, MODIFICATIONS_TAG_NAME, PROVIDER_LIST, WORK_DIR } from '~/utils/constants';
import type { IProviderSetting } from '~/types/model';
import { PromptLibrary } from '~/lib/common/prompt-library';
import { allowedHTMLElements } from '~/utils/markdown';
import { LLMManager } from '~/lib/modules/llm/manager';
import { createScopedLogger } from '~/utils/logger';
interface ToolResult<Name extends string, Args, Result> {
toolCallId: string;
toolName: Name;
args: Args;
result: Result;
}
interface Message {
role: 'user' | 'assistant';
content: string;
toolInvocations?: ToolResult<string, unknown, unknown>[];
model?: string;
}
import { createFilesContext, extractPropertiesFromMessage, simplifyBoltActions } from './utils';
import { getFilePaths } from './select-context';
export type Messages = Message[];
export type StreamingOptions = Omit<Parameters<typeof _streamText>[0], 'model'>;
export interface File {
type: 'file';
content: string;
isBinary: boolean;
}
export interface Folder {
type: 'folder';
}
type Dirent = File | Folder;
export type FileMap = Record<string, Dirent | undefined>;
export function simplifyBoltActions(input: string): string {
// Using regex to match boltAction tags that have type="file"
const regex = /(<boltAction[^>]*type="file"[^>]*>)([\s\S]*?)(<\/boltAction>)/g;
// Replace each matching occurrence
return input.replace(regex, (_0, openingTag, _2, closingTag) => {
return `${openingTag}\n ...\n ${closingTag}`;
});
}
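// Illustrative example, not part of the diff: simplifyBoltActions collapses the body of every
// <boltAction type="file"> block so earlier assistant messages no longer carry full file contents:
//   simplifyBoltActions('<boltAction type="file" filePath="index.ts">const a = 1;</boltAction>')
//   // keeps the opening and closing tags but replaces the file body with a "..." placeholder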
// Common patterns to ignore, similar to .gitignore
const IGNORE_PATTERNS = [
'node_modules/**',
'.git/**',
'dist/**',
'build/**',
'.next/**',
'coverage/**',
'.cache/**',
'.vscode/**',
'.idea/**',
'**/*.log',
'**/.DS_Store',
'**/npm-debug.log*',
'**/yarn-debug.log*',
'**/yarn-error.log*',
'**/*lock.json',
'**/*lock.yml',
];
const ig = ignore().add(IGNORE_PATTERNS);
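// Illustrative, not part of the diff: with these patterns, ig.ignores('node_modules/foo/index.js')
// and ig.ignores('package-lock.json') both return true, so such entries never reach the LLM context.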
function createFilesContext(files: FileMap) {
let filePaths = Object.keys(files);
filePaths = filePaths.filter((x) => {
const relPath = x.replace('/home/project/', '');
return !ig.ignores(relPath);
});
const fileContexts = filePaths
.filter((x) => files[x] && files[x].type == 'file')
.map((path) => {
const dirent = files[path];
if (!dirent || dirent.type == 'folder') {
return '';
}
return `<file path="${path}">\n${dirent.content}\n</file>`;
});
return `Below are the code files present in the webcontainer:\n <codebase>\n${fileContexts.join('\n\n')}\n</codebase>`;
}
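// Illustrative example, not part of the diff: for a FileMap such as
//   { '/home/project/src/app.ts': { type: 'file', content: 'console.log("hi");', isBinary: false } }
// createFilesContext returns roughly:
//   Below are the code files present in the webcontainer:
//    <codebase>
//   <file path="/home/project/src/app.ts">
//   console.log("hi");
//   </file>
//   </codebase>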
function extractPropertiesFromMessage(message: Message): { model: string; provider: string; content: string } {
const textContent = Array.isArray(message.content)
? message.content.find((item) => item.type === 'text')?.text || ''
: message.content;
const modelMatch = textContent.match(MODEL_REGEX);
const providerMatch = textContent.match(PROVIDER_REGEX);
/*
* Extract model
* const modelMatch = message.content.match(MODEL_REGEX);
*/
const model = modelMatch ? modelMatch[1] : DEFAULT_MODEL;
/*
* Extract provider
* const providerMatch = message.content.match(PROVIDER_REGEX);
*/
const provider = providerMatch ? providerMatch[1] : DEFAULT_PROVIDER.name;
const cleanedContent = Array.isArray(message.content)
? message.content.map((item) => {
if (item.type === 'text') {
return {
type: 'text',
text: item.text?.replace(MODEL_REGEX, '').replace(PROVIDER_REGEX, ''),
};
}
return item; // Preserve image_url and other types as is
})
: textContent.replace(MODEL_REGEX, '').replace(PROVIDER_REGEX, '');
return { model, provider, content: cleanedContent };
}
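// Illustrative example, not part of the diff, assuming MODEL_REGEX and PROVIDER_REGEX match the
// "[Model: ...]" / "[Provider: ...]" annotations the UI prepends to user messages:
//   extractPropertiesFromMessage({ role: 'user', content: '[Model: gpt-4o]\n\n[Provider: OpenAI]\n\nfix the login bug' })
//   // roughly -> { model: 'gpt-4o', provider: 'OpenAI', content: 'fix the login bug' }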
const logger = createScopedLogger('stream-text');
export async function streamText(props: {
messages: Messages;
env: Env;
messages: Omit<Message, 'id'>[];
env?: Env;
options?: StreamingOptions;
apiKeys?: Record<string, string>;
files?: FileMap;
providerSettings?: Record<string, IProviderSetting>;
promptId?: string;
contextOptimization?: boolean;
contextFiles?: FileMap;
summary?: string;
}) {
const { messages, env: serverEnv, options, apiKeys, files, providerSettings, promptId, contextOptimization } = props;
// console.log({serverEnv});
const {
messages,
env: serverEnv,
options,
apiKeys,
files,
providerSettings,
promptId,
contextOptimization,
contextFiles,
summary,
} = props;
let currentModel = DEFAULT_MODEL;
let currentProvider = DEFAULT_PROVIDER.name;
const processedMessages = messages.map((message) => {
let processedMessages = messages.map((message) => {
if (message.role === 'user') {
const { model, provider, content } = extractPropertiesFromMessage(message);
currentModel = model;
@@ -214,13 +100,44 @@ export async function streamText(props: {
modificationTagName: MODIFICATIONS_TAG_NAME,
}) ?? getSystemPrompt();
if (files && contextOptimization) {
const codeContext = createFilesContext(files);
systemPrompt = `${systemPrompt}\n\n ${codeContext}`;
if (files && contextFiles && contextOptimization) {
const codeContext = createFilesContext(contextFiles, true);
const filePaths = getFilePaths(files);
systemPrompt = `${systemPrompt}
Below are all the files present in the project:
---
${filePaths.join('\n')}
---
Below is the context loaded into the context buffer for you to have knowledge of; it might need changes to fulfill the current user request.
CONTEXT BUFFER:
---
${codeContext}
---
`;
if (summary) {
systemPrompt = `${systemPrompt}
Below is a summary of the chat history so far.
CHAT SUMMARY:
---
${summary}
---
`;
const lastMessage = processedMessages.pop();
if (lastMessage) {
processedMessages = [lastMessage];
}
}
}
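// Net effect of the block above: with contextOptimization enabled, the system prompt now carries the
// full project file listing plus only the pre-selected context files, and once a chat summary exists
// the earlier turns are dropped so just the summary and the latest message are sent to the model.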
logger.info(`Sending llm call to ${provider.name} with model ${modelDetails.name}`);
// console.log(systemPrompt,processedMessages);
return await _streamText({
model: provider.getModelInstance({
model: modelDetails.name,