feat: enhance context handling by adding code context selection and implementing summary generation (#1091) #release

* feat: add context annotation types and enhance file handling in LLM processing

* feat: enhance context handling by adding chatId to annotations and implementing summary generation

* removed useless changes

* feat: updated token counts to include optimization requests

* prompt fix

* logging added

* useless logs removed
Anirban Kar
2025-01-22 22:48:13 +05:30
committed by GitHub
parent 2ae897aae7
commit 3c56346e83
16 changed files with 1153 additions and 222 deletions


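For orientation before the diff: a minimal sketch of how the reworked streamText entry point might be invoked once context selection and summarization have run. The caller shown here is assumed (the orchestration lives in other files touched by this commit), the surrounding variables are supplied by that caller, and selectedFiles / chatSummary are placeholder names; only the option names come from the props interface in the diff below.

// Illustrative caller, not part of the changed file.
const result = await streamText({
  messages,                    // Omit<Message, 'id'>[] built from the chat request
  env: serverEnv,              // optional now that the prop is typed env?: Env
  apiKeys,
  providerSettings,
  files,                       // full FileMap of the workspace
  contextOptimization: true,
  contextFiles: selectedFiles, // placeholder name: pre-selected subset of `files`
  summary: chatSummary,        // placeholder name: running summary of the conversation
});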
@@ -1,162 +1,48 @@
import { convertToCoreMessages, streamText as _streamText } from 'ai';
import { MAX_TOKENS } from './constants';
import { convertToCoreMessages, streamText as _streamText, type Message } from 'ai';
import { MAX_TOKENS, type FileMap } from './constants';
import { getSystemPrompt } from '~/lib/common/prompts/prompts';
import {
DEFAULT_MODEL,
DEFAULT_PROVIDER,
MODEL_REGEX,
MODIFICATIONS_TAG_NAME,
PROVIDER_LIST,
PROVIDER_REGEX,
WORK_DIR,
} from '~/utils/constants';
import ignore from 'ignore';
import { DEFAULT_MODEL, DEFAULT_PROVIDER, MODIFICATIONS_TAG_NAME, PROVIDER_LIST, WORK_DIR } from '~/utils/constants';
import type { IProviderSetting } from '~/types/model';
import { PromptLibrary } from '~/lib/common/prompt-library';
import { allowedHTMLElements } from '~/utils/markdown';
import { LLMManager } from '~/lib/modules/llm/manager';
import { createScopedLogger } from '~/utils/logger';
interface ToolResult<Name extends string, Args, Result> {
toolCallId: string;
toolName: Name;
args: Args;
result: Result;
}
interface Message {
role: 'user' | 'assistant';
content: string;
toolInvocations?: ToolResult<string, unknown, unknown>[];
model?: string;
}
import { createFilesContext, extractPropertiesFromMessage, simplifyBoltActions } from './utils';
import { getFilePaths } from './select-context';
export type Messages = Message[];
export type StreamingOptions = Omit<Parameters<typeof _streamText>[0], 'model'>;
export interface File {
type: 'file';
content: string;
isBinary: boolean;
}
export interface Folder {
type: 'folder';
}
type Dirent = File | Folder;
export type FileMap = Record<string, Dirent | undefined>;
export function simplifyBoltActions(input: string): string {
// Using regex to match boltAction tags that have type="file"
const regex = /(<boltAction[^>]*type="file"[^>]*>)([\s\S]*?)(<\/boltAction>)/g;
// Replace each matching occurrence
return input.replace(regex, (_0, openingTag, _2, closingTag) => {
return `${openingTag}\n ...\n ${closingTag}`;
});
}
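// Illustrative example, not part of the diff: simplifyBoltActions collapses the body of every
// <boltAction type="file"> block so earlier assistant messages no longer carry full file contents:
//   simplifyBoltActions('<boltAction type="file" filePath="index.ts">const a = 1;</boltAction>')
//   // keeps the opening and closing tags but replaces the file body with a "..." placeholder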
// Common patterns to ignore, similar to .gitignore
const IGNORE_PATTERNS = [
'node_modules/**',
'.git/**',
'dist/**',
'build/**',
'.next/**',
'coverage/**',
'.cache/**',
'.vscode/**',
'.idea/**',
'**/*.log',
'**/.DS_Store',
'**/npm-debug.log*',
'**/yarn-debug.log*',
'**/yarn-error.log*',
'**/*lock.json',
'**/*lock.yml',
];
const ig = ignore().add(IGNORE_PATTERNS);
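// Illustrative, not part of the diff: with these patterns, ig.ignores('node_modules/foo/index.js')
// and ig.ignores('package-lock.json') both return true, so such entries never reach the LLM context.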
function createFilesContext(files: FileMap) {
let filePaths = Object.keys(files);
filePaths = filePaths.filter((x) => {
const relPath = x.replace('/home/project/', '');
return !ig.ignores(relPath);
});
const fileContexts = filePaths
.filter((x) => files[x] && files[x].type == 'file')
.map((path) => {
const dirent = files[path];
if (!dirent || dirent.type == 'folder') {
return '';
}
return `<file path="${path}">\n${dirent.content}\n</file>`;
});
return `Below are the code files present in the webcontainer:\n <codebase>\n${fileContexts.join('\n\n')}\n</codebase>`;
}
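// Illustrative example, not part of the diff: for a FileMap such as
//   { '/home/project/src/app.ts': { type: 'file', content: 'console.log("hi");', isBinary: false } }
// createFilesContext returns roughly:
//   Below are the code files present in the webcontainer:
//    <codebase>
//   <file path="/home/project/src/app.ts">
//   console.log("hi");
//   </file>
//   </codebase>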
function extractPropertiesFromMessage(message: Message): { model: string; provider: string; content: string } {
const textContent = Array.isArray(message.content)
? message.content.find((item) => item.type === 'text')?.text || ''
: message.content;
const modelMatch = textContent.match(MODEL_REGEX);
const providerMatch = textContent.match(PROVIDER_REGEX);
/*
* Extract model
* const modelMatch = message.content.match(MODEL_REGEX);
*/
const model = modelMatch ? modelMatch[1] : DEFAULT_MODEL;
/*
* Extract provider
* const providerMatch = message.content.match(PROVIDER_REGEX);
*/
const provider = providerMatch ? providerMatch[1] : DEFAULT_PROVIDER.name;
const cleanedContent = Array.isArray(message.content)
? message.content.map((item) => {
if (item.type === 'text') {
return {
type: 'text',
text: item.text?.replace(MODEL_REGEX, '').replace(PROVIDER_REGEX, ''),
};
}
return item; // Preserve image_url and other types as is
})
: textContent.replace(MODEL_REGEX, '').replace(PROVIDER_REGEX, '');
return { model, provider, content: cleanedContent };
}
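// Illustrative example, not part of the diff, assuming MODEL_REGEX and PROVIDER_REGEX match the
// "[Model: ...]" / "[Provider: ...]" annotations the UI prepends to user messages:
//   extractPropertiesFromMessage({ role: 'user', content: '[Model: gpt-4o]\n\n[Provider: OpenAI]\n\nfix the login bug' })
//   // roughly -> { model: 'gpt-4o', provider: 'OpenAI', content: 'fix the login bug' }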
const logger = createScopedLogger('stream-text');
export async function streamText(props: {
messages: Messages;
env: Env;
messages: Omit<Message, 'id'>[];
env?: Env;
options?: StreamingOptions;
apiKeys?: Record<string, string>;
files?: FileMap;
providerSettings?: Record<string, IProviderSetting>;
promptId?: string;
contextOptimization?: boolean;
contextFiles?: FileMap;
summary?: string;
}) {
const { messages, env: serverEnv, options, apiKeys, files, providerSettings, promptId, contextOptimization } = props;
// console.log({serverEnv});
const {
messages,
env: serverEnv,
options,
apiKeys,
files,
providerSettings,
promptId,
contextOptimization,
contextFiles,
summary,
} = props;
let currentModel = DEFAULT_MODEL;
let currentProvider = DEFAULT_PROVIDER.name;
const processedMessages = messages.map((message) => {
let processedMessages = messages.map((message) => {
if (message.role === 'user') {
const { model, provider, content } = extractPropertiesFromMessage(message);
currentModel = model;
@@ -214,13 +100,44 @@ export async function streamText(props: {
modificationTagName: MODIFICATIONS_TAG_NAME,
}) ?? getSystemPrompt();
if (files && contextOptimization) {
const codeContext = createFilesContext(files);
systemPrompt = `${systemPrompt}\n\n ${codeContext}`;
if (files && contextFiles && contextOptimization) {
const codeContext = createFilesContext(contextFiles, true);
const filePaths = getFilePaths(files);
systemPrompt = `${systemPrompt}
Below are all the files present in the project:
---
${filePaths.join('\n')}
---
Below is the context loaded into the context buffer for you to have knowledge of; it might need changes to fulfill the current user request.
CONTEXT BUFFER:
---
${codeContext}
---
`;
if (summary) {
systemPrompt = `${systemPrompt}
Below is a summary of the chat history so far.
CHAT SUMMARY:
---
${summary}
---
`;
const lastMessage = processedMessages.pop();
if (lastMessage) {
processedMessages = [lastMessage];
}
}
}
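// Net effect of the block above: with contextOptimization enabled, the system prompt now carries the
// full project file listing plus only the pre-selected context files, and once a chat summary exists
// the earlier turns are dropped so just the summary and the latest message are sent to the model.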
logger.info(`Sending llm call to ${provider.name} with model ${modelDetails.name}`);
// console.log(systemPrompt,processedMessages);
return await _streamText({
model: provider.getModelInstance({
model: modelDetails.name,