When AI models fail to use proper artifact tags, code blocks are now automatically detected and converted to file artifacts, preventing code from appearing in chat. The parser detects markdown code fences outside artifacts and wraps them with proper artifact/action tags. This fixes the issue where code would randomly appear in chat instead of being generated as files in the workspace. Fixes #1230. Co-Authored-By: Keoma Wright <founder@lovemedia.org.za>
538 lines
16 KiB
TypeScript
538 lines
16 KiB
TypeScript
import type { ActionType, BoltAction, BoltActionData, FileAction, ShellAction, SupabaseAction } from '~/types/actions';
|
|
import type { BoltArtifactData } from '~/types/artifact';
|
|
import { createScopedLogger } from '~/utils/logger';
|
|
import { unreachable } from '~/utils/unreachable';
|
|
|
|
// Literal markers the parser searches for in the streamed text.

/** Start of an artifact opening tag; attributes and the closing `>` follow it. */
const ARTIFACT_TAG_OPEN = '<boltArtifact';

/** Closing tag of an artifact. */
const ARTIFACT_TAG_CLOSE = '</boltArtifact>';

/** Start of an action opening tag; attributes and the closing `>` follow it. */
const ARTIFACT_ACTION_TAG_OPEN = '<boltAction';

/** Closing tag of an action. */
const ARTIFACT_ACTION_TAG_CLOSE = '</boltAction>';

/** Opening tag of a quick-actions group. */
const BOLT_QUICK_ACTIONS_OPEN = '<bolt-quick-actions>';

/** Closing tag of a quick-actions group. */
const BOLT_QUICK_ACTIONS_CLOSE = '</bolt-quick-actions>';
|
|
|
|
/** Logger used by this module; created with the scope name 'MessageParser'. */
const logger = createScopedLogger('MessageParser');
|
|
|
|
/** Payload passed to artifact callbacks: the artifact's data plus the owning message id. */
export interface ArtifactCallbackData extends BoltArtifactData {
  /** Id of the message in which the artifact was found. */
  messageId: string;
}
|
|
|
|
/** Payload passed to action callbacks. */
export interface ActionCallbackData {
  /** Id of the artifact that contains the action. */
  artifactId: string;

  /** Id of the message in which the action was found. */
  messageId: string;

  /** Per-message action counter rendered as a string ("0", "1", ...). */
  actionId: string;

  /** The action as parsed so far. */
  action: BoltAction;
}
|
|
|
|
/** Handler invoked when an artifact opens or closes. */
export type ArtifactCallback = (data: ArtifactCallbackData) => void;
|
|
/** Handler invoked when an action opens, streams content, or closes. */
export type ActionCallback = (data: ActionCallbackData) => void;
|
|
|
|
/** Optional hooks the parser calls while it walks a message. */
export interface ParserCallbacks {
  /** Called once an artifact's opening tag has been read completely. */
  onArtifactOpen?: ArtifactCallback;

  /** Called when an artifact's closing tag is reached. */
  onArtifactClose?: ArtifactCallback;

  /** Called once an action's opening tag has been read completely. */
  onActionOpen?: ActionCallback;

  /** Called for file actions whose closing tag has not arrived yet, with the content seen so far. */
  onActionStream?: ActionCallback;

  /** Called when an action's closing tag is reached, with the final content. */
  onActionClose?: ActionCallback;
}
|
|
|
|
/** Properties handed to an element factory when an artifact placeholder is rendered. */
interface ElementFactoryProps {
  /** Id of the message that contains the artifact. */
  messageId: string;
}
|
|
|
|
/** Produces the markup that replaces an artifact in the parser's output. */
type ElementFactory = (props: ElementFactoryProps) => string;
|
|
|
|
/** Construction options for the streaming message parser. */
export interface StreamingMessageParserOptions {
  /** Hooks invoked as artifacts and actions are discovered. */
  callbacks?: ParserCallbacks;

  /** Custom factory for the artifact placeholder markup; a default factory is used when omitted. */
  artifactElement?: ElementFactory;
}
|
|
|
|
/** Parsing progress kept per message between successive `parse` calls. */
interface MessageState {
  /** Index into the message text where the next `parse` call resumes. */
  position: number;

  /** True while the parser is between an artifact's opening and closing tags. */
  insideArtifact: boolean;

  /** True while the parser is between an action's opening and closing tags. */
  insideAction: boolean;

  /** The artifact currently open, if any. */
  currentArtifact?: BoltArtifactData;

  /** The action currently being accumulated. */
  currentAction: BoltActionData;

  /** Counter used to number the actions of this message. */
  actionId: number;
}
|
|
|
|
/**
 * Strips a markdown code fence that wraps the whole of `content`.
 *
 * When `content` consists of exactly one fenced block (optionally surrounded
 * by whitespace), the text between the fences is returned; otherwise
 * `content` is returned unchanged.
 *
 * The info string after the opening fence may be any text without backticks,
 * so fences such as "```c++" or "```objective-c" are recognised as well.
 *
 * @param content - Text that may be wrapped in a code fence.
 * @returns The fenced body, or `content` itself when it is not fenced.
 */
function cleanoutMarkdownSyntax(content: string) {
  const codeBlockRegex = /^\s*```[^\n`]*\n([\s\S]*?)\n\s*```\s*$/;
  const match = content.match(codeBlockRegex);

  return match ? match[1] : content;
}
|
|
|
|
/**
 * Turns HTML-escaped angle brackets back into literal ones.
 *
 * Only `&lt;` and `&gt;` are decoded; every other entity is left as is.
 *
 * @param content - Text that may contain `&lt;` / `&gt;` entities.
 * @returns The text with those entities replaced by `<` and `>`.
 */
function cleanEscapedTags(content: string) {
  return content.replace(/&lt;/g, '<').replace(/&gt;/g, '>');
}
|
|
/**
 * Incremental parser for assistant messages that may contain
 * `<boltArtifact>` / `<boltAction>` markup, `<bolt-quick-actions>` groups and
 * stray markdown code fences.
 *
 * `parse` is meant to be called repeatedly with the text of a message as it
 * grows: the parser remembers, per message id, how far it has read and only
 * processes what follows. Artifacts and actions are reported through the
 * configured callbacks and replaced in the returned text by placeholder
 * markup.
 */
export class StreamingMessageParser {
  /** Marker that opens and closes a markdown fenced code block. */
  static readonly #codeFence = '```';

  /** Fence language (lower-case) mapped to the extension of the file generated for it. */
  static readonly #extensionByLanguage: ReadonlyMap<string, string> = new Map([
    ['javascript', '.js'],
    ['js', '.js'],
    ['typescript', '.ts'],
    ['ts', '.ts'],
    ['jsx', '.jsx'],
    ['tsx', '.tsx'],
    ['python', '.py'],
    ['py', '.py'],
    ['java', '.java'],
    ['c', '.c'],
    ['cpp', '.cpp'],
    ['c++', '.cpp'],
    ['csharp', '.cs'],
    ['c#', '.cs'],
    ['php', '.php'],
    ['ruby', '.rb'],
    ['rb', '.rb'],
    ['go', '.go'],
    ['rust', '.rs'],
    ['rs', '.rs'],
    ['kotlin', '.kt'],
    ['kt', '.kt'],
    ['swift', '.swift'],
    ['html', '.html'],
    ['css', '.css'],
    ['scss', '.scss'],
    ['sass', '.sass'],
    ['less', '.less'],
    ['xml', '.xml'],
    ['json', '.json'],
    ['yaml', '.yaml'],
    ['yml', '.yml'],
    ['toml', '.toml'],
    ['markdown', '.md'],
    ['md', '.md'],
    ['sql', '.sql'],
    ['sh', '.sh'],
    ['bash', '.sh'],
    ['zsh', '.sh'],
    ['fish', '.fish'],
    ['powershell', '.ps1'],
    ['ps1', '.ps1'],
    ['dockerfile', '.dockerfile'],
    ['docker', '.dockerfile'],
    ['makefile', '.makefile'],
    ['make', '.makefile'],
    ['vim', '.vim'],
    ['lua', '.lua'],
    ['perl', '.pl'],
    ['r', '.r'],
    ['matlab', '.m'],
    ['julia', '.jl'],
    ['scala', '.scala'],
    ['clojure', '.clj'],
    ['haskell', '.hs'],
    ['erlang', '.erl'],
    ['elixir', '.ex'],
    ['nim', '.nim'],
    ['crystal', '.cr'],
    ['dart', '.dart'],
    ['vue', '.vue'],
    ['svelte', '.svelte'],
    ['astro', '.astro'],
  ]);

  /** Parsing progress per message id. */
  #messages = new Map<string, MessageState>();

  /** Number of artifacts generated from bare code fences; used to build unique file names. */
  #artifactCounter = 0;

  constructor(private _options: StreamingMessageParserOptions = {}) {}

  /**
   * Parses the not-yet-consumed tail of a message.
   *
   * @param messageId - Key under which the parsing progress is stored.
   * @param input - The full text of the message received so far; parsing
   *   resumes at the position stored by the previous call for this id.
   * @returns The display text for the newly consumed part: plain text is
   *   copied through, artifacts and fenced code blocks become placeholder
   *   markup, quick-action groups become button markup.
   *
   * Parsing stops early, without consuming the construct, when the input ends
   * in the middle of an artifact tag, an action, or a fenced code block; the
   * next call picks up from there.
   */
  parse(messageId: string, input: string) {
    let state = this.#messages.get(messageId);

    if (!state) {
      state = {
        position: 0,
        insideAction: false,
        insideArtifact: false,
        currentAction: { content: '' },
        actionId: 0,
      };

      this.#messages.set(messageId, state);
    }

    let output = '';
    let i = state.position;
    let earlyBreak = false;

    while (i < input.length) {
      if (input.startsWith(BOLT_QUICK_ACTIONS_OPEN, i)) {
        const actionsBlockEnd = input.indexOf(BOLT_QUICK_ACTIONS_CLOSE, i);

        if (actionsBlockEnd !== -1) {
          const actionsBlockContent = input.slice(i + BOLT_QUICK_ACTIONS_OPEN.length, actionsBlockEnd);

          output += this.#renderQuickActions(actionsBlockContent);
          i = actionsBlockEnd + BOLT_QUICK_ACTIONS_CLOSE.length;
          continue;
        }

        /*
         * NOTE(review): when the closing tag has not arrived yet, the opening
         * tag falls through to the branches below and is handled as ordinary
         * text.
         */
      }

      if (state.insideArtifact) {
        const currentArtifact = state.currentArtifact;

        if (currentArtifact === undefined) {
          unreachable('Artifact not initialized');
        }

        if (state.insideAction) {
          const closeIndex = input.indexOf(ARTIFACT_ACTION_TAG_CLOSE, i);
          const currentAction = state.currentAction;

          if (closeIndex !== -1) {
            currentAction.content += input.slice(i, closeIndex);

            let content = currentAction.content.trim();

            if ('type' in currentAction && currentAction.type === 'file') {
              content = this.#sanitizeFileContent(currentAction.filePath, content);
              content += '\n';
            }

            currentAction.content = content;

            this._options.callbacks?.onActionClose?.({
              artifactId: currentArtifact.id,
              messageId,

              /**
               * We decrement the id because it's been incremented already
               * when `onActionOpen` was emitted to make sure the ids are
               * the same.
               */
              actionId: String(state.actionId - 1),

              action: currentAction as BoltAction,
            });

            state.insideAction = false;
            state.currentAction = { content: '' };

            i = closeIndex + ARTIFACT_ACTION_TAG_CLOSE.length;
          } else {
            // The action is still streaming: report what has arrived so far and wait for more input.
            if ('type' in currentAction && currentAction.type === 'file') {
              const content = this.#sanitizeFileContent(currentAction.filePath, input.slice(i));

              this._options.callbacks?.onActionStream?.({
                artifactId: currentArtifact.id,
                messageId,
                actionId: String(state.actionId - 1),
                action: {
                  ...(currentAction as FileAction),
                  content,
                  filePath: currentAction.filePath,
                },
              });
            }

            break;
          }
        } else {
          const actionOpenIndex = input.indexOf(ARTIFACT_ACTION_TAG_OPEN, i);
          const artifactCloseIndex = input.indexOf(ARTIFACT_TAG_CLOSE, i);

          if (actionOpenIndex !== -1 && (artifactCloseIndex === -1 || actionOpenIndex < artifactCloseIndex)) {
            const actionEndIndex = input.indexOf('>', actionOpenIndex);

            if (actionEndIndex !== -1) {
              state.insideAction = true;

              state.currentAction = this.#parseActionTag(input, actionOpenIndex, actionEndIndex);

              this._options.callbacks?.onActionOpen?.({
                artifactId: currentArtifact.id,
                messageId,
                actionId: String(state.actionId++),
                action: state.currentAction as BoltAction,
              });

              i = actionEndIndex + 1;
            } else {
              // The action's opening tag is incomplete.
              break;
            }
          } else if (artifactCloseIndex !== -1) {
            this._options.callbacks?.onArtifactClose?.({ messageId, ...currentArtifact });

            state.insideArtifact = false;
            state.currentArtifact = undefined;

            i = artifactCloseIndex + ARTIFACT_TAG_CLOSE.length;
          } else {
            // Neither a new action nor the end of the artifact is visible yet.
            break;
          }
        }
      } else if (input[i] === '<' && input[i + 1] !== '/') {
        // Grow the candidate one character at a time until it matches or rules out the artifact tag.
        let j = i;
        let potentialTag = '';

        while (j < input.length && potentialTag.length < ARTIFACT_TAG_OPEN.length) {
          potentialTag += input[j];

          if (potentialTag === ARTIFACT_TAG_OPEN) {
            const nextChar = input[j + 1];

            // A longer tag name that merely starts with the artifact tag name is plain text.
            if (nextChar && nextChar !== '>' && nextChar !== ' ') {
              output += input.slice(i, j + 1);
              i = j + 1;
              break;
            }

            const openTagEnd = input.indexOf('>', j);

            if (openTagEnd !== -1) {
              const artifactTag = input.slice(i, openTagEnd + 1);

              const artifactTitle = this.#extractAttribute(artifactTag, 'title') as string;
              const type = this.#extractAttribute(artifactTag, 'type') as string;
              const artifactId = this.#extractAttribute(artifactTag, 'id') as string;

              if (!artifactTitle) {
                logger.warn('Artifact title missing');
              }

              if (!artifactId) {
                logger.warn('Artifact id missing');
              }

              state.insideArtifact = true;

              const currentArtifact = {
                id: artifactId,
                title: artifactTitle,
                type,
              } satisfies BoltArtifactData;

              state.currentArtifact = currentArtifact;

              this._options.callbacks?.onArtifactOpen?.({ messageId, ...currentArtifact });

              const artifactFactory = this._options.artifactElement ?? createArtifactElement;

              output += artifactFactory({ messageId });

              i = openTagEnd + 1;
            } else {
              // The opening tag's attributes are still streaming in.
              earlyBreak = true;
            }

            break;
          } else if (!ARTIFACT_TAG_OPEN.startsWith(potentialTag)) {
            output += input.slice(i, j + 1);
            i = j + 1;
            break;
          }

          j++;
        }

        // The input ended while the text could still turn into an artifact tag.
        if (j === input.length && ARTIFACT_TAG_OPEN.startsWith(potentialTag)) {
          break;
        }
      } else {
        // Outside artifacts, a fenced code block is converted into a generated file artifact.
        const block = this.#readFencedBlock(input, i);

        if (block.status === 'incomplete') {
          /*
           * Leave the position on the opening fence. Consuming it now would
           * make the closing fence, once it arrives, look like a new opening
           * fence on the next call.
           */
          break;
        }

        if (block.status === 'complete') {
          output +=
            block.content.trim() === ''
              ? input.slice(i, block.end) // nothing to write to a file: keep the text as it is
              : this.#emitCodeBlockArtifact(messageId, state, block.language, block.content);

          i = block.end;
          continue;
        }

        output += input[i];
        i++;
      }

      if (earlyBreak) {
        break;
      }
    }

    state.position = i;

    return output;
  }

  /** Forgets the parsing progress of all messages. */
  reset() {
    this.#messages.clear();
  }

  /**
   * Converts the inside of a `<bolt-quick-actions>` group into button markup.
   *
   * @param content - Text between the group's opening and closing tags.
   * @returns The markup of the whole group.
   */
  #renderQuickActions(content: string) {
    // Find all <bolt-quick-action ...>label</bolt-quick-action> inside
    const quickActionRegex = /<bolt-quick-action([^>]*)>([\s\S]*?)<\/bolt-quick-action>/g;
    const buttons: string[] = [];

    for (const match of content.matchAll(quickActionRegex)) {
      const tagAttrs = match[1];
      const label = match[2];

      buttons.push(
        createQuickActionElement(
          {
            type: this.#extractAttribute(tagAttrs, 'type') ?? '',
            message: this.#extractAttribute(tagAttrs, 'message') ?? '',
            path: this.#extractAttribute(tagAttrs, 'path') ?? '',
            href: this.#extractAttribute(tagAttrs, 'href') ?? '',
          },
          label,
        ),
      );
    }

    return createQuickActionGroup(buttons);
  }

  /**
   * Removes a wrapping code fence and escaped angle brackets from file
   * content, except for markdown files, which are returned untouched.
   *
   * @param filePath - Target path of the file action; may be missing when the
   *   action tag carried no `filePath` attribute.
   * @param content - Raw content of the file action.
   */
  #sanitizeFileContent(filePath: string | undefined, content: string) {
    if (filePath?.endsWith('.md')) {
      return content;
    }

    return cleanEscapedTags(cleanoutMarkdownSyntax(content));
  }

  /**
   * Checks whether a fenced code block starts at `start`.
   *
   * A fence only counts when its backticks are the first characters of a
   * line and the rest of that line contains no backtick (otherwise it is an
   * inline code span). The block ends at the next line that begins with a
   * fence.
   *
   * @returns `none` when no fence starts here, `incomplete` when a fence
   *   starts here (or may be starting) but the input ends before its closing
   *   fence, and `complete` with the language, the body and the index just
   *   past the closing fence otherwise.
   */
  #readFencedBlock(
    input: string,
    start: number,
  ): { status: 'none' } | { status: 'incomplete' } | { status: 'complete'; language: string; content: string; end: number } {
    const fence = StreamingMessageParser.#codeFence;
    const atLineStart = start === 0 || input[start - 1] === '\n';

    if (!atLineStart || input[start] !== '`') {
      return { status: 'none' };
    }

    // Fewer characters left than a fence needs: wait only if they are all backticks.
    if (input.length - start < fence.length) {
      return fence.startsWith(input.slice(start)) ? { status: 'incomplete' } : { status: 'none' };
    }

    if (!input.startsWith(fence, start)) {
      return { status: 'none' };
    }

    const infoStart = start + fence.length;
    const infoEnd = input.indexOf('\n', infoStart);
    const info = infoEnd === -1 ? input.slice(infoStart) : input.slice(infoStart, infoEnd);

    if (info.includes('`')) {
      return { status: 'none' };
    }

    if (infoEnd === -1) {
      return { status: 'incomplete' };
    }

    // Search from the newline that ends the info line so an empty block closes on the very next line.
    const closeIndex = input.indexOf(`\n${fence}`, infoEnd);

    if (closeIndex === -1) {
      return { status: 'incomplete' };
    }

    return {
      status: 'complete',
      language: info.trim(),
      content: input.slice(infoEnd + 1, closeIndex),
      end: closeIndex + 1 + fence.length,
    };
  }

  /**
   * Reports a bare code block as a generated artifact holding one file action.
   *
   * The callbacks fire in the order artifact open, action open, action close,
   * artifact close.
   *
   * @param messageId - Id of the message that contains the code block.
   * @param state - Parsing state of that message; its action counter is advanced.
   * @param language - Info string of the opening fence.
   * @param codeContent - Text between the fences.
   * @returns The placeholder markup for the generated artifact.
   */
  #emitCodeBlockArtifact(messageId: string, state: MessageState, language: string, codeContent: string) {
    const fileExtension = this.#getFileExtension(language);
    const fileName = `code_${++this.#artifactCounter}${fileExtension}`;
    const artifactId = `artifact_${Date.now()}_${this.#artifactCounter}`;
    const autoArtifact = {
      id: artifactId,
      title: fileName,
      type: 'code',
    };

    this._options.callbacks?.onArtifactOpen?.({ messageId, ...autoArtifact });

    const artifactFactory = this._options.artifactElement ?? createArtifactElement;
    const element = artifactFactory({ messageId });

    const fileAction = {
      type: 'file' as const,
      filePath: fileName,
      content: codeContent + '\n',
    };
    const actionId = String(state.actionId++);

    this._options.callbacks?.onActionOpen?.({ artifactId, messageId, actionId, action: fileAction });
    this._options.callbacks?.onActionClose?.({ artifactId, messageId, actionId, action: fileAction });
    this._options.callbacks?.onArtifactClose?.({ messageId, ...autoArtifact });

    return element;
  }

  /**
   * Builds the action object described by an action's opening tag.
   *
   * @param input - The message text.
   * @param actionOpenIndex - Index of the tag's `<`.
   * @param actionEndIndex - Index of the tag's `>`.
   * @returns The action with an empty `content`; `filePath` and `operation`
   *   are filled in for the action types that carry them.
   * @throws Error when a supabase action has a missing or unknown operation,
   *   or a migration has no `filePath`.
   */
  #parseActionTag(input: string, actionOpenIndex: number, actionEndIndex: number) {
    const actionTag = input.slice(actionOpenIndex, actionEndIndex + 1);

    const actionType = this.#extractAttribute(actionTag, 'type') as ActionType;

    const actionAttributes = {
      type: actionType,
      content: '',
    };

    if (actionType === 'supabase') {
      const operation = this.#extractAttribute(actionTag, 'operation');

      if (!operation || !['migration', 'query'].includes(operation)) {
        logger.warn(`Invalid or missing operation for Supabase action: ${operation}`);
        throw new Error(`Invalid Supabase operation: ${operation}`);
      }

      (actionAttributes as SupabaseAction).operation = operation as 'migration' | 'query';

      if (operation === 'migration') {
        const filePath = this.#extractAttribute(actionTag, 'filePath');

        if (!filePath) {
          logger.warn('Migration requires a filePath');
          throw new Error('Migration requires a filePath');
        }

        (actionAttributes as SupabaseAction).filePath = filePath;
      }
    } else if (actionType === 'file') {
      const filePath = this.#extractAttribute(actionTag, 'filePath') as string;

      if (!filePath) {
        logger.debug('File path not specified');
      }

      (actionAttributes as FileAction).filePath = filePath;
    } else if (!['shell', 'start'].includes(actionType)) {
      logger.warn(`Unknown action type '${actionType}'`);
    }

    return actionAttributes as FileAction | ShellAction;
  }

  /**
   * Reads a double-quoted attribute value from a tag, ignoring case.
   *
   * The name must not be the tail of a longer attribute name: asking for
   * `path` does not match `filePath="..."`.
   *
   * @param tag - The tag text, or just its attribute part.
   * @param attributeName - Name of the attribute to read.
   * @returns The attribute value, or `undefined` when the attribute is absent.
   */
  #extractAttribute(tag: string, attributeName: string): string | undefined {
    const match = tag.match(new RegExp(`(?:^|[^\\w-])${attributeName}="([^"]*)"`, 'i'));

    return match ? match[1] : undefined;
  }

  /**
   * Chooses the file extension for a fence info string.
   *
   * Only the first word of the info string is considered, case-insensitively.
   *
   * @param language - Info string of the opening fence; may be empty.
   * @returns The mapped extension, or `.txt` for an unknown or missing language.
   */
  #getFileExtension(language: string): string {
    const [firstWord = ''] = language.trim().toLowerCase().split(/\s+/);

    return StreamingMessageParser.#extensionByLanguage.get(firstWord) ?? '.txt';
  }
}
|
|
|
|
/**
 * Default artifact placeholder: an empty `div` with the class
 * `__boltArtifact__` and one `data-*` attribute per property.
 *
 * Property names are converted to dash-case; values are written with
 * `JSON.stringify`, i.e. as double-quoted strings.
 */
const createArtifactElement: ElementFactory = (props) => {
  const dataAttributes = Object.entries(props).map(
    ([key, value]) => `data-${camelToDashCase(key)}=${JSON.stringify(value)}`,
  );
  const elementProps = ['class="__boltArtifact__"', ...dataAttributes];

  return `<div ${elementProps.join(' ')}></div>`;
};
|
|
|
|
/**
 * Converts a camelCase identifier to dash-case.
 *
 * A dash is inserted between a lower-case letter or digit and the upper-case
 * letter that follows it, then the whole string is lower-cased:
 * `messageId` becomes `message-id`, `item2Name` becomes `item2-name`.
 *
 * @param input - The identifier to convert.
 * @returns The dash-cased, lower-case identifier.
 */
function camelToDashCase(input: string) {
  return input.replace(/([a-z0-9])([A-Z])/g, '$1-$2').toLowerCase();
}
|
|
|
|
/**
 * Builds the markup of one quick-action button.
 *
 * @param props - Values exposed as `data-*` attributes; names are converted
 *   to dash-case and values are written with `JSON.stringify`.
 * @param label - Button content, inserted as is.
 * @returns A `button` element with the class `__boltQuickAction__`.
 */
function createQuickActionElement(props: Record<string, string>, label: string) {
  const elementProps = [
    'class="__boltQuickAction__"',
    'data-bolt-quick-action="true"',
    ...Object.entries(props).map(([key, value]) => `data-${camelToDashCase(key)}=${JSON.stringify(value)}`),
  ];

  return `<button ${elementProps.join(' ')}>${label}</button>`;
}
|
|
|
|
/**
 * Wraps quick-action buttons in their container element.
 *
 * @param buttons - Markup of the individual buttons, concatenated in order.
 * @returns A `div` with the class `__boltQuickAction__` containing the buttons.
 */
function createQuickActionGroup(buttons: string[]) {
  return `<div class="__boltQuickAction__" data-bolt-quick-action="true">${buttons.join('')}</div>`;
}
|