Files
bolt-diy/app/lib/runtime/message-parser.ts
Keoma Wright 39d0775b37 fix: auto-detect and convert code blocks to artifacts when missing tags
When AI models fail to use proper artifact tags, code blocks now get
automatically detected and converted to file artifacts, preventing code
from appearing in chat. The parser detects markdown code fences outside
artifacts and wraps them with proper artifact/action tags.

This fixes the issue where code would randomly appear in chat instead
of being generated as files in the workspace.

Fixes #1230

Co-Authored-By: Keoma Wright <founder@lovemedia.org.za>
2025-08-24 10:50:15 +00:00

538 lines
16 KiB
TypeScript

import type { ActionType, BoltAction, BoltActionData, FileAction, ShellAction, SupabaseAction } from '~/types/actions';
import type { BoltArtifactData } from '~/types/artifact';
import { createScopedLogger } from '~/utils/logger';
import { unreachable } from '~/utils/unreachable';
// Literal markers the parser scans for in the raw message text.

/** Start of an artifact opening tag; attributes and the closing `>` follow it. */
const ARTIFACT_TAG_OPEN = '<boltArtifact';
/** Closing tag of an artifact. */
const ARTIFACT_TAG_CLOSE = '</boltArtifact>';
/** Start of an action opening tag (only looked for while inside an artifact). */
const ARTIFACT_ACTION_TAG_OPEN = '<boltAction';
/** Closing tag of an action. */
const ARTIFACT_ACTION_TAG_CLOSE = '</boltAction>';
/** Opening tag of a quick-actions block. */
const BOLT_QUICK_ACTIONS_OPEN = '<bolt-quick-actions>';
/** Closing tag of a quick-actions block. */
const BOLT_QUICK_ACTIONS_CLOSE = '</bolt-quick-actions>';
/** Logger scoped to this module. */
const logger = createScopedLogger('MessageParser');
/**
 * Payload passed to artifact callbacks: the artifact's own data plus the id
 * of the message in which it was found.
 */
export interface ArtifactCallbackData extends BoltArtifactData {
/** Id of the message being parsed (the `messageId` given to `parse`). */
messageId: string;
}
/**
 * Payload passed to action callbacks.
 */
export interface ActionCallbackData {
/** Id of the artifact the action belongs to. */
artifactId: string;
/** Id of the message being parsed (the `messageId` given to `parse`). */
messageId: string;
/** Stringified per-message action counter; open/stream/close events of one action share it. */
actionId: string;
/** The action as parsed so far. */
action: BoltAction;
}
/** Listener invoked with artifact data when an artifact is opened or closed. */
export type ArtifactCallback = (data: ArtifactCallbackData) => void;
/** Listener invoked with action data when an action is opened, streamed, or closed. */
export type ActionCallback = (data: ActionCallbackData) => void;
/**
 * Optional listeners the parser invokes while it walks a message.
 * Every listener may be omitted.
 */
export interface ParserCallbacks {
/** Called once an artifact opening tag has been fully parsed (or an artifact is auto-generated from a code fence). */
onArtifactOpen?: ArtifactCallback;
/** Called when the artifact's closing tag is reached (or right after an auto-generated artifact's action). */
onArtifactClose?: ArtifactCallback;
/** Called once an action opening tag has been fully parsed. */
onActionOpen?: ActionCallback;
/** Called for file actions whose closing tag has not arrived yet, with the content available so far. */
onActionStream?: ActionCallback;
/** Called when the action's closing tag is reached, with the final content. */
onActionClose?: ActionCallback;
}
/** Values handed to an element factory when an artifact placeholder is rendered. */
interface ElementFactoryProps {
/** Id of the message that contains the artifact. */
messageId: string;
}
/** Builds the markup string that is put into the parser output in place of an artifact. */
type ElementFactory = (props: ElementFactoryProps) => string;
/** Construction options for `StreamingMessageParser`. */
export interface StreamingMessageParserOptions {
/** Listeners for artifact and action events. */
callbacks?: ParserCallbacks;
/** Custom placeholder factory; when omitted the parser uses `createArtifactElement`. */
artifactElement?: ElementFactory;
}
/** Parsing state kept per message id between successive `parse` calls. */
interface MessageState {
/** Index into the message text at which the next `parse` call resumes. */
position: number;
/** True while the cursor is between an artifact's opening and closing tag. */
insideArtifact: boolean;
/** True while the cursor is between an action's opening and closing tag. */
insideAction: boolean;
/** The artifact currently open; unset when no artifact is open. */
currentArtifact?: BoltArtifactData;
/** The action currently being accumulated (reset to empty content after each close). */
currentAction: BoltActionData;
/** Counter incremented each time an action is opened; used to derive action ids. */
actionId: number;
}
/**
 * Unwraps `content` when it consists of exactly one markdown code fence.
 *
 * The whole string (ignoring surrounding whitespace) must be an opening
 * fence with an optional word-character language tag, a newline, the body,
 * a newline, and a closing fence. In that case only the body is returned;
 * any other input is returned unchanged.
 *
 * @param content Text that may be wrapped in a code fence.
 * @returns The fenced body, or `content` itself when it is not a single fence.
 */
function cleanoutMarkdownSyntax(content: string) {
  const fenced = /^\s*```\w*\n([\s\S]*?)\n\s*```\s*$/.exec(content);

  return fenced ? fenced[1] : content;
}
/**
 * Turns the HTML entities `&lt;` and `&gt;` back into literal `<` and `>`.
 * No other entity is touched.
 *
 * @param content Text that may contain escaped angle brackets.
 * @returns The text with both entities decoded.
 */
function cleanEscapedTags(content: string) {
  return content.replace(/&(lt|gt);/g, (_entity, name: string) => (name === 'lt' ? '<' : '>'));
}
/**
 * Incremental parser for assistant messages.
 *
 * `parse` is meant to be called repeatedly with the text of a message
 * received so far. For each message id the parser remembers where it
 * stopped, so every call only processes the new part. While walking the
 * text it:
 *
 * - replaces `<bolt-quick-actions>` blocks with button markup,
 * - replaces `<boltArtifact …>` … `</boltArtifact>` with a placeholder element
 *   and reports the contained `<boltAction>`s through the callbacks,
 * - converts complete markdown code fences found outside artifacts into an
 *   auto-generated artifact with a single file action,
 * - passes every other character through unchanged.
 */
export class StreamingMessageParser {
  /** Parsing state per message id, so each `parse` call resumes where the previous one stopped. */
  #messages = new Map<string, MessageState>();

  /** Number of auto-generated artifacts so far; used in their file names and ids. Not cleared by `reset`. */
  #artifactCounter = 0;

  /**
   * @param _options Optional callbacks and a custom artifact placeholder factory.
   */
  constructor(private _options: StreamingMessageParserOptions = {}) {}

  /**
   * Parses the not-yet-consumed tail of `input` for the given message.
   *
   * @param messageId Id of the message; selects the stored parsing state.
   * @param input The message text received so far (the full text, not just the new chunk).
   * @returns The displayable output produced by THIS call only.
   * @throws Error when a Supabase action tag is invalid (see `#parseActionTag`).
   */
  parse(messageId: string, input: string) {
    let state = this.#messages.get(messageId);

    if (!state) {
      state = {
        position: 0,
        insideAction: false,
        insideArtifact: false,
        currentAction: { content: '' },
        actionId: 0,
      };

      this.#messages.set(messageId, state);
    }

    let output = '';
    let i = state.position;

    // Set when an artifact opening tag has started but its `>` has not arrived yet.
    let earlyBreak = false;

    while (i < input.length) {
      if (input.startsWith(BOLT_QUICK_ACTIONS_OPEN, i)) {
        const actionsBlockEnd = input.indexOf(BOLT_QUICK_ACTIONS_CLOSE, i);

        // Only a complete block is converted; otherwise fall through to the generic handling below.
        if (actionsBlockEnd !== -1) {
          const actionsBlockContent = input.slice(i + BOLT_QUICK_ACTIONS_OPEN.length, actionsBlockEnd);

          // Find all <bolt-quick-action ...>label</bolt-quick-action> inside
          const quickActionRegex = /<bolt-quick-action([^>]*)>([\s\S]*?)<\/bolt-quick-action>/g;
          let match: RegExpExecArray | null;
          const buttons: string[] = [];

          while ((match = quickActionRegex.exec(actionsBlockContent)) !== null) {
            const tagAttrs = match[1];
            const label = match[2];
            const type = this.#extractAttribute(tagAttrs, 'type');
            const message = this.#extractAttribute(tagAttrs, 'message');
            const path = this.#extractAttribute(tagAttrs, 'path');
            const href = this.#extractAttribute(tagAttrs, 'href');

            buttons.push(
              createQuickActionElement(
                { type: type || '', message: message || '', path: path || '', href: href || '' },
                label,
              ),
            );
          }

          output += createQuickActionGroup(buttons);
          i = actionsBlockEnd + BOLT_QUICK_ACTIONS_CLOSE.length;

          continue;
        }
      }

      if (state.insideArtifact) {
        const currentArtifact = state.currentArtifact;

        if (currentArtifact === undefined) {
          unreachable('Artifact not initialized');
        }

        if (state.insideAction) {
          const closeIndex = input.indexOf(ARTIFACT_ACTION_TAG_CLOSE, i);
          const currentAction = state.currentAction;

          if (closeIndex !== -1) {
            // The action is complete: finalize its content and report it.
            currentAction.content += input.slice(i, closeIndex);

            let content = currentAction.content.trim();

            if ('type' in currentAction && currentAction.type === 'file') {
              // Remove markdown code block syntax if present and file is not markdown
              if (!currentAction.filePath.endsWith('.md')) {
                content = cleanoutMarkdownSyntax(content);
                content = cleanEscapedTags(content);
              }

              content += '\n';
            }

            currentAction.content = content;

            this._options.callbacks?.onActionClose?.({
              artifactId: currentArtifact.id,
              messageId,

              /**
               * We decrement the id because it's been incremented already
               * when `onActionOpen` was emitted to make sure the ids are
               * the same.
               */
              actionId: String(state.actionId - 1),
              action: currentAction as BoltAction,
            });

            state.insideAction = false;
            state.currentAction = { content: '' };

            i = closeIndex + ARTIFACT_ACTION_TAG_CLOSE.length;
          } else {
            // Closing tag not received yet: stream the partial content of file
            // actions, then stop. `i` is left untouched so the same content is
            // re-read (and completed) on the next call.
            if ('type' in currentAction && currentAction.type === 'file') {
              let content = input.slice(i);

              if (!currentAction.filePath.endsWith('.md')) {
                content = cleanoutMarkdownSyntax(content);
                content = cleanEscapedTags(content);
              }

              this._options.callbacks?.onActionStream?.({
                artifactId: currentArtifact.id,
                messageId,
                actionId: String(state.actionId - 1),
                action: {
                  ...(currentAction as FileAction),
                  content,
                  filePath: currentAction.filePath,
                },
              });
            }

            break;
          }
        } else {
          const actionOpenIndex = input.indexOf(ARTIFACT_ACTION_TAG_OPEN, i);
          const artifactCloseIndex = input.indexOf(ARTIFACT_TAG_CLOSE, i);

          if (actionOpenIndex !== -1 && (artifactCloseIndex === -1 || actionOpenIndex < artifactCloseIndex)) {
            const actionEndIndex = input.indexOf('>', actionOpenIndex);

            if (actionEndIndex !== -1) {
              // Parse the tag BEFORE flagging the action as open: `#parseActionTag`
              // can throw, and the state must not claim an open action whose tag
              // was never parsed.
              const parsedAction = this.#parseActionTag(input, actionOpenIndex, actionEndIndex);

              state.insideAction = true;
              state.currentAction = parsedAction;

              this._options.callbacks?.onActionOpen?.({
                artifactId: currentArtifact.id,
                messageId,
                actionId: String(state.actionId++),
                action: state.currentAction as BoltAction,
              });

              i = actionEndIndex + 1;
            } else {
              // The action opening tag is still incomplete; wait for more input.
              break;
            }
          } else if (artifactCloseIndex !== -1) {
            this._options.callbacks?.onArtifactClose?.({ messageId, ...currentArtifact });

            state.insideArtifact = false;
            state.currentArtifact = undefined;

            i = artifactCloseIndex + ARTIFACT_TAG_CLOSE.length;
          } else {
            // Neither an action nor the artifact end is visible yet; wait for more input.
            break;
          }
        }
      } else if (input[i] === '<' && input[i + 1] !== '/') {
        // Possible start of an artifact tag: compare character by character.
        let j = i;
        let potentialTag = '';

        while (j < input.length && potentialTag.length < ARTIFACT_TAG_OPEN.length) {
          potentialTag += input[j];

          if (potentialTag === ARTIFACT_TAG_OPEN) {
            const nextChar = input[j + 1];

            // A longer tag name that merely starts with "<boltArtifact" is plain text.
            if (nextChar && nextChar !== '>' && nextChar !== ' ') {
              output += input.slice(i, j + 1);
              i = j + 1;
              break;
            }

            const openTagEnd = input.indexOf('>', j);

            if (openTagEnd !== -1) {
              const artifactTag = input.slice(i, openTagEnd + 1);

              const artifactTitle = this.#extractAttribute(artifactTag, 'title') as string;
              const type = this.#extractAttribute(artifactTag, 'type') as string;
              const artifactId = this.#extractAttribute(artifactTag, 'id') as string;

              if (!artifactTitle) {
                logger.warn('Artifact title missing');
              }

              if (!artifactId) {
                logger.warn('Artifact id missing');
              }

              state.insideArtifact = true;

              const currentArtifact = {
                id: artifactId,
                title: artifactTitle,
                type,
              } satisfies BoltArtifactData;

              state.currentArtifact = currentArtifact;

              this._options.callbacks?.onArtifactOpen?.({ messageId, ...currentArtifact });

              const artifactFactory = this._options.artifactElement ?? createArtifactElement;

              output += artifactFactory({ messageId });

              i = openTagEnd + 1;
            } else {
              // Tag name matched but `>` is missing: stop and retry from `i` on the next call.
              earlyBreak = true;
            }

            break;
          } else if (!ARTIFACT_TAG_OPEN.startsWith(potentialTag)) {
            // Not an artifact tag after all: emit what was scanned as plain text.
            output += input.slice(i, j + 1);
            i = j + 1;
            break;
          }

          j++;
        }

        // Input ended in the middle of what may still become "<boltArtifact": wait for more.
        if (j === input.length && ARTIFACT_TAG_OPEN.startsWith(potentialTag)) {
          break;
        }
      } else {
        /*
         * Code fence outside of an artifact (`state.insideArtifact` is always
         * false in this branch).
         *
         * NOTE(review): only fences whose closing "```" is already present in
         * `input` are converted. If the closing fence has not arrived yet, the
         * backticks are emitted as plain text below and the cursor moves past
         * them, so that block is never converted on a later call.
         */
        if (input.startsWith('```', i)) {
          // End of the info string (language) line.
          const languageEnd = input.indexOf('\n', i + 3);

          if (languageEnd !== -1) {
            /*
             * Search from `languageEnd` itself (not `languageEnd + 1`): for an
             * empty block ("```js\n```") the newline that ends the info line is
             * also the newline that precedes the closing fence. Starting one
             * character later would skip it and swallow everything up to the
             * next fence in the message.
             */
            const codeBlockEnd = input.indexOf('\n```', languageEnd);

            if (codeBlockEnd !== -1) {
              // Extract language and code content (empty string for an empty block)
              const language = input.slice(i + 3, languageEnd).trim();
              const codeContent = input.slice(languageEnd + 1, codeBlockEnd);

              // Determine file extension based on language
              const fileExtension = this.#getFileExtension(language);
              const fileName = `code_${++this.#artifactCounter}${fileExtension}`;

              // Auto-generate artifact and action tags
              const artifactId = `artifact_${Date.now()}_${this.#artifactCounter}`;
              const autoArtifact = {
                id: artifactId,
                title: fileName,
                type: 'code',
              };

              // Emit artifact open callback
              this._options.callbacks?.onArtifactOpen?.({ messageId, ...autoArtifact });

              // Add artifact element to output
              const artifactFactory = this._options.artifactElement ?? createArtifactElement;
              output += artifactFactory({ messageId });

              // Emit action for file creation
              const fileAction = {
                type: 'file' as const,
                filePath: fileName,
                content: codeContent + '\n',
              };

              this._options.callbacks?.onActionOpen?.({
                artifactId,
                messageId,
                actionId: String(state.actionId++),
                action: fileAction,
              });

              // Same id as the open event: the counter was already incremented above.
              this._options.callbacks?.onActionClose?.({
                artifactId,
                messageId,
                actionId: String(state.actionId - 1),
                action: fileAction,
              });

              // Emit artifact close callback
              this._options.callbacks?.onArtifactClose?.({ messageId, ...autoArtifact });

              // Move position past the code block
              i = codeBlockEnd + 4; // +4 for \n```
              continue;
            }
          }
        }

        output += input[i];
        i++;
      }

      if (earlyBreak) {
        break;
      }
    }

    state.position = i;

    return output;
  }

  /** Forgets the parsing state of all messages. */
  reset() {
    this.#messages.clear();
  }

  /**
   * Builds the initial action object from an action opening tag.
   *
   * @param input Full message text.
   * @param actionOpenIndex Index of the tag's `<`.
   * @param actionEndIndex Index of the tag's `>`.
   * @returns An action with empty content and the attributes relevant to its type.
   * @throws Error for a Supabase action with a missing/unknown `operation`, or a
   *   migration without `filePath`.
   */
  #parseActionTag(input: string, actionOpenIndex: number, actionEndIndex: number) {
    const actionTag = input.slice(actionOpenIndex, actionEndIndex + 1);

    const actionType = this.#extractAttribute(actionTag, 'type') as ActionType;

    const actionAttributes = {
      type: actionType,
      content: '',
    };

    if (actionType === 'supabase') {
      const operation = this.#extractAttribute(actionTag, 'operation');

      if (!operation || !['migration', 'query'].includes(operation)) {
        logger.warn(`Invalid or missing operation for Supabase action: ${operation}`);
        throw new Error(`Invalid Supabase operation: ${operation}`);
      }

      (actionAttributes as SupabaseAction).operation = operation as 'migration' | 'query';

      if (operation === 'migration') {
        const filePath = this.#extractAttribute(actionTag, 'filePath');

        if (!filePath) {
          logger.warn('Migration requires a filePath');
          throw new Error('Migration requires a filePath');
        }

        (actionAttributes as SupabaseAction).filePath = filePath;
      }
    } else if (actionType === 'file') {
      const filePath = this.#extractAttribute(actionTag, 'filePath') as string;

      if (!filePath) {
        logger.debug('File path not specified');
      }

      (actionAttributes as FileAction).filePath = filePath;
    } else if (!['shell', 'start'].includes(actionType)) {
      logger.warn(`Unknown action type '${actionType}'`);
    }

    return actionAttributes as FileAction | ShellAction;
  }

  /**
   * Reads a double-quoted attribute value from a tag string.
   *
   * The name is matched case-insensitively and without a word boundary, so a
   * name that is the suffix of another attribute (e.g. `path` vs. `filePath`)
   * matches whichever occurs first in the tag.
   *
   * @returns The attribute value, or `undefined` when it is not present.
   */
  #extractAttribute(tag: string, attributeName: string): string | undefined {
    const match = tag.match(new RegExp(`${attributeName}="([^"]*)"`, 'i'));
    return match ? match[1] : undefined;
  }

  /**
   * Maps a code-fence info string to a file extension (including the dot).
   *
   * @param language The text following the opening fence, e.g. `ts` or `ts title="x"`.
   * @returns The extension for a known language, otherwise `.txt`.
   */
  #getFileExtension(language: string): string {
    const languageMap: Record<string, string> = {
      javascript: '.js',
      js: '.js',
      typescript: '.ts',
      ts: '.ts',
      jsx: '.jsx',
      tsx: '.tsx',
      python: '.py',
      py: '.py',
      java: '.java',
      c: '.c',
      cpp: '.cpp',
      'c++': '.cpp',
      csharp: '.cs',
      'c#': '.cs',
      php: '.php',
      ruby: '.rb',
      rb: '.rb',
      go: '.go',
      rust: '.rs',
      rs: '.rs',
      kotlin: '.kt',
      kt: '.kt',
      swift: '.swift',
      html: '.html',
      css: '.css',
      scss: '.scss',
      sass: '.sass',
      less: '.less',
      xml: '.xml',
      json: '.json',
      yaml: '.yaml',
      yml: '.yml',
      toml: '.toml',
      markdown: '.md',
      md: '.md',
      sql: '.sql',
      sh: '.sh',
      bash: '.sh',
      zsh: '.sh',
      fish: '.fish',
      powershell: '.ps1',
      ps1: '.ps1',
      dockerfile: '.dockerfile',
      docker: '.dockerfile',
      makefile: '.makefile',
      make: '.makefile',
      vim: '.vim',
      lua: '.lua',
      perl: '.pl',
      r: '.r',
      matlab: '.m',
      julia: '.jl',
      scala: '.scala',
      clojure: '.clj',
      haskell: '.hs',
      erlang: '.erl',
      elixir: '.ex',
      nim: '.nim',
      crystal: '.cr',
      dart: '.dart',
      vue: '.vue',
      svelte: '.svelte',
      astro: '.astro',
    };

    // Only the first word of the info string names the language; anything
    // after it (e.g. `title="x"`) is extra metadata.
    const normalized = (language.trim().split(/\s+/)[0] ?? '').toLowerCase();

    // Own-property check so that names such as "constructor" do not resolve to
    // members inherited from Object.prototype.
    const extension = Object.prototype.hasOwnProperty.call(languageMap, normalized)
      ? languageMap[normalized]
      : undefined;

    return extension || '.txt';
  }
}
/**
 * Default artifact placeholder: an empty `<div>` carrying the class
 * `__boltArtifact__` and one `data-*` attribute per prop. Prop names are
 * converted to dash-case and values are written as JSON strings.
 */
const createArtifactElement: ElementFactory = (props) => {
  const attributes = ['class="__boltArtifact__"'];

  for (const [name, value] of Object.entries(props)) {
    attributes.push(`data-${camelToDashCase(name)}=${JSON.stringify(value)}`);
  }

  return `<div ${attributes.join(' ')}></div>`;
};
/**
 * Converts a camelCase identifier to dash-case.
 *
 * A dash is inserted wherever a lowercase ASCII letter is directly followed
 * by an uppercase ASCII letter; the result is then lowercased.
 *
 * @param input Identifier such as `messageId`.
 * @returns The dash-case form, e.g. `message-id`.
 */
function camelToDashCase(input: string) {
  const withDashes = input.replace(/(?<=[a-z])(?=[A-Z])/g, '-');

  return withDashes.toLowerCase();
}
/**
 * Builds the markup for a single quick-action button.
 *
 * Each entry of `props` becomes a `data-*` attribute (name in dash-case,
 * value written as a JSON string).
 *
 * NOTE(review): `label` and the attribute values are inserted without HTML
 * escaping; confirm that the consumer sanitizes this markup.
 *
 * @param props Attribute values for the button (e.g. type, message, path, href).
 * @param label Inner content of the button.
 * @returns The `<button>` markup.
 */
function createQuickActionElement(props: Record<string, string>, label: string) {
  const elementProps = [
    'class="__boltQuickAction__"',
    'data-bolt-quick-action="true"',
    ...Object.entries(props).map(([key, value]) => `data-${camelToDashCase(key)}=${JSON.stringify(value)}`),
  ];

  // The leftover debug `console.log` of every generated button was removed.
  return `<button ${elementProps.join(' ')}>${label}</button>`;
}
/**
 * Wraps already-rendered quick-action buttons in their container `<div>`.
 *
 * @param buttons Markup of the individual buttons, concatenated in order.
 * @returns The container markup.
 */
function createQuickActionGroup(buttons: string[]) {
  const openingTag = '<div class="__boltQuickAction__" data-bolt-quick-action="true">';
  const children = buttons.join('');

  return openingTag + children + '</div>';
}