"use client";

import { zipArrays } from "../zip";
import { TokenIdentifiers } from "./TokenIdentifiers";

/**
 * Tokenizes `body` and builds its token tree.
 *
 * @param body Raw source text to parse.
 * @returns A tuple of the resulting tokens and the index of the first
 * indentation pattern found in `body`: `0` for two spaces, `1` for four
 * spaces, `2` for a tab. `0` is also returned when no pattern matches.
 */
export const createElements = (body: string): [Token[], number] => {
  // Literal spaces (not `\s`) so that, under the `m` flag, a run of newlines
  // in front of text is not mistaken for indentation.
  const tabOptions = [
    /^ {2}(?!\s)/m,
    /^ {4}(?!\s)/m,
    /^\t(?!\s)/m,
  ];

  let tabSpacing = 0;
  for (const [i, tabOption] of tabOptions.entries()) {
    if (tabOption.test(body)) {
      tabSpacing = i;
      break;
    }
  }

  const tokens = tokenize(body);
  return [buildAbstractSyntaxTree(tokens, body), tabSpacing];
};

/**
 * Runs every identifier in `TokenIdentifiers` over `body` and records the
 * span of each match.
 *
 * A `"p"` match is dropped when a marker starting at the same offset has
 * already been recorded.
 *
 * @param body Raw source text to scan.
 * @returns Markers in identifier-iteration order (not sorted by position).
 */
const tokenize = (body: string) => {
  const tokenizedBody: tokenMarker[] = [];
  // Start offsets of every marker recorded so far, for the "p" duplicate check.
  const seenStarts = new Set<number>();

  for (const [type, token] of TokenIdentifiers.entries()) {
    const base = new RegExp(token.rx);
    // Without the global (or sticky) flag `exec` never advances `lastIndex`
    // and the loop below would spin forever on the first match.
    const rx = base.global || base.sticky
      ? base
      : new RegExp(base.source, `${base.flags}g`);

    let match: RegExpExecArray | null;
    while ((match = rx.exec(body)) !== null) {
      const start = match.index;
      const end = rx.lastIndex;

      if (end === start) {
        // A zero-length match does not move `lastIndex`; step past it so the
        // scan terminates, and do not record an empty marker.
        rx.lastIndex = end + 1;
        continue;
      }

      if (type !== "p" || !seenStarts.has(start)) {
        tokenizedBody.push({ start, end, type });
        seenStarts.add(start);
      }
    }
  }

  return tokenizedBody;
};

/**
 * Turns flat markers into a list of parsed tokens with nested children.
 *
 * `markers` is mutated: it is sorted in place and each marker receives its
 * `parent` and parsed `token`.
 *
 * @param markers Token spans found in `body`.
 * @param body The text the markers index into.
 * @returns The parsed tokens whose `parent` property is falsy.
 * @throws Error if a marker's type has no parser, or the parser returns
 * nothing, or a child marker is reached before its parent was parsed.
 */
export const buildAbstractSyntaxTree = (
  markers: tokenMarker[],
  body: string,
): Token[] => {
  ensureNoOrphans(markers);

  // Order by start offset; when two markers start at the same offset a "p"
  // marker goes first so that it is parsed before what it contains.
  markers.sort((a, b) => {
    if (a.start !== b.start) return a.start - b.start;
    const aIsParagraph = a.type === "p";
    const bIsParagraph = b.type === "p";
    if (aIsParagraph === bIsParagraph) return 0;
    return aIsParagraph ? -1 : 1;
  });

  for (const marker of markers) {
    marker.token = TokenIdentifiers.get(marker.type)?.parse(
      body.substring(marker.start, marker.end),
    );
    // if (marker.type === "p" && marker.parent && marker.parent?.type !== "p") {
    //   marker.parent = undefined;
    //   continue;
    // }
    if (!marker.token) {
      throw new Error("Failed to parse token. Token type not found?");
    }

    if (!marker.parent) continue;

    const parentToken = marker.parent.token;
    if (!parentToken) {
      throw new Error("Failed to parse token. Child tokenized before parent");
    }

    const siblings = parentToken.children || [];
    parentToken.children = siblings;
    siblings.push(marker.token);
    // marker.token.parent = marker.parent.token;
  }

  // Keep every non-"p" marker; keep a "p" marker only when some other marker
  // shares its start or its end offset.
  const tokens = markers
    .filter((m) =>
      m.type !== "p" ||
      markers.some((other) =>
        other !== m && (other.end === m.end || other.start === m.start)
      )
    )
    .flatMap((m) => (m.token ? [m.token] : []));

  for (const token of tokens) {
    contentToChildren(token);
  }

  // NOTE(review): nothing in this file assigns `token.parent` (the assignment
  // above is commented out), so this filter relies on whatever `parse`
  // returns — confirm nested tokens are not also returned at the top level.
  return tokens.filter((t) => !t.parent);
};

/**
 * Assigns a `parent` marker to every marker and merges "p" markers that
 * overlap the same token.
 *
 * Mutates the markers (`end`, `parent`). Merged-away "p" markers are only
 * removed from this function's local working list; the array passed in by
 * the caller keeps them.
 *
 * @param tokens Markers to link together.
 */
const ensureNoOrphansOld = (tokens: tokenMarker[]) => {
  // `live` shrinks as paragraphs are merged, while the outer loop keeps
  // walking the array it was given (including markers merged away later).
  let live = tokens;

  for (const token of tokens) {
    const parentPs = live
      .filter((t) =>
        t.type === "p" && (
          // any p that fully encapsulates the token
          (t.start <= token.start && t.end >= token.end) ||
          // any p that contains the start of the token
          (t.start <= token.start && t.end >= token.start) ||
          // any p that contains the end of the token
          (t.start <= token.end && t.end >= token.end)
        )
      )
      .sort((a, b) => a.start - b.start);

    const merged = parentPs[0];
    const lastP = parentPs[parentPs.length - 1];
    if (parentPs.length > 1 && merged && lastP) {
      // Fold every overlapping paragraph into the earliest one.
      // NOTE(review): this takes the end of the latest-starting paragraph,
      // not the maximum end — confirm that is intended.
      merged.end = lastP.end;

      const remainingParents = parentPs.slice(1);
      for (const other of live) {
        if (other.parent && remainingParents.includes(other.parent)) {
          other.parent = merged;
        }
      }

      if (merged.end < token.end) {
        merged.end = token.end;
      }

      live = live.filter((t) => !remainingParents.includes(t));
    }

    const potentialParents = live
      .filter((t) =>
        (t.start < token.start && t.end > token.end) ||
        (t.type === "p" && t.start <= token.start && t.end >= token.end &&
          t !== token)
      )
      // Candidates with the greatest start offset come first.
      // NOTE(review): this comparator never returns 0, so the order of
      // candidates with equal starts is engine-dependent; kept as-is to
      // avoid changing which parent wins.
      .sort((a, b) => {
        if (token.start - a.start < token.start - b.start) return -1;
        return 1;
      });

    // Prefer the first non-"p" candidate; otherwise fall back to the first
    // candidate (which may be undefined when there are none).
    token.parent = potentialParents.find((p) => p.type !== "p") ??
      potentialParents[0];
  }
};

/**
 * Links every marker to its parent marker. Currently delegates to
 * `ensureNoOrphansOld`.
 *
 * @param tokens Markers to link together (mutated).
 */
const ensureNoOrphans = (tokens: tokenMarker[]) => {
  ensureNoOrphansOld(tokens);
};

/**
 * Rebuilds `token.children` so that the text of `token.content` lying
 * between child tokens becomes `"text"` tokens interleaved with the existing
 * children.
 *
 * Entries are kept only when they have children of their own, or are
 * `rendersContentOnly` with non-empty content.
 *
 * @param token Token whose `children` array is replaced.
 */
const contentToChildren = (token: Token) => {
  let content = token.content;
  // for (const child of token.children || []) {
  //   if (!content) continue;
  //   [part, content] = content.split(child.raw);
  //   part && children.push({
  //     content: part.trim(),
  //     metadata: {},
  //     raw: part,
  //     type: "text",
  //     uuid: crypto.randomUUID(),
  //   });
  //   children.push(child);
  // }
  // if (content) {
  //   children.push({
  //     content: content.trim(),
  //     metadata: {},
  //     raw: content,
  //     type: "text",
  //     uuid: crypto.randomUUID(),
  //   });
  // }
  const splitMarker = "{{^^}}";
  const existingChildren = token.children || [];

  // Swap the first occurrence of each child's raw text for the marker. The
  // function replacer keeps the marker literal.
  for (const child of existingChildren) {
    content = content.replace(child.raw, () => splitMarker);
  }

  const textParts = content.split(splitMarker).map((c): Token => ({
    content: c.trim(),
    metadata: {},
    raw: c,
    type: "text",
    uuid: crypto.randomUUID(),
    rendersContentOnly: true,
  }));

  token.children = zipArrays(textParts, existingChildren)
    .filter((c) => c.children?.length || (c.rendersContentOnly && c.content));
};

// Previous paragraph/block based tokenizer, kept for reference.
//
// const tokenize = (body: string) => {
//   body = body.replace(/\n?\n?/gs, "");
//   const paragraphs = body.split("\n\n");
//   const blockTokens: BlockToken[] = [];
//   const paragraphTokens: ParagraphToken[] = [];
//   for (const paragraph of paragraphs) {
//     const block = tokenizeBlock(paragraph);
//     let openBT = blockTokens.findLast((bt) => !bt.closed);
//     if (block) {
//       if (typeof block === "string") {
//         if (openBT) {
//           openBT.closed = true;
//         }
//         continue;
//       }
//       if (openBT) {
//         openBT.children.push(block);
//         block.parent = openBT.type;
//       }
//       blockTokens.push(block);
//       continue;
//     }
//     if (!openBT) {
//       openBT = {
//         children: [],
//         closed: false,
//         metadata: {},
//         type: "block",
//         uuid: crypto.randomUUID(),
//       };
//       blockTokens.push(openBT);
//     }
//     const multiline = tokenizeParagraph(paragraph);
//     let openP = paragraphTokens.findLast((p) => !p.closed);
//     if (multiline) {
//       if (Array.isArray(multiline)) {
//         if (openP) {
//           openP.closed = true;
//           openP.content = openP.content.concat(multiline);
//         }
//         continue;
//       }
//       openBT.children.push(multiline);
//       paragraphTokens.push(multiline);
//       continue;
//     } else if (openP && !openP?.allowsInline) {
//       openP.content.push({
//         line: paragraph,
//         raw: paragraph,
//         type: "text",
//         uuid: crypto.randomUUID(),
//       });
//     }
//     // I don't think the closed check is necessary, but just in case
//     // if (openP && !openP.closed && !openP.allowsInline) continue;
//     if (!openP) {
//       openP = {
//         allowsInline: true,
//         closed: true,
//         content: [],
//         metadata: {},
//         type: "p",
//         uuid: crypto.randomUUID(),
//       };
//       openBT.children.push(openP);
//       paragraphTokens.push(openP);
//     }
//     const lines = paragraph.split("\n");
//     let previous;
//     for (const line of lines) {
//       const singleLine = tokenizeLine(line, previous);
//       if (singleLine) {
//         if (singleLine !== previous) {
//           openP.content.push(singleLine);
//         }
//         previous = singleLine;
//       }
//     }
//   }
//   return blockTokens.filter((b) => !b.parent);
// };