"use client";

import { zipArrays } from "../zip";
import { buildOnlyDefaultElements, TokenRenderers } from "./TokenIdentifiers";

/**
 * Turns a document body into a tree of tokens.
 *
 * @param body - Raw source text to parse.
 * @returns The top-level tokens (those without a parent); nested tokens hang off `children`.
 */
export const createElements = (body: string): Token[] => {
  const tokens = tokenize(body);
  return buildAbstractSyntaxTree(tokens).map((t) => t.token);
};

/**
 * Runs every default token identifier over the body and records each match as a marker
 * (`start`/`end` offsets into the normalized body, the identifier's type, and the parsed token).
 *
 * Markers come back grouped by identifier, not ordered by position.
 *
 * @param body - Raw source text.
 * @returns One marker per match, across all identifiers.
 */
const tokenize = (body: string): TokenMarker[] => {
  const tokenizedBody: TokenMarker[] = [];

  // Drop trailing spaces/tabs in front of a line break, then collapse runs of
  // three or more line breaks down to exactly two.
  const normalized = body
    .replaceAll(/[ \t]+\n/g, "\n")
    .replaceAll(/\n{3,}/g, "\n\n");

  const ti = buildOnlyDefaultElements();
  for (const [type, token] of ti.entries()) {
    // Work on a private copy so `lastIndex` bookkeeping never leaks into the identifier's regex.
    // `exec` only honours `lastIndex` on global/sticky regexes; without the `g` flag the loop
    // below would return the first match forever, so force it on.
    const base = new RegExp(token.rx);
    const rx = base.global ? base : new RegExp(base.source, `${base.flags}g`);

    let match: RegExpExecArray | null;
    while ((match = rx.exec(normalized)) !== null) {
      const start = match.index;
      const end = rx.lastIndex;

      if (token.search) {
        // The identifier refines the match itself (it is handed the text from the match start
        // onwards plus the raw offsets) and tells us where scanning should resume.
        const found = token.search(normalized.substring(start), start, end);
        rx.lastIndex = found.lastIndex;
        tokenizedBody.push({
          start: found.start,
          end: found.end,
          type,
          token: token.parse(found.text),
        });
      } else {
        tokenizedBody.push({
          start,
          end,
          type,
          token: token.parse(match[0]),
        });
      }

      // A zero-length match (or a `search` hook that resumes at/before the match start) would
      // make `exec` yield the same match again and never terminate. Always move forward.
      if (rx.lastIndex <= start) {
        rx.lastIndex = start + 1;
      }
    }
  }

  return tokenizedBody;
};

/**
 * Links markers into a tree and converts each token's leftover text into child tokens.
 *
 * Note: sorts the `markers` array passed in (in place) before filtering.
 *
 * @param markers - Markers produced by `tokenize`.
 * @returns The markers that ended up without a parent, ordered by start offset.
 */
function buildAbstractSyntaxTree(markers: TokenMarker[]) {
  markers.sort((a, b) => a.start - b.start);
  markers = filterOverlappingPBlocks(markers);
  establishClosestParent(markers);

  // Attach every token to the token of the marker chosen as its parent.
  for (const marker of markers) {
    if (marker.parent) {
      marker.parent.token.children ??= [];
      marker.parent.token.children.push(marker.token);
    }
  }

  // By starting at the end, we can always assure that we are not filtering out children that
  // haven't been processed yet
  for (const marker of [...markers].reverse()) {
    contentToChildren(marker.token);
  }

  return markers.filter((m) => !m.parent);
}

/**
 * Assigns a `parent` to every block that does not already have one.
 *
 * A candidate parent is an earlier block (by start offset) whose `[start, end]` range contains
 * this block's start and whose type accepts this block's type. Among candidates the one whose
 * start is nearest wins; on a tie the candidate that comes first in sorted order is kept.
 *
 * Sorts `blocks` in place by start offset.
 */
function establishClosestParent(blocks: TokenMarker[]): void {
  blocks.sort((a, b) => a.start - b.start); // Sort blocks by start position

  for (let i = 0; i < blocks.length; i++) {
    const block = blocks[i];
    if (block.parent) continue; // Skip blocks that already have a parent

    let closestParent: TokenMarker | undefined = undefined;
    let minDistance = Number.MAX_SAFE_INTEGER;

    // Only blocks earlier in sorted order can enclose this one.
    for (let j = 0; j < i; j++) {
      const otherBlock = blocks[j];
      const containsStart =
        otherBlock.end >= block.start && otherBlock.start <= block.start;
      if (!containsStart) continue;

      const distance = block.start - otherBlock.start;
      if (
        distance < minDistance &&
        isAcceptableChild(otherBlock.type, block.type)
      ) {
        minDistance = distance;
        closestParent = otherBlock;
      }
    }

    if (closestParent) {
      block.parent = closestParent; // Assign the closest parent block
    }
  }
}

type ParentChildMap = {
  [parentType: string]: string[]; // Map parent types to an array of acceptable child types
};

const parentChildMap: ParentChildMap = {
  list: ["list-item"],
  // Add more mappings as needed...
};

/**
 * Tells whether a block of `childType` may be nested under a block of `parentType`.
 * Parent types with no entry in `parentChildMap` accept any child.
 */
function isAcceptableChild(parentType: string, childType: string): boolean {
  const acceptableChildren = parentChildMap[parentType];
  return acceptableChildren ? acceptableChildren.includes(childType) : true;
}

// Occasionally, some P blocks start exactly at the same point as another block (a side effect of needing to exclude preceding line-breaks from the regex while also having the only clear delineation being those line-breaks) so we just remove those P blocks so that when searching for a parent, it doesn't need to figure out if the P block is valid or not.
// This doesn't cause issues during rendering since each block handles its own container element
/**
 * Removes every "p" block that either starts at the same offset as another block, or ends at
 * the same offset as another block that starts before it. Non-"p" blocks are always kept.
 *
 * @param blocks - All markers found in the body.
 * @returns A new array without the colliding "p" blocks; the input array is not modified.
 */
function filterOverlappingPBlocks(blocks: TokenMarker[]): TokenMarker[] {
  return blocks.filter((block) => {
    if (block.type !== "p") {
      return true;
    }
    const collides = blocks.some(
      (otherBlock) =>
        otherBlock !== block &&
        (otherBlock.start === block.start ||
          (otherBlock.end === block.end && otherBlock.start < block.start))
    );
    return !collides;
  });
}

/**
 * Rebuilds `token.children` so that the text between already-attached children is represented
 * by tokens as well.
 *
 * Each child's `raw` text is cut out of the token's content; the remaining pieces become "text"
 * tokens (or "p" tokens wrapping a "text" token when the parent has `rendersChildrenOnly` set)
 * and are interleaved with the existing children via `zipArrays`. Pieces with nothing to render
 * are dropped. Tokens without content are left untouched, and "list" tokens are delegated to
 * `handleSpecial`.
 */
const contentToChildren = (token: Token) => {
  let content = token.content;
  if (!content) return;

  if (handleSpecial(token)) return;

  // Swap each child's source text for a marker so the content can be split around the children.
  const splitMarker = "{{^^}}";
  for (const child of token.children || []) {
    content = content.replace(child.raw, splitMarker);
  }

  const fillerType = token.rendersChildrenOnly ? "p" : "text";
  const fillers = content.split(splitMarker).map(
    (c): Token => ({
      content: c.replaceAll("\n", " "),
      metadata: {},
      raw: c,
      type: fillerType,
      uuid: crypto.randomUUID(),
      rendersContentOnly: !token.rendersChildrenOnly,
      render: TokenRenderers.get(fillerType)!,
      // A parent that only renders children needs the text wrapped one level deeper; pieces
      // made up solely of line breaks get no inner token and are filtered out below.
      children:
        token.rendersChildrenOnly && c.replaceAll("\n", "")
          ? [
              {
                content: c.replaceAll("\n", " "),
                metadata: {},
                raw: c,
                type: "text",
                uuid: crypto.randomUUID(),
                render: TokenRenderers.get("text")!,
                rendersContentOnly: true,
              },
            ]
          : undefined,
    })
  );

  token.children = zipArrays(fillers, token.children || []).filter(
    (c) => c.children?.length || (c.rendersContentOnly && c.content)
  );
};

/**
 * Applies type-specific child handling.
 *
 * For "list" tokens the flat children are regrouped into nested lists based on each item's
 * `metadata.initialDepth`.
 *
 * @returns `true` when the token type was handled here and needs no further processing.
 */
function handleSpecial(token: Token): boolean {
  switch (token.type) {
    case "list": {
      const chunks = splitByDepth(token.children ?? []);
      const items = processChunks(chunks);
      token.children = items.flat();
      return true;
    }
    // case "grid":
    //   return token.children;
    default:
      return false;
  }
}

/**
 * Groups consecutive items that share the same `metadata.initialDepth` into chunks,
 * preserving order.
 *
 * @param items - Flat list of tokens; a missing value yields no chunks.
 * @returns The chunks, each holding at least one item.
 */
function splitByDepth(items: Token[]) {
  const chunks: Token[][] = [];
  if (!items) return chunks;

  let currentDepth = -1;
  let chunk: Token[] = [];
  for (const item of items) {
    const depth = Number(item.metadata.initialDepth);
    if (depth === currentDepth) {
      chunk.push(item);
      continue;
    }
    // Depth changed: close the running chunk and start a new one.
    if (chunk.length > 0) {
      chunks.push(chunk);
    }
    chunk = [item];
    currentDepth = depth;
  }
  if (chunk.length > 0) {
    chunks.push(chunk);
  }

  return chunks;
}

/**
 * Nests deeper chunks under shallower ones.
 *
 * Every chunk after the first is wrapped in a new "list" token and appended to the children of
 * the last item of the nearest preceding chunk with a smaller depth. Chunks that found such a
 * host are removed from the result; chunks with no shallower predecessor stay at the top level.
 *
 * @param chunks - Output of `splitByDepth`.
 * @returns The chunks that remain at the top level, in their original order.
 */
function processChunks(chunks: Token[][]) {
  const mergedChunks = new Set<Token[]>();

  for (let i = 1; i < chunks.length; i++) {
    const currentChunk = chunks[i];
    const currentDepth = Number(currentChunk[0].metadata.initialDepth);

    // Walk backwards to the first chunk with a lower depth.
    for (let j = i - 1; j >= 0; j--) {
      const prevChunk = chunks[j];
      const prevDepth = Number(prevChunk[0].metadata.initialDepth);
      if (prevDepth >= currentDepth) continue;

      // Append the current chunk to the children of the found chunk's last item.
      const lastPrev = prevChunk[prevChunk.length - 1];
      lastPrev.children = lastPrev.children || [];
      lastPrev.children.push({
        type: "list",
        content: "",
        raw: "",
        metadata: { initialDepth: currentChunk[0].metadata.initialDepth },
        uuid: crypto.randomUUID(),
        children: currentChunk,
        render: TokenRenderers.get("list")!,
      });
      mergedChunks.add(currentChunk);
      break;
    }
  }

  // Filter out chunks that were merged into others
  return chunks.filter((c) => !mergedChunks.has(c));
}

/**
 * @description Extracts the frontmatter of a markdown document and returns it as an object
 * together with the body with the frontmatter removed from it.
 *
 * The frontmatter is the text between a leading `---` and the next `---`. Each non-empty line
 * is read as `key: value`; the value is everything after the first `": "`, so values may
 * themselves contain `": "`. Lines without a `": "` separator are ignored. Both `\n` and
 * `\r\n` line endings are accepted.
 *
 * @param body - The full document text.
 * @returns a tuple containing the frontmatter object and the body, in that order
 */
export function extractFrontMatter(body: string): [FrontMatter, string] {
  const rx = /^---([\s\S]*?)---/;
  const frontmatterString = body.match(rx)?.[1] ?? "";
  const remainingBody = body.replace(rx, "");

  const frontMatter: FrontMatter = {};
  for (const line of frontmatterString.split(/\r?\n/)) {
    if (!line) continue;

    // Split on the first separator only; a plain split would truncate "title: Hello: World".
    const separatorAt = line.indexOf(": ");
    if (separatorAt === -1) continue;

    frontMatter[line.slice(0, separatorAt)] = line.slice(separatorAt + 2);
  }

  return [frontMatter, remainingBody];
}