import {
  IdentifiedToken,
  Token,
  TokenAttributes,
  TokenRenderer,
} from "@/types";

/**
 * Signature of the optional per-identifier search hook. The returned object
 * has the same shape as `SearchResult`.
 */
type SearchFunction = (s: string, start: number, end: number) => {
  start: number;
  end: number;
  text: string;
  lastIndex: number;
};

/** One registered token kind: how to find it, parse it, and (optionally) balance its tags. */
type TokenIdentifier = {
  rx: RegExp;
  parse: (s: string) => Token;
  search?: SearchFunction;
};

type TokenIdentifierMap = Map<string, TokenIdentifier>;

/**
 * Module-level identifier map. Nothing in this file writes to it; the builders
 * below create and return their own maps.
 */
export const TokenIdentifiers = new Map<string, TokenIdentifier>();

type IdentifierRegistration = (
  type: string,
  match: RegExp,
  parseFunction: (s: string, rx: RegExp) => IdentifiedToken,
  renderFunction: TokenRenderer,
  openTagRx?: RegExp,
  closeTagRx?: RegExp,
) => void;

/**
 * Creates an empty identifier map together with the function that registers
 * entries into it.
 *
 * @returns A tuple `[map, registerIdentifier]`.
 */
export function buildIdentifierMap(): [
  TokenIdentifierMap,
  IdentifierRegistration,
] {
  // Intentionally shadows the module-level export: every call gets its own map.
  const TokenIdentifiers = new Map<string, TokenIdentifier>();

  function registerIdentifier(
    type: string,
    match: RegExp,
    parseFunction: (s: string, rx: RegExp) => IdentifiedToken,
    renderFunction: TokenRenderer,
  ): void;
  function registerIdentifier(
    type: string,
    match: RegExp,
    parseFunction: (s: string, rx: RegExp) => IdentifiedToken,
    renderFunction: TokenRenderer,
    openTagRx: RegExp,
    closeTagRx: RegExp,
  ): void;
  /**
   * Stores an identifier under `type`.
   *
   * The stored `parse` merges `{ render, type }` with whatever `parseFunction`
   * returns; fields returned by `parseFunction` win on conflict. A `search`
   * hook is only attached when BOTH tag regexes are supplied.
   */
  function registerIdentifier(
    type: string,
    match: RegExp,
    parseFunction: (s: string, rx: RegExp) => IdentifiedToken,
    renderFunction: TokenRenderer,
    openTagRx?: RegExp,
    closeTagRx?: RegExp,
  ) {
    TokenIdentifiers.set(type, {
      rx: match,
      parse(s) {
        const identifiedToken = parseFunction(s, this.rx);
        const token: TokenAttributes = {
          render: renderFunction,
          type,
        };
        return { ...token, ...identifiedToken };
      },
      search: (openTagRx && closeTagRx)
        ? (s, start, end) => {
          // Fresh global copies per call so no `lastIndex` state leaks
          // between searches.
          return search(
            s,
            start,
            end,
            new RegExp(openTagRx, "g"),
            new RegExp(closeTagRx, "g"),
          );
        }
        : undefined,
    });
  }

  return [TokenIdentifiers, registerIdentifier];
}

/**
 * Builds an identifier map pre-populated with the default element set
 * (grid, card, code, list, list-item, heading, image, anchor, inline-code,
 * bold, italic, popover, accordion, p, hr, comment, frontmatter, table).
 *
 * Every default renderer simply echoes the token's raw text inside a fragment.
 *
 * NOTE(review): the copy of this file that was reviewed had been passed
 * through a markup stripper that deleted every `<!...>` / `</...>` span. The
 * JSX fragment closers were restored verbatim; the regexes individually marked
 * `NOTE(review)` below are best-effort reconstructions and MUST be compared
 * against version control before this is merged.
 *
 * @returns The populated identifier map.
 */
export const buildOnlyDefaultElements = () => {
  const [TokenIdentifiers, registerIdentifier] = buildIdentifierMap();

  const rendersContentOnly = true;
  const rendersChildrenOnly = true;

  // Shared default renderer: echoes the raw source text.
  const renderRaw: TokenRenderer = (t) => {
    return <>{t.raw}</>;
  };

  // grid
  registerIdentifier(
    "grid",
    // NOTE(review): reconstructed. Only the leading `/(?<!` survived; the
    // rest is a lazy mirror of the parser regex below — confirm.
    /(?<!\/)(?:\[\])+\n+[\s\S]*?\n+\/\[\]/g,
    (s) => {
      const rx = /((?:\[\])+)\n+([\s\S]*)\n+\/\[\]/;
      const [_, columns, content] = s.match(rx) ||
        ["", "..", "Unable to parse grid"];
      return {
        content,
        raw: s,
        metadata: {
          // Each `[]` pair in the opening tag is one column.
          columns: (columns.length / 2).toString(),
        },
        uuid: crypto.randomUUID(),
        rendersChildrenOnly,
      };
    },
    renderRaw,
    // NOTE(review): open/close tag regexes reconstructed (only `/(?<!` of the
    // open tag survived) — confirm.
    /(?<!\/)(?:\[\])+/g,
    /\/\[\]/g,
  );

  // card
  // NOTE(review): the type name "card" and the match regex were lost and are
  // reconstructed from the parser regex below — confirm both.
  registerIdentifier(
    "card",
    /\[{2}!?\s*?\n+[\s\S]*?\n+\]{2}/g,
    (s) => {
      const rx = /\[{2}(!?)\s*?\n+([\s\S]*)\n+\]{2}/;
      // Falls back to treating the whole input as content when the pattern
      // does not match.
      const [_, isBlock, content] = s.match(rx) || ["", "", s];
      return {
        content: content.trim(),
        raw: s,
        metadata: {
          isBlock,
        },
        uuid: crypto.randomUUID(),
        rendersChildrenOnly,
      };
    },
    renderRaw,
    /\[\[/g,
    /\]\]/g,
  );

  // fenced code block
  registerIdentifier("code", /`{3}\n+((?:.|\n)*?)\n+`{3}/g, (s, rx) => {
    return {
      // `new RegExp(rx, "")` drops the global flag so capture groups are
      // returned by `match`.
      content: s.match(new RegExp(rx, ""))?.at(1) ||
        "Unable to parse code",
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // list
  registerIdentifier(
    "list",
    /^\s*-\s([\s\S]*?)\n\n/gm,
    (s, rx) => {
      return {
        content: s.match(new RegExp(rx, ""))?.at(1) ||
          "Unable to parse list",
        raw: s,
        metadata: {
          // Length of the text before the first "-" (after removing the
          // first newline).
          initialDepth:
            s.replace("\n", "").split("-").at(0)?.length.toString() || "1",
        },
        uuid: crypto.randomUUID(),
        rendersChildrenOnly,
      };
    },
    renderRaw,
  );

  // list-item
  registerIdentifier(
    "list-item",
    /^\s*-\s(.*?)$/gm,
    (s, rx) => {
      return {
        content: s.match(new RegExp(rx, ""))?.at(1) ||
          "Unable to parse list-item",
        raw: s,
        metadata: {
          initialDepth:
            s.replace("\n", "").split("-").at(0)?.length.toString() || "1",
        },
        uuid: crypto.randomUUID(),
      };
    },
    renderRaw,
  );

  // heading
  registerIdentifier("heading", /^#+\s(.*?)$/gm, (s, rx) => {
    return {
      content: s.match(new RegExp(rx, ""))?.at(1) ||
        "Unable to parse heading",
      raw: s,
      metadata: {
        // Number of "#" characters anywhere in the matched text.
        strength: s.match(/#/g)?.length.toString() || "1",
      },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // image
  registerIdentifier("image", /\!\[(.*?)\]\((.*?)\)/g, (s, rx) => {
    const [_, title, src] = s.match(new RegExp(rx, ""))!;

    return {
      // content: inline,
      content: title.trim(),
      raw: s,
      metadata: {
        src,
      },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // anchor
  // NOTE(review): regex reconstructed; only `/(?<!` survived. The parser
  // needs two groups (title, href) and the lookbehind presumably excludes the
  // image form `![..](..)` — confirm.
  registerIdentifier("anchor", /(?<!\!)\[(.*?)\]\((.*?)\)/g, (s, rx) => {
    let preset,
      [_, title, href] = s.match(new RegExp(rx, ""))!;
    // A title starting with ``` optionally followed by `cta` / `button`
    // selects a class preset; the remainder becomes the visible title.
    const match = title.match(/`{3}(cta|button)?(.*)/);
    if (match) {
      [_, preset, title] = match;
    }
    const classes = {
      button: "btn-primary inline-block",
      cta: "btn-secondary inline-block uppercase",
    };
    return {
      content: title.trim(),
      raw: s,
      metadata: {
        href,
        classes: classes[preset as keyof typeof classes],
      },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // inline-code
  registerIdentifier(
    "inline-code",
    /(?<=\s|^)`(.*?)`(?=[\s,.!?)]|$)/gi,
    (s, rx) => {
      return {
        content: s.match(new RegExp(rx, "i"))?.at(1) ||
          "Unable to parse inline-code",
        raw: s,
        metadata: {},
        uuid: crypto.randomUUID(),
        rendersContentOnly,
      };
    },
    renderRaw,
  );

  // bold
  registerIdentifier("bold", /\*{2}(.*?)\*{2}/g, (s, rx) => {
    return {
      content: s.match(new RegExp(rx, "i"))?.at(1) ||
        "Unable to parse bold",
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // italic
  // NOTE(review): regex reconstructed; only `/(?<!` survived. The parser
  // reads capture group 1; the lookarounds presumably keep `**bold**` from
  // matching — confirm.
  registerIdentifier("italic", /(?<!\*)\*([^*]+?)\*(?!\*)/g, (s, rx) => {
    return {
      content: s.match(new RegExp(rx, "i"))?.at(1) ||
        "Unable to parse italic",
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // popover
  registerIdentifier("popover", /\^\[(.*?)\]\<<(.*?)\>>/g, (s, rx) => {
    const [_, title, content] = s.match(new RegExp(rx, ""))!;

    return {
      content,
      raw: s,
      metadata: { title },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // accordion
  registerIdentifier(
    "accordion",
    /\[accordion(\s.*?)?]\n+((?:.|\n)*?)\n+\[\/accordion\]/g,
    (s, rx) => {
      // `title` is an optional group and is undefined when omitted.
      const [_, title, content] = s.match(new RegExp(rx, ""))!;
      return {
        content,
        raw: s,
        metadata: { title },
        uuid: crypto.randomUUID(),
      };
    },
    renderRaw,
  );

  // paragraph
  registerIdentifier("p", /(?<=\n\n)([\s\S]*?)(?=\n\n)/g, (s) => {
    return {
      content: s,
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
    };
  }, renderRaw);

  // horizontal rule
  registerIdentifier("hr", /^-{3,}$/gm, (s) => {
    return {
      content: s,
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // comment
  // NOTE(review): regex reconstructed; the reviewed copy contained only
  // `//g`, consistent with a stripped HTML-comment pattern — confirm.
  registerIdentifier("comment", /<!--[\s\S]*?-->/g, (s) => {
    return {
      // Comments contribute no visible content; the source text is kept in
      // metadata only.
      content: "",
      metadata: { comment: s },
      raw: "",
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // frontmatter
  registerIdentifier("frontmatter", /^---([\s\S]*?)---/g, (s, rx) => {
    return {
      content: "",
      metadata: {
        frontmatterString: s.match(rx)?.at(0) || "",
      },
      raw: "",
      // Fixed id rather than a random one.
      uuid: "frontmatter",
    };
  }, renderRaw);

  // table
  registerIdentifier("table", /^\|\s[\s\S]*?\|(?=(\n\n)|$)/g, (s) => {
    // Sections are separated by "-"; each section is split into lines and
    // each line into cells on "|".
    const rowSections = s.split(/-/gm).map((section) =>
      section.split("\n").map((r) => r.split(/\s?\|\s?/g))
    );

    let headerRows: string[][] = [],
      bodyRows: string[][] = [],
      footerRows: string[][] = [];

    switch (rowSections.length) {
      case 1:
        bodyRows = rowSections[0];
        break;
      case 2:
        headerRows = rowSections[0];
        bodyRows = rowSections[1];
        break;
      case 3:
        headerRows = rowSections[0];
        bodyRows = rowSections[1];
        // Was `rowSections[3]`, which is out of range for three sections and
        // made the spread below throw on `undefined`.
        footerRows = rowSections[2];
        break;
    }

    const maxColumns = Math.max(
      ...[...headerRows, ...bodyRows, ...footerRows].map((r) => r.length),
    );

    return {
      content: s,
      raw: s,
      metadata: {
        headerRows: headerRows.join(" | "),
        bodyRows: bodyRows.join(" | "),
        footerRows: footerRows.join(" | "),
        columns: maxColumns.toString(),
      },
      uuid: crypto.randomUUID(),
    };
  }, renderRaw);

  return TokenIdentifiers;
};

/**
 * Scans `str` with an opening and a closing pattern, counting each, and stops
 * as soon as openings no longer outnumber closings.
 *
 * @param str - Text to scan.
 * @param openRegex - Pattern for an opening tag (copied with the `g` flag).
 * @param closedRegex - Pattern for a closing tag (copied with the `g` flag).
 * @returns The index just past the last closing match consumed by the loop;
 *   the index of the closing match when both patterns match at the same
 *   position; or `null` when neither pattern matches.
 * @throws Error when an opening tag is found but no closing tag remains.
 */
function findMatchingClosedParenthesis(
  str: string,
  openRegex: RegExp,
  closedRegex: RegExp,
): number | null {
  let openings = 0;
  let closings = 0;

  // Work on global copies so `exec` advances and the caller's regexes are
  // left untouched.
  openRegex = new RegExp(openRegex, "g");
  closedRegex = new RegExp(closedRegex, "g");

  let lastOpeningSuccessIndex = 0;
  let lastClosingSuccessIndex = 0;

  do {
    const openingMatch = openRegex.exec(str);
    const closingMatch = closedRegex.exec(str);

    if ((openingMatch && !closingMatch)) {
      throw Error("Things have gone horribly wrong");
    }
    // if ((!openingMatch && closingMatch) || (!openingMatch && !closingMatch)) break;

    if (
      openingMatch && closingMatch && openingMatch.index < closingMatch.index
    ) {
      // The opening tag comes first: count it and rewind the closing regex so
      // the same closing match is found again on the next pass.
      openings++;
      lastOpeningSuccessIndex = openingMatch.index + openingMatch[0].length;
      closedRegex.lastIndex = lastClosingSuccessIndex;
    } else if (
      (!openingMatch && closingMatch) ||
      (openingMatch && closingMatch && openingMatch.index > closingMatch.index)
    ) {
      // The closing tag comes first (or no opening remains): count it and
      // rewind the opening regex so the skipped opening is re-examined.
      closings++;
      lastClosingSuccessIndex = closingMatch.index + closingMatch[0].length;
      openRegex.lastIndex = lastOpeningSuccessIndex;
    } else {
      // Neither matched (null) or both matched at the same index.
      return closingMatch?.index ?? null;
    }
  } while (openings > closings);

  return closedRegex.lastIndex;
}

/** Result of a balanced-tag search. */
interface SearchResult {
  start: number;
  end: number;
  text: string;
  lastIndex: number;
}

/**
 * Finds the end of a token in `s` by balancing open/close tags and reports
 * where scanning should resume.
 *
 * @param s - Text to search; match positions are relative to this string and
 *   are offset by `start` in the result (presumably `s` is the source sliced
 *   at `start` — confirm against the caller).
 * @param start - Offset added to the balanced end position.
 * @param end - End position the caller originally matched.
 * @param openRx - Opening-tag pattern.
 * @param closeRx - Closing-tag pattern.
 * @returns `start`, the recomputed `end`, the text of `s` up to the balanced
 *   end, and `lastIndex`: the new `end` when it equals the caller's `end`,
 *   otherwise `start` plus the length of the first opening-tag match.
 * @throws Error when no closing tag can be located.
 */
function search(
  s: string,
  start: number,
  end: number,
  openRx: RegExp,
  closeRx: RegExp,
): SearchResult {
  const oldEnd = end;

  const newEnd = findMatchingClosedParenthesis(
    s,
    // s.substring(0, end - start),
    openRx,
    closeRx,
  );

  if (newEnd === null) throw Error("There was an issue finding a closing tag");

  end = newEnd + start;

  return {
    start,
    end,
    text: s.substring(0, newEnd),
    lastIndex: oldEnd === end ? end : start + s.match(openRx)![0].length,
  };
}