import {
  IdentifiedToken,
  Token,
  TokenAttributes,
  TokenRenderer,
} from "@/types";

/** Span produced by a balanced open/close tag search (see `search`). */
interface SearchResult {
  start: number;
  end: number;
  text: string;
  lastIndex: number;
}

type SearchFunction = (s: string, start: number, end: number) => SearchResult;

type TokenIdentifier = {
  rx: RegExp;
  parse: (s: string) => Token;
  search?: SearchFunction;
};

type TokenIdentifierMap = Map<string, TokenIdentifier>;

/**
 * Module-level identifier map. It is not populated anywhere in this file;
 * `buildIdentifierMap` creates and fills its own, separate map.
 */
export const TokenIdentifiers = new Map<string, TokenIdentifier>();

type IdentifierRegistration = (
  type: string,
  match: RegExp,
  parseFunction: (s: string, rx: RegExp) => IdentifiedToken,
  renderFunction: TokenRenderer,
  openTagRx?: RegExp,
  closeTagRx?: RegExp,
) => void;

/**
 * Creates an empty identifier map together with the function that registers
 * identifiers into it.
 *
 * @returns A tuple of `[map, registerIdentifier]`.
 */
export function buildIdentifierMap(): [
  TokenIdentifierMap,
  IdentifierRegistration,
] {
  const identifiers: TokenIdentifierMap = new Map();

  function registerIdentifier(
    type: string,
    match: RegExp,
    parseFunction: (s: string, rx: RegExp) => IdentifiedToken,
    renderFunction: TokenRenderer,
  ): void;
  function registerIdentifier(
    type: string,
    match: RegExp,
    parseFunction: (s: string, rx: RegExp) => IdentifiedToken,
    renderFunction: TokenRenderer,
    openTagRx: RegExp,
    closeTagRx: RegExp,
  ): void;
  function registerIdentifier(
    type: string,
    match: RegExp,
    parseFunction: (s: string, rx: RegExp) => IdentifiedToken,
    renderFunction: TokenRenderer,
    openTagRx?: RegExp,
    closeTagRx?: RegExp,
  ) {
    identifiers.set(type, {
      rx: match,
      parse(s) {
        // Reads `this.rx`, so `parse` must be invoked as a method of the
        // registered entry.
        const identifiedToken = parseFunction(s, this.rx);
        const token: TokenAttributes = {
          render: renderFunction,
          type,
        };

        // Fields returned by the parse function win over the defaults above.
        return { ...token, ...identifiedToken };
      },
      // A search function is attached only when both tag patterns are given.
      search: (openTagRx && closeTagRx)
        ? (s, start, end) =>
          search(
            s,
            start,
            end,
            new RegExp(openTagRx, "g"),
            new RegExp(closeTagRx, "g"),
          )
        : undefined,
    });
  }

  return [identifiers, registerIdentifier];
}

/**
 * Returns capture group 1 of `rx` (recompiled with `flags`) applied to `s`,
 * or `fallback` when there is no match. `||` is deliberate here: an empty
 * capture also yields the fallback.
 */
function firstCapture(
  s: string,
  rx: RegExp,
  flags: string,
  fallback: string,
): string {
  return s.match(new RegExp(rx, flags))?.at(1) || fallback;
}

/**
 * Length, as a string, of whatever precedes the first "-" in `s` once the
 * first newline has been removed.
 */
function leadingDepth(s: string): string {
  return s.replace("\n", "").split("-").at(0)?.length.toString() || "1";
}

/**
 * Builds an identifier map pre-populated with the default element types.
 *
 * @returns The populated identifier map.
 */
export const buildOnlyDefaultElements = () => {
  const [identifiers, registerIdentifier] = buildIdentifierMap();

  const rendersContentOnly = true;
  const rendersChildrenOnly = true;

  // Every default element currently renders its raw text inside a fragment.
  const renderRaw: TokenRenderer = (t) => {
    return <>{t.raw}</>;
  };

  // grid
  registerIdentifier(
    "grid",
    // NOTE(review): this pattern was lost in extraction and is reconstructed
    // from the parse function below — confirm against the real file.
    /(?<!\/)(?:\[\])+((?:.|\n)*?)\n+\/\[\]/g,
    (s) => {
      const rx = /((?:\[\])+)\n+([\s\S]*)\n+\/\[\]/;
      const [_, columns, content] = s.match(rx) ||
        ["", "..", "Unable to parse grid"];
      return {
        content,
        raw: s,
        metadata: {
          // Each column is written as "[]", i.e. two characters.
          columns: (columns.length / 2).toString(),
        },
        uuid: crypto.randomUUID(),
        rendersChildrenOnly,
      };
    },
    renderRaw,
    // NOTE(review): open/close tag patterns reconstructed — confirm.
    /(?<!\/)(?:\[\])+/g,
    /\/\[\]/g,
  );

  // card
  // NOTE(review): the type name and match pattern of this registration were
  // lost in extraction; "card" and the pattern below are assumptions — confirm.
  registerIdentifier(
    "card",
    /\[{2}([\s\S]*?)\n+\]{2}/g,
    (s) => {
      const rx = /\[{2}(!?)\s*?\n+([\s\S]*)\n+\]{2}/;
      // Falls back to treating the whole string as content when unmatched.
      const [_, isBlock, content] = s.match(rx) || ["", "", s];

      return {
        content: content.trim(),
        raw: s,
        metadata: {
          isBlock,
        },
        uuid: crypto.randomUUID(),
        rendersChildrenOnly,
      };
    },
    renderRaw,
    /\[\[/g,
    /\]\]/g,
  );

  // fenced code block
  registerIdentifier("code", /`{3}\n+((?:.|\n)*?)\n+`{3}/g, (s, rx) => {
    return {
      content: firstCapture(s, rx, "", "Unable to parse code"),
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // list
  registerIdentifier(
    "list",
    /^\s*-\s([\s\S]*?)\n\n/gm,
    (s, rx) => {
      return {
        content: firstCapture(s, rx, "", "Unable to parse list"),
        raw: s,
        metadata: {
          initialDepth: leadingDepth(s),
        },
        uuid: crypto.randomUUID(),
        rendersChildrenOnly,
      };
    },
    renderRaw,
  );

  // list-item
  registerIdentifier(
    "list-item",
    /^\s*-\s(.*?)$/gm,
    (s, rx) => {
      return {
        content: firstCapture(s, rx, "", "Unable to parse list-item"),
        raw: s,
        metadata: {
          initialDepth: leadingDepth(s),
        },
        uuid: crypto.randomUUID(),
      };
    },
    renderRaw,
  );

  // heading
  registerIdentifier("heading", /^#+\s(.*?)$/gm, (s, rx) => {
    return {
      content: firstCapture(s, rx, "", "Unable to parse heading"),
      raw: s,
      metadata: {
        // Counts every "#" in the matched text.
        strength: s.match(/#/g)?.length.toString() || "1",
      },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // image
  registerIdentifier("image", /\!\[(.*?)\]\((.*?)\)/g, (s, rx) => {
    // Assumes `s` matches `rx`; throws on destructuring otherwise.
    const [_, title, src] = s.match(new RegExp(rx, ""))!;

    return {
      content: title.trim(),
      raw: s,
      metadata: {
        src,
      },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // anchor
  // NOTE(review): match pattern reconstructed (lost in extraction); the
  // lookbehind is assumed to keep images from matching as anchors — confirm.
  registerIdentifier("anchor", /(?<!\!)\[(.*?)\]\((.*?)\)/g, (s, rx) => {
    let preset: string | undefined;
    // Assumes `s` matches `rx`; throws on destructuring otherwise.
    let [, title, href] = s.match(new RegExp(rx, ""))!;
    const presetMatch = title.match(/`{3}(cta|button)?(.*)/);

    if (presetMatch) {
      [, preset, title] = presetMatch;
    }

    const classes = {
      button: "btn-primary inline-block",
      cta: "btn-secondary inline-block uppercase",
    };
    return {
      content: title.trim(),
      raw: s,
      metadata: {
        href,
        classes: classes[preset as keyof typeof classes],
      },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // inline-code
  registerIdentifier(
    "inline-code",
    /(?<=\s|^)`(.*?)`(?=[\s,.!?)]|$)/gi,
    (s, rx) => {
      return {
        content: firstCapture(s, rx, "i", "Unable to parse inline-code"),
        raw: s,
        metadata: {},
        uuid: crypto.randomUUID(),
        rendersContentOnly,
      };
    },
    renderRaw,
  );

  // bold
  registerIdentifier("bold", /\*{2}(.*?)\*{2}/g, (s, rx) => {
    return {
      content: firstCapture(s, rx, "i", "Unable to parse bold"),
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // italic
  // NOTE(review): match pattern reconstructed (lost in extraction) — confirm.
  registerIdentifier("italic", /(?<!\*)\*([^*]+?)\*(?!\*)/g, (s, rx) => {
    return {
      content: firstCapture(s, rx, "i", "Unable to parse italic"),
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // popover
  registerIdentifier("popover", /\^\[(.*?)\]\<<(.*?)\>>/g, (s, rx) => {
    // Assumes `s` matches `rx`; throws on destructuring otherwise.
    const [_, title, content] = s.match(new RegExp(rx, ""))!;

    return {
      content,
      raw: s,
      metadata: { title },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // accordion
  registerIdentifier(
    "accordion",
    /\[accordion(\s.*?)?]\n+((?:.|\n)*?)\n+\[\/accordion\]/g,
    (s, rx) => {
      // Assumes `s` matches `rx`; throws on destructuring otherwise.
      const [_, title, content] = s.match(new RegExp(rx, ""))!;

      return {
        content,
        raw: s,
        metadata: { title },
        uuid: crypto.randomUUID(),
      };
    },
    renderRaw,
  );

  // paragraph
  registerIdentifier("p", /(?<=\n\n)([\s\S]*?)(?=\n\n)/g, (s) => {
    return {
      content: s,
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
    };
  }, renderRaw);

  // horizontal rule
  registerIdentifier("hr", /^-{3,}$/gm, (s) => {
    return {
      content: s,
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // comment
  // NOTE(review): match pattern reconstructed; extraction left only `//g`,
  // presumably from an HTML-style comment pattern — confirm.
  registerIdentifier("comment", /<!--[\s\S]+?-->/g, (s) => {
    return {
      content: "",
      metadata: { comment: s },
      raw: "",
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, renderRaw);

  // frontmatter
  registerIdentifier("frontmatter", /^---([\s\S]*?)---/g, (s, rx) => {
    return {
      content: "",
      metadata: {
        // `rx` is global, so `match` returns whole matches rather than
        // capture groups; element 0 therefore includes the "---" fences.
        frontmatterString: s.match(rx)?.at(0) || "",
      },
      raw: "",
      uuid: "frontmatter",
    };
  }, renderRaw);

  // table
  registerIdentifier("table", /^\|\s[\s\S]*?\|(?=(\n\n)|$)/g, (s) => {
    // NOTE(review): this splits on every "-" character, including hyphens
    // inside cell text — confirm the intended section separator.
    const rowSections = s.split(/-/gm).map((section) =>
      section.split("\n").map((r) => r.split(/\s?\|\s?/g))
    );

    let headerRows: string[][] = [],
      bodyRows: string[][] = [],
      footerRows: string[][] = [];

    switch (rowSections.length) {
      case 1:
        bodyRows = rowSections[0];
        break;
      case 2:
        headerRows = rowSections[0];
        bodyRows = rowSections[1];
        break;
      case 3:
        headerRows = rowSections[0];
        bodyRows = rowSections[1];
        // Fixed: the third section lives at index 2; index 3 was always
        // undefined.
        footerRows = rowSections[2];
        break;
    }

    // The leading 0 keeps the result finite when no rows were assigned
    // (Math.max over an empty list is -Infinity).
    const maxColumns = Math.max(
      0,
      ...[...headerRows, ...bodyRows, ...footerRows].map((r) => r.length),
    );

    return {
      content: s,
      raw: s,
      metadata: {
        headerRows: headerRows.join(" | "),
        bodyRows: bodyRows.join(" | "),
        footerRows: footerRows.join(" | "),
        columns: maxColumns.toString(),
      },
      uuid: crypto.randomUUID(),
    };
  }, renderRaw);

  return identifiers;
};

/**
 * Walks `str` with global copies of the open and close patterns, counting
 * openings and closings until the closings catch up with the openings.
 *
 * @param str Text to scan from index 0.
 * @param openRegex Pattern of an opening tag (recompiled with the "g" flag).
 * @param closedRegex Pattern of a closing tag (recompiled with the "g" flag).
 * @returns The index just past the closing match that balanced the count;
 *   the closing match's own index when neither branch applies (for example
 *   both matches start at the same index); or `null` when neither pattern
 *   matches.
 * @throws Error when an opening match is found but no closing match is.
 */
function findMatchingClosedParenthesis(
  str: string,
  openRegex: RegExp,
  closedRegex: RegExp,
): number | null {
  let openings = 0;
  let closings = 0;

  // Work on private global copies so the callers' regex state is untouched.
  const opener = new RegExp(openRegex, "g");
  const closer = new RegExp(closedRegex, "g");

  let lastOpeningSuccessIndex = 0;
  let lastClosingSuccessIndex = 0;

  do {
    const openingMatch = opener.exec(str);
    const closingMatch = closer.exec(str);

    if (openingMatch && !closingMatch) {
      throw Error("Things have gone horribly wrong");
    }

    if (
      openingMatch && closingMatch && openingMatch.index < closingMatch.index
    ) {
      openings++;
      lastOpeningSuccessIndex = openingMatch.index + openingMatch[0].length;
      // The closing match was not consumed: rewind so it is seen again.
      closer.lastIndex = lastClosingSuccessIndex;
    } else if (
      (!openingMatch && closingMatch) ||
      (openingMatch && closingMatch && openingMatch.index > closingMatch.index)
    ) {
      closings++;
      lastClosingSuccessIndex = closingMatch.index + closingMatch[0].length;
      // The opening match was not consumed: rewind so it is seen again.
      opener.lastIndex = lastOpeningSuccessIndex;
    } else {
      return closingMatch?.index ?? null;
    }
  } while (openings > closings);

  return closer.lastIndex;
}

/**
 * Finds the closing tag that balances the opening tags in `s` and reports the
 * resulting span.
 *
 * NOTE(review): `text` is taken from index 0 of `s`, so `s` is presumably
 * already sliced to begin at `start` — confirm against the caller.
 *
 * @param s Text to scan.
 * @param start Offset added to the located end index.
 * @param end Previously known end offset; compared with the recomputed one.
 * @param openRx Pattern of an opening tag.
 * @param closeRx Pattern of a closing tag.
 * @returns The span, its text, and the index at which scanning should resume.
 * @throws Error when no closing tag can be located, or when the end moved and
 *   `s` contains no opening tag to step past.
 */
function search(
  s: string,
  start: number,
  end: number,
  openRx: RegExp,
  closeRx: RegExp,
): SearchResult {
  const oldEnd = end;

  const newEnd = findMatchingClosedParenthesis(s, openRx, closeRx);

  if (newEnd === null) throw Error("There was an issue finding a closing tag");

  end = newEnd + start;

  let lastIndex = end;
  if (oldEnd !== end) {
    // The end moved: resume scanning just past the first opening tag.
    const opening = s.match(openRx);
    if (!opening) throw Error("There was an issue finding an opening tag");
    lastIndex = start + opening[0].length;
  }

  return {
    start,
    end,
    text: s.substring(0, newEnd),
    lastIndex,
  };
}