ttcMD: Abstracting identifier registration

This commit is contained in:
Emmaline Autumn 2024-03-14 17:47:57 -06:00
parent 82a175c33a
commit 447f9f1dc1

View File

@ -0,0 +1,530 @@
import {
IdentifiedToken,
Token,
TokenAttributes,
TokenRenderer,
} from "@/types";
/**
 * Signature of an identifier's optional `search` hook.
 *
 * It receives a string plus a start and end number and reports a span
 * (`start`/`end`), the text of that span, and a `lastIndex` value.
 * NOTE(review): the exact coordinate system of `start`/`end`/`lastIndex`
 * is defined by the implementation supplied at registration time.
 */
type SearchFunction = (
  s: string,
  start: number,
  end: number,
) => {
  start: number;
  end: number;
  text: string;
  lastIndex: number;
};
/** A single registered identifier: its pattern, its parser and an optional search hook. */
type TokenIdentifier = {
  /** Pattern associated with this identifier. */
  rx: RegExp;
  /** Turns a piece of source text into a token. */
  parse: (s: string) => Token;
  /** Optional span search; only present for identifiers that provide one. */
  search?: SearchFunction;
};

/** Identifier definitions keyed by a string name. */
type TokenIdentifierMap = Map<string, TokenIdentifier>;

/** Module-level identifier map; it is created empty here. */
export const TokenIdentifiers: TokenIdentifierMap = new Map();

/**
 * Shape of the registration callback handed out by `buildIdentifierMap`.
 *
 * `openTagRx` and `closeTagRx` are optional at the type level.
 */
type IdentifierRegistration = (
  /** Name under which the identifier is registered. */
  type: string,
  /** Pattern for the identifier. */
  match: RegExp,
  /** Parses source text (given the pattern) into an identified token. */
  parseFunction: (s: string, rx: RegExp) => IdentifiedToken,
  /** Renderer attached to tokens of this type. */
  renderFunction: TokenRenderer,
  /** Optional pattern for an opening tag. */
  openTagRx?: RegExp,
  /** Optional pattern for a closing tag. */
  closeTagRx?: RegExp,
) => void;
/**
 * Creates a fresh, empty identifier map together with the function used to
 * populate it.
 *
 * @returns A tuple of `[map, registerIdentifier]`. Each call to
 * `registerIdentifier` stores (or overwrites) the entry for `type` in `map`.
 */
export function buildIdentifierMap(): [
  TokenIdentifierMap,
  IdentifierRegistration,
] {
  const identifiers: TokenIdentifierMap = new Map();

  /**
   * Registers one identifier. A `search` hook is attached only when BOTH
   * `openTagRx` and `closeTagRx` are supplied; otherwise `search` is
   * `undefined`.
   */
  const registerIdentifier: IdentifierRegistration = (
    type,
    match,
    parseFunction,
    renderFunction,
    openTagRx,
    closeTagRx,
  ) => {
    let tagSearch: TokenIdentifier["search"];
    if (openTagRx && closeTagRx) {
      // Fresh global copies of the tag patterns are built on every search call.
      tagSearch = (s, start, end) =>
        search(
          s,
          start,
          end,
          new RegExp(openTagRx, "g"),
          new RegExp(closeTagRx, "g"),
        );
    }

    const identifier: TokenIdentifier = {
      rx: match,
      parse(s) {
        // `this.rx` is read at call time, so the entry's current pattern is used.
        const parsed = parseFunction(s, this.rx);
        const defaults: TokenAttributes = {
          render: renderFunction,
          type,
        };
        // Fields returned by the parse function win over the defaults.
        return { ...defaults, ...parsed };
      },
      search: tagSearch,
    };

    identifiers.set(type, identifier);
  };

  return [identifiers, registerIdentifier];
}
/**
 * Builds a new identifier map and registers the default set of elements on it:
 * grid, card, code, list, list-item, heading, image, anchor, inline-code,
 * bold, italic, popover, accordion, p, hr, comment, frontmatter and table.
 *
 * Every renderer registered here simply echoes the token's `raw` text.
 *
 * @returns The populated identifier map.
 */
export const buildOnlyDefaultElements = () => {
  const [TokenIdentifiers, registerIdentifier] = buildIdentifierMap();

  // Named flags so the object shorthand below reads as `rendersContentOnly: true`.
  const rendersContentOnly = true;
  const rendersChildrenOnly = true;

  // grid
  registerIdentifier(
    "grid",
    /(?<!\/)(?:\[\])+\n+((?:.|\n)*?)\n+\/\[\]/g,
    (s) => {
      const rx = /((?:\[\])+)\n+([\s\S]*)\n+\/\[\]/;
      const [_, columns, content] = s.match(rx) ||
        ["", "..", "Unable to parse grid"];

      return {
        content,
        raw: s,
        metadata: {
          // Each column is written as "[]", i.e. two characters.
          columns: (columns.length / 2).toString(),
        },
        uuid: crypto.randomUUID(),
        rendersChildrenOnly,
      };
    },
    (t) => {
      return <>{t.raw}</>;
    },
    /(?<!\/)(?:\[\])+/g,
    /\/\[\]/g,
  );

  // card
  registerIdentifier(
    "card",
    /\[{2}([\s\S]*?)\n+\]{2}/g,
    (s) => {
      const rx = /\[{2}(!?)\s*?\n+([\s\S]*)\n+\]{2}/;
      // Fall back to treating the whole input as content when the pattern
      // does not match.
      const [_, isBlock, content] = s.match(rx) ||
        ["", "", s];

      return {
        content: content.trim(),
        raw: s,
        metadata: {
          isBlock,
        },
        uuid: crypto.randomUUID(),
        rendersChildrenOnly,
      };
    },
    (t) => {
      return <>{t.raw}</>;
    },
    /\[\[/g,
    /\]\]/g,
  );

  // fenced code block
  registerIdentifier("code", /`{3}\n+((?:.|\n)*?)\n+`{3}/g, (s, rx) => {
    return {
      content: s.match(new RegExp(rx, ""))?.at(1) ||
        "Unable to parse code",
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // list
  registerIdentifier(
    "list",
    /^\s*-\s([\s\S]*?)\n\n/gm,
    (s, rx) => {
      return {
        content: s.match(new RegExp(rx, ""))?.at(1) ||
          "Unable to parse list",
        raw: s,
        metadata: {
          // Length of whatever precedes the first "-" (after dropping the
          // first newline).
          initialDepth:
            s.replace("\n", "").split("-").at(0)?.length.toString() ||
            "1",
        },
        uuid: crypto.randomUUID(),
        rendersChildrenOnly,
      };
    },
    (t) => {
      return <>{t.raw}</>;
    },
  );

  // list-item
  registerIdentifier(
    "list-item",
    /^\s*-\s(.*?)$/gm,
    (s, rx) => {
      return {
        content: s.match(new RegExp(rx, ""))?.at(1) ||
          "Unable to parse list-item",
        raw: s,
        metadata: {
          initialDepth:
            s.replace("\n", "").split("-").at(0)?.length.toString() ||
            "1",
        },
        uuid: crypto.randomUUID(),
      };
    },
    (t) => {
      return <>{t.raw}</>;
    },
  );

  // heading
  registerIdentifier("heading", /^#+\s(.*?)$/gm, (s, rx) => {
    return {
      content: s.match(new RegExp(rx, ""))?.at(1) ||
        "Unable to parse heading",
      raw: s,
      metadata: {
        // Number of "#" characters found in the matched text.
        strength: s.match(/#/g)?.length.toString() || "1",
      },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // image
  registerIdentifier("image", /\!\[(.*?)\]\((.*?)\)/g, (s, rx) => {
    const [_, title, src] = s.match(new RegExp(rx, ""))!;

    return {
      // content: inline,
      content: title.trim(),
      raw: s,
      metadata: {
        src,
      },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // anchor
  registerIdentifier("anchor", /(?<![\!^])\[(.*?)\]\((.*?)\)/g, (s, rx) => {
    let preset, [_, title, href] = s.match(new RegExp(rx, ""))!;
    // A title starting with ``` optionally followed by "cta" or "button"
    // selects a preset class list; the remainder becomes the title.
    const match = title.match(/`{3}(cta|button)?(.*)/);

    if (match) {
      [_, preset, title] = match;
    }

    const classes = {
      button: "btn-primary inline-block",
      cta: "btn-secondary inline-block uppercase",
    };

    return {
      content: title.trim(),
      raw: s,
      metadata: {
        href,
        classes: classes[preset as keyof typeof classes],
      },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // inline-code
  registerIdentifier(
    "inline-code",
    /(?<=\s|^)`(.*?)`(?=[\s,.!?)]|$)/gi,
    (s, rx) => {
      return {
        content: s.match(new RegExp(rx, "i"))?.at(1) ||
          "Unable to parse inline-code",
        raw: s,
        metadata: {},
        uuid: crypto.randomUUID(),
        rendersContentOnly,
      };
    },
    (t) => {
      return <>{t.raw}</>;
    },
  );

  // bold
  registerIdentifier("bold", /\*{2}(.*?)\*{2}/g, (s, rx) => {
    return {
      content: s.match(new RegExp(rx, "i"))?.at(1) ||
        "Unable to parse bold",
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // italic
  registerIdentifier("italic", /(?<!\*)\*([^\*]+?)\*(?!\*)/g, (s, rx) => {
    return {
      content: s.match(new RegExp(rx, "i"))?.at(1) ||
        "Unable to parse italic",
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // popover
  registerIdentifier("popover", /\^\[(.*?)\]\<<(.*?)\>>/g, (s, rx) => {
    const [_, title, content] = s.match(new RegExp(rx, ""))!;

    return {
      content,
      raw: s,
      metadata: { title },
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // accordion
  registerIdentifier(
    "accordion",
    /\[accordion(\s.*?)?]\n+((?:.|\n)*?)\n+\[\/accordion\]/g,
    (s, rx) => {
      const [_, title, content] = s.match(new RegExp(rx, ""))!;

      return {
        content,
        raw: s,
        metadata: { title },
        uuid: crypto.randomUUID(),
      };
    },
    (t) => {
      return <>{t.raw}</>;
    },
  );

  // paragraph
  registerIdentifier("p", /(?<=\n\n)([\s\S]*?)(?=\n\n)/g, (s) => {
    return {
      content: s,
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // horizontal rule
  registerIdentifier("hr", /^-{3,}$/gm, (s) => {
    return {
      content: s,
      raw: s,
      metadata: {},
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // comment: the matched text is kept only in metadata; content and raw are empty
  registerIdentifier("comment", /<!--[\s\S]+?-->/g, (s) => {
    return {
      content: "",
      metadata: { comment: s },
      raw: "",
      uuid: crypto.randomUUID(),
      rendersContentOnly,
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // frontmatter: uses the fixed uuid "frontmatter" instead of a random one
  registerIdentifier("frontmatter", /^---([\s\S]*?)---/g, (s, rx) => {
    return {
      content: "",
      metadata: {
        frontmatterString: s.match(rx)?.at(0) || "",
      },
      raw: "",
      uuid: "frontmatter",
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  // table
  registerIdentifier("table", /^\|\s[\s\S]*?\|(?=(\n\n)|$)/g, (s) => {
    // Sections are separated by "-"; each section is split into lines and
    // each line into cells on "|".
    const rowSections = s.split(/-/gm).map((s) =>
      s.split("\n").map((r) => r.split(/\s?\|\s?/g))
    );

    let headerRows: string[][] = [],
      bodyRows: string[][] = [],
      footerRows: string[][] = [];

    switch (rowSections.length) {
      case 1:
        bodyRows = rowSections[0];
        break;
      case 2:
        headerRows = rowSections[0];
        bodyRows = rowSections[1];
        break;
      case 3:
        headerRows = rowSections[0];
        bodyRows = rowSections[1];
        // The footer is the third section (index 2). Index 3 is out of range
        // for a three-element array and made the spread below throw.
        footerRows = rowSections[2];
        break;
    }

    const maxColumns = Math.max(
      ...[...headerRows, ...bodyRows, ...footerRows].map((r) => r.length),
    );

    return {
      content: s,
      raw: s,
      metadata: {
        headerRows: headerRows.join(" | "),
        bodyRows: bodyRows.join(" | "),
        footerRows: footerRows.join(" | "),
        columns: maxColumns.toString(),
      },
      uuid: crypto.randomUUID(),
    };
  }, (t) => {
    return <>{t.raw}</>;
  });

  return TokenIdentifiers;
};
/**
 * Walks `str` pairing opening and closing matches until the nesting balances.
 *
 * Both patterns are copied with only the "g" flag, so the caller's regex
 * objects are not mutated and any other flags they carried are not applied.
 *
 * @param str Text to scan.
 * @param openRegex Pattern for an opening delimiter.
 * @param closedRegex Pattern for a closing delimiter.
 * @returns The index just past the closing match that balances the nesting;
 * the closing match's start index when an opening and a closing match begin
 * at the same position; or `null` when neither pattern matches.
 * @throws Error when an opening match is found but no closing match is.
 */
function findMatchingClosedParenthesis(
  str: string,
  openRegex: RegExp,
  closedRegex: RegExp,
): number | null {
  const opener = new RegExp(openRegex, "g");
  const closer = new RegExp(closedRegex, "g");

  // Net nesting level: +1 per counted opening, -1 per counted closing.
  let depth = 0;
  // Where each scanner resumes after the *other* kind of match was consumed.
  let resumeOpenerAt = 0;
  let resumeCloserAt = 0;

  while (true) {
    const opened = opener.exec(str);
    const closed = closer.exec(str);

    if (closed === null) {
      if (opened !== null) {
        throw Error("Things have gone horribly wrong");
      }
      // Nothing left to match on either side.
      return null;
    }

    if (opened !== null && opened.index === closed.index) {
      // Both delimiters start at the same position.
      return closed.index;
    }

    if (opened !== null && opened.index < closed.index) {
      // The opening comes first: count it and rewind the closing scanner so
      // the same closing match is reconsidered on the next pass.
      depth++;
      resumeOpenerAt = opened.index + opened[0].length;
      closer.lastIndex = resumeCloserAt;
    } else {
      // No opening left, or the closing comes first: count the closing and
      // rewind the opening scanner instead.
      depth--;
      resumeCloserAt = closed.index + closed[0].length;
      opener.lastIndex = resumeOpenerAt;
    }

    if (depth <= 0) break;
  }

  return closer.lastIndex;
}
/** Span information produced by `search`. */
interface SearchResult {
  /** The `start` value passed in, unchanged. */
  start: number;
  /** Offset of the balancing close within the searched string, plus `start`. */
  end: number;
  /** The searched string from its beginning up to the balancing close. */
  text: string;
  /** Resume position; see `search` for how it is chosen. */
  lastIndex: number;
}

/**
 * Finds the closing tag that balances the tags in `s` and describes the span.
 *
 * @param s Text to scan; offsets found in it are relative to its beginning.
 * @param start Value added to the close offset to produce the returned `end`.
 * @param end The caller's end value, compared against the computed one.
 * @param openRx Pattern for the opening tag.
 * @param closeRx Pattern for the closing tag.
 * @returns The span. `lastIndex` equals the computed end when it matches the
 * `end` passed in; otherwise it is `start` plus the length of the first
 * `openRx` match in `s`.
 * @throws Error when no closing tag can be located.
 */
function search(
  s: string,
  start: number,
  end: number,
  openRx: RegExp,
  closeRx: RegExp,
): SearchResult {
  const closeOffset = findMatchingClosedParenthesis(
    s,
    openRx,
    closeRx,
  );

  if (closeOffset === null) {
    throw Error("There was an issue finding a closing tag");
  }

  const resolvedEnd = closeOffset + start;
  const text = s.substring(0, closeOffset);

  let lastIndex: number;
  if (resolvedEnd === end) {
    lastIndex = resolvedEnd;
  } else {
    // The computed end differs from the caller's: resume just past the
    // first opening match instead.
    lastIndex = start + s.match(openRx)![0].length;
  }

  return { start, end: resolvedEnd, text, lastIndex };
}