
"use client";
import { zipArrays } from "../zip";
import { buildOnlyDefaultElements, TokenRenderers } from "./TokenIdentifiers";
/** Tokenizes a markdown body and returns the root tokens of its syntax tree. */
export const createElements = (body: string): Token[] => {
  const markers = tokenize(body);
  const roots = buildAbstractSyntaxTree(markers);
  return roots.map((marker) => marker.token);
};
/**
 * Scans `body` with every registered token identifier's regex and records a
 * TokenMarker (start/end position + parsed token) for each match.
 */
const tokenize = (body: string) => {
  const tokenizedBody: TokenMarker[] = [];
  // Strip trailing spaces/tabs before newlines so block delimiters match cleanly.
  body = body.replaceAll(/[ \t]+\n/g, "\n");
  const addToken = (thing: TokenMarker) => {
    tokenizedBody.push(thing);
  };
  const ti = buildOnlyDefaultElements();
  for (const [type, token] of ti.entries()) {
    // Copy the regex so `lastIndex` state never leaks between tokenize calls.
    const rx = new RegExp(token.rx);
    let match;
    while ((match = rx.exec(body)) !== null) {
      const start = match.index;
      const end = rx.lastIndex;
      if (token.search) {
        // Token types with a custom `search` refine the match boundaries themselves.
        const found = token.search(body.substring(start), start, end);
        rx.lastIndex = found.lastIndex;
        addToken({
          start: found.start,
          end: found.end,
          type,
          token: token.parse(found.text),
        });
        // Guard: if `search` did not advance the scan position past the match
        // start, force progress so a misbehaving searcher cannot loop forever.
        if (rx.lastIndex <= start) rx.lastIndex = start + 1;
        continue;
      }
      addToken({
        start,
        end,
        type,
        token: token.parse(match[0]),
      });
      // Guard: a zero-length match leaves `lastIndex` unchanged, making `exec`
      // return the same empty match forever. Step past it manually.
      if (rx.lastIndex === match.index) rx.lastIndex++;
    }
  }
  return tokenizedBody;
};
/**
 * Orders the raw token markers, resolves parent/child nesting, and converts
 * each token's remaining text content into child tokens. Returns only the
 * root-level markers (those without a parent).
 */
function buildAbstractSyntaxTree(markers: TokenMarker[]) {
  markers.sort((a, b) => a.start - b.start);
  markers = filterOverlappingPBlocks(markers);
  establishClosestParent(markers);
  // Attach every nested token to its parent's children list.
  for (const marker of markers) {
    const parent = marker.parent;
    if (!parent) continue;
    parent.token.children = parent.token.children || [];
    parent.token.children.push(marker.token);
  }
  // Walk back-to-front so children are always processed before the tokens
  // that contain them.
  for (let i = markers.length - 1; i >= 0; i--) {
    contentToChildren(markers[i].token);
  }
  return markers.filter((m) => !m.parent);
}
/**
 * Assigns each block's `parent` to the nearest earlier block whose span
 * covers the block's start position and whose type accepts it as a child.
 */
function establishClosestParent(blocks: TokenMarker[]): void {
  blocks.sort((a, b) => a.start - b.start); // Process in document order
  blocks.forEach((candidate, idx) => {
    if (candidate.parent) return; // Already attached; nothing to do
    let best: TokenMarker | undefined;
    let bestDistance = Number.MAX_SAFE_INTEGER;
    // Only earlier blocks (sorted by start) can enclose this one.
    for (let k = 0; k < idx; k++) {
      const enclosing = blocks[k];
      const covers = enclosing.start <= candidate.start &&
        enclosing.end >= candidate.start;
      if (!covers) continue;
      const distance = candidate.start - enclosing.start;
      // The tightest (closest-starting) acceptable container wins.
      if (distance < bestDistance && isAcceptableChild(enclosing.type, candidate.type)) {
        bestDistance = distance;
        best = enclosing;
      }
    }
    if (best) {
      candidate.parent = best;
    }
  });
}
// Maps a parent token type to the child types it is allowed to contain.
type ParentChildMap = {
  [parentType: string]: string[];
};
const parentChildMap: ParentChildMap = {
  "list": ["list-item"],
  // Add more mappings as needed...
};
/**
 * Whether `childType` may nest inside `parentType`. Parent types without an
 * explicit mapping accept any child.
 */
function isAcceptableChild(parentType: string, childType: string): boolean {
  return parentChildMap[parentType]?.includes(childType) ?? true;
}
// Some "p" blocks start at exactly the same point as another block (a side
// effect of excluding preceding line-breaks from the regex while those
// line-breaks are the only clear delineation). Drop such P blocks so parent
// resolution never has to decide whether a P block is valid. Rendering is
// unaffected because each block handles its own container element.
function filterOverlappingPBlocks(blocks: TokenMarker[]): TokenMarker[] {
  const collidesWithAnother = (p: TokenMarker) =>
    blocks.some((other) =>
      other !== p &&
      (
        other.start === p.start ||
        (other.end === p.end && other.start < p.start)
      )
    );
  // Non-"p" blocks always survive; "p" blocks survive only when standalone.
  return blocks.filter((block) => block.type !== "p" || !collidesWithAnother(block));
}
/**
 * Converts a token's remaining text `content` into child tokens, interleaving
 * plain-text tokens with the token's already-parsed children in source order.
 * Tokens fully restructured by `handleSpecial` (lists) are left as-is.
 */
const contentToChildren = (token: Token) => {
  let content = token.content;
  if (!content) return;
  const wasSpecialCase = handleSpecial(token);
  if (wasSpecialCase) return;
  // Placeholder marking where each existing child's raw text sat in `content`.
  const splitMarker = "{{^^}}";
  for (const child of token.children || []) {
    // NOTE(review): `replace` swaps only the FIRST occurrence — this assumes
    // each child's raw text is found once and children are in document order;
    // confirm against how tokenize orders markers.
    content = content.replace(child.raw, splitMarker);
  }
  // Split on the markers and re-interleave: text fragment, child, fragment, …
  token.children = zipArrays(
    content.split(splitMarker).map((c): Token => ({
      // Newlines inside a fragment render as spaces.
      content: c.replaceAll("\n", " "),
      metadata: {},
      raw: c,
      // Containers that render children only wrap fragments in "p" tokens;
      // everything else emits inline "text" tokens.
      type: token.rendersChildrenOnly ? "p" : "text",
      uuid: crypto.randomUUID(),
      rendersContentOnly: token.rendersChildrenOnly ? false : true,
      render: TokenRenderers.get(token.rendersChildrenOnly ? "p" : "text")!,
      // A "p" wrapper gets one nested text child, but only when the fragment
      // is non-empty once newlines are stripped.
      children: token.rendersChildrenOnly && c.replaceAll("\n", "")
        ? [
          {
            content: c.replaceAll("\n", " "),
            metadata: {},
            raw: c,
            type: "text",
            uuid: crypto.randomUUID(),
            render: TokenRenderers.get("text")!,
            rendersContentOnly: true,
          },
        ]
        : undefined,
    })),
    token.children || [],
  ).filter((c) => c.children?.length || (c.rendersContentOnly && c.content));
};
/**
 * Applies type-specific restructuring before generic content splitting.
 * Returns the rebuilt children when the token was handled here, otherwise
 * undefined so the caller falls through to the generic path.
 */
function handleSpecial(token: Token) {
  // Only lists need special treatment today (grid handling was removed).
  if (token.type !== "list") return;
  // Regroup list items by indentation depth into nested sub-lists.
  const grouped = splitByDepth(token.children!);
  token.children = processChunks(grouped).flat();
  return token.children;
}
/**
 * Groups consecutive tokens sharing the same `metadata.initialDepth` into
 * chunks, preserving order; every depth change starts a new chunk.
 */
function splitByDepth(items: Token[]) {
  const chunks: Token[][] = [];
  if (!items) return chunks;
  let activeDepth = -1;
  let active: Token[] = [];
  for (const item of items) {
    const depth = Number(item.metadata.initialDepth);
    if (depth !== activeDepth) {
      // Depth changed: seal the current run and open a fresh one.
      if (active.length > 0) {
        chunks.push(active);
      }
      active = [];
      activeDepth = depth;
    }
    active.push(item);
  }
  if (active.length > 0) {
    chunks.push(active);
  }
  return chunks;
}
/**
 * Nests deeper-indented list chunks under the nearest preceding shallower
 * chunk, producing the nested-list structure. Chunks that get nested are
 * removed from the top level; the remaining top-level chunks are returned.
 */
function processChunks(chunks: Token[][]) {
  // Chunks absorbed into another chunk's children; filtered out at the end.
  const mergedChunks: Token[][] = [];
  for (let i = 1; i < chunks.length; i++) {
    const currentChunk = chunks[i];
    let j = i - 1;
    // Find the first chunk with a lower depth
    while (j >= 0) {
      const prevChunk = chunks[j];
      const prevDepth = Number(prevChunk[0].metadata.initialDepth);
      if (prevDepth < Number(currentChunk[0].metadata.initialDepth)) {
        // Append the current chunk to the children of the found chunk
        const lastPrev = prevChunk[prevChunk.length - 1];
        lastPrev.children = lastPrev.children || [];
        // Wrap the deeper run in a synthetic "list" token so it renders as a
        // nested list under the last item of the shallower chunk.
        lastPrev.children.push({
          type: "list",
          content: "",
          raw: "",
          metadata: { initialDepth: currentChunk[0].metadata.initialDepth },
          uuid: crypto.randomUUID(),
          children: currentChunk,
          render: TokenRenderers.get("list")!,
        });
        mergedChunks.push(currentChunk);
        break;
      }
      j--;
    }
  }
  // Filter out chunks that were merged into others
  return chunks.filter((c) => !mergedChunks.find((c2) => c === c2));
}
/**
 * @description Extracts the frontmatter of a markdown document and returns it as an object, along with the body with the frontmatter removed from it
 *
 * @returns a tuple of the frontmatter object and the remaining body
 */
export function extractFrontMatter(body: string): [FrontMatter, string] {
  // Frontmatter is the block delimited by the first pair of `---` fences.
  const rx = /^---([\s\S]*?)---/;
  const frontmatterString = body.match(rx)?.[1] ?? "";
  body = body.replace(rx, "");
  const frontMatter: FrontMatter = {};
  for (const line of frontmatterString.split("\n")) {
    if (!line) continue;
    // Split on the FIRST ": " only, so values that themselves contain ": "
    // (e.g. "title: foo: bar") are kept intact. Lines without a separator are
    // skipped instead of storing an `undefined` value.
    const sep = line.indexOf(": ");
    if (sep === -1) continue;
    frontMatter[line.slice(0, sep)] = line.slice(sep + 2);
  }
  return [frontMatter, body];
}