287 lines
7.6 KiB
TypeScript

"use client";
import { zipArrays } from "../zip";
import { TokenIdentifiers } from "./TokenIdentifiers";
/**
 * Tokenizes `body`, builds its token tree and reports which indentation style
 * the text appears to use.
 *
 * @param body - Raw source text to parse.
 * @returns A tuple of the tokens returned by `buildAbstractSyntaxTree` and the
 * index of the first indentation pattern found in `body`: `0` for two spaces
 * (also the fallback when nothing matches), `1` for four spaces, `2` for a
 * single tab.
 */
export const createElements = (body: string): [Token[], number] => {
  // Candidate indentation styles in priority order. Each pattern anchors at a
  // line start, consumes exactly one indent unit and requires the following
  // character not to be whitespace. Literal spaces are used rather than `\s`,
  // because `\s` also matches tabs and line terminators, which made blank
  // lines (notably CRLF ones) look like two-space indentation.
  const tabOptions = [
    /^ {2}(?!\s)/m,
    /^ {4}(?!\s)/m,
    /^\t(?!\s)/m,
  ];

  const detected = tabOptions.findIndex((tabOption) => tabOption.test(body));
  const tabSpacing = detected === -1 ? 0 : detected;

  const tokens = tokenize(body);
  return [buildAbstractSyntaxTree(tokens, body), tabSpacing];
};
/**
 * Runs every pattern registered in `TokenIdentifiers` over `body` and records
 * the span of each match.
 *
 * A "p" match is skipped when a marker starting at the same offset has already
 * been recorded; every other type is always recorded.
 *
 * @param body - Text to scan.
 * @returns Markers (`start`, `end`, `type`) in the order they were found:
 * grouped by identifier in `TokenIdentifiers` iteration order, then by
 * position within `body`.
 */
const tokenize = (body: string) => {
  const tokenizedBody: tokenMarker[] = [];
  // Offsets at which a marker has already been recorded, so the "p" duplicate
  // check does not have to rescan the whole list for every match.
  const takenStarts = new Set<number>();

  for (const [type, token] of TokenIdentifiers.entries()) {
    // Work on a copy so the registered pattern's `lastIndex` is never touched.
    const rx = new RegExp(token.rx);
    // Only patterns with the `g` or `y` flag move `lastIndex` forward on exec().
    const advances = rx.global || rx.sticky;

    let match: RegExpExecArray | null;
    while ((match = rx.exec(body)) !== null) {
      const start = match.index;
      // Same value as `lastIndex` for advancing patterns, and still correct
      // for patterns that leave `lastIndex` at 0.
      const end = start + match[0].length;

      if (type !== "p" || !takenStarts.has(start)) {
        tokenizedBody.push({ start, end, type });
        takenStarts.add(start);
      }

      // A non-advancing pattern would return the same match forever.
      if (!advances) break;
      // A zero-length match leaves `lastIndex` in place; step past it so the
      // loop always makes progress.
      if (match[0].length === 0) rx.lastIndex++;
    }
  }

  return tokenizedBody;
};
/**
 * Turns position markers into parsed tokens and wires each token into its
 * parent's `children` list.
 *
 * Mutates its input: `markers` is sorted in place, and each marker receives a
 * `parent` (via `ensureNoOrphans`) and a parsed `token`.
 *
 * @param markers - Markers describing token spans inside `body`.
 * @param body - The text the marker offsets refer to.
 * @returns The parsed tokens whose `parent` property is unset.
 * @throws Error when a marker's type has no identifier (or its parser returns
 * nothing), or when a child marker is reached before its parent was parsed.
 */
export const buildAbstractSyntaxTree = (
  markers: tokenMarker[],
  body: string,
): Token[] => {
  ensureNoOrphans(markers);

  // Order by start offset so parents are parsed before the markers they
  // contain; when two markers share a start, a "p" marker goes first.
  markers.sort((a, b) => {
    if (a.start === b.start) {
      if (a.type === "p") return -1;
      if (b.type === "p") return 1;
    }
    return a.start - b.start;
  });

  for (const marker of markers) {
    marker.token = TokenIdentifiers.get(marker.type)?.parse(
      body.substring(marker.start, marker.end),
    );
    if (!marker.token) {
      throw new Error("Failed to parse token. Token type not found?");
    }

    if (!marker.parent) continue;

    const parentToken = marker.parent.token;
    if (!parentToken) {
      throw new Error("Failed to parse token. Child tokenized before parent");
    }
    (parentToken.children ??= []).push(marker.token);
  }

  // Keep every non-"p" marker; keep a "p" marker only when some other marker
  // starts or ends at the same offset as it does.
  const tokens = markers
    .filter((m) =>
      m.type !== "p" ||
      markers.some((a) => a !== m && (a.end === m.end || a.start === m.start))
    )
    .map((t) => t.token!);

  for (const token of tokens) {
    contentToChildren(token);
  }

  // NOTE(review): nothing in this function assigns `token.parent`, so this
  // filter only removes tokens if `parse` itself populates it — confirm.
  return tokens.filter((t) => !t.parent);
};
/**
 * Assigns a `parent` to every marker in `tokens`, merging "p" markers that
 * overlap or touch the marker being processed along the way.
 *
 * Mutates the markers (`parent`, and `end` of merged "p" markers). The array
 * passed in is never shortened: merged-away "p" markers are only dropped from
 * an internal working list, and they are still visited and given a parent.
 *
 * @param tokens - Markers to link; each needs `start`, `end` and `type`.
 */
const ensureNoOrphansOld = (tokens: tokenMarker[]) => {
  // Working list used for all lookups; shrinks as "p" markers are merged.
  let candidates = tokens;

  for (const token of tokens) {
    const parentPs = candidates.filter((t) => (
      t.type === "p" && (
        // any p that fully encapsulates the token
        (t.start <= token.start && t.end >= token.end) ||
        // any p that contains the start of the token
        (t.start <= token.start && t.end >= token.start) ||
        // any p that contains the end of the token
        (t.start <= token.end && t.end >= token.end)
      )
    )).sort((a, b) => a.start - b.start);

    if (parentPs.length > 1) {
      // Collapse all matching paragraphs into the earliest one.
      const [survivor, ...absorbed] = parentPs;
      // NOTE(review): this takes the end of the last-starting paragraph, not
      // the largest end among them — confirm that is intended.
      survivor.end = parentPs[parentPs.length - 1].end;

      // Re-home markers that were already attached to an absorbed paragraph.
      for (const other of candidates) {
        if (other.parent && absorbed.includes(other.parent)) {
          other.parent = survivor;
        }
      }

      if (survivor.end < token.end) {
        survivor.end = token.end;
      }

      candidates = candidates.filter((t) => !absorbed.includes(t));
    }

    // Markers that strictly enclose the token, plus any other "p" marker that
    // covers it (bounds inclusive), ordered by start offset with the nearest
    // first. The comparator returns 0 for equal starts so ties keep their
    // list order on every engine.
    const potentialParents = candidates.filter((t) =>
      (t.start < token.start && t.end > token.end) ||
      (t.type === "p" && t.start <= token.start &&
        t.end >= token.end && t !== token)
    ).sort((a, b) => b.start - a.start);

    // Prefer the nearest non-"p" container; otherwise take the nearest one.
    token.parent = potentialParents.find((p) => p.type !== "p") ??
      potentialParents[0];
  }
};
/**
 * Resolves the `parent` of every marker in `tokens`.
 *
 * Currently a thin pass-through to `ensureNoOrphansOld`.
 *
 * @param tokens - Markers to link; handed on unchanged.
 */
const ensureNoOrphans = (tokens: tokenMarker[]): void =>
  ensureNoOrphansOld(tokens);
/**
 * Rebuilds `token.children` so that the text surrounding each existing child
 * becomes a "text" token of its own.
 *
 * The first occurrence of each child's `raw` text inside `token.content` is
 * replaced with a marker, the content is split on that marker, and the
 * resulting text pieces are combined with the original children through
 * `zipArrays` (presumably interleaving them — confirm against `../zip`).
 * Entries are then kept only if they have children of their own, or are
 * `rendersContentOnly` with non-empty `content`.
 *
 * @param token - Token to update in place; only `children` is reassigned.
 */
const contentToChildren = (token: Token) => {
  // Placeholder marking where a child used to sit. Content that already
  // contains this exact sequence would be split at that point as well.
  const splitMarker = "{{^^}}";
  const nested = token.children ?? [];

  let content = token.content;
  for (const child of nested) {
    // String pattern: only the first occurrence of the child's raw text is
    // replaced.
    content = content.replace(child.raw, splitMarker);
  }

  const textParts = content.split(splitMarker).map((c): Token => ({
    content: c.trim(),
    metadata: {},
    raw: c,
    type: "text",
    uuid: crypto.randomUUID(),
    rendersContentOnly: true,
  }));

  token.children = zipArrays(textParts, nested)
    .filter((c) => c.children?.length || (c.rendersContentOnly && c.content));
};
// const tokenize = (body: string) => {
// body = body.replace(/\n?<!--(.*?)-->\n?/gs, "");
// const paragraphs = body.split("\n\n");
// const blockTokens: BlockToken[] = [];
// const paragraphTokens: ParagraphToken[] = [];
// for (const paragraph of paragraphs) {
// const block = tokenizeBlock(paragraph);
// let openBT = blockTokens.findLast((bt) => !bt.closed);
// if (block) {
// if (typeof block === "string") {
// if (openBT) {
// openBT.closed = true;
// }
// continue;
// }
// if (openBT) {
// openBT.children.push(block);
// block.parent = openBT.type;
// }
// blockTokens.push(block);
// continue;
// }
// if (!openBT) {
// openBT = {
// children: [],
// closed: false,
// metadata: {},
// type: "block",
// uuid: crypto.randomUUID(),
// };
// blockTokens.push(openBT);
// }
// const multiline = tokenizeParagraph(paragraph);
// let openP = paragraphTokens.findLast((p) => !p.closed);
// if (multiline) {
// if (Array.isArray(multiline)) {
// if (openP) {
// openP.closed = true;
// openP.content = openP.content.concat(multiline);
// }
// continue;
// }
// openBT.children.push(multiline);
// paragraphTokens.push(multiline);
// continue;
// } else if (openP && !openP?.allowsInline) {
// openP.content.push({
// line: paragraph,
// raw: paragraph,
// type: "text",
// uuid: crypto.randomUUID(),
// });
// }
// // I don't think the closed check is necessary, but just in case
// // if (openP && !openP.closed && !openP.allowsInline) continue;
// if (!openP) {
// openP = {
// allowsInline: true,
// closed: true,
// content: [],
// metadata: {},
// type: "p",
// uuid: crypto.randomUUID(),
// };
// openBT.children.push(openP);
// paragraphTokens.push(openP);
// }
// const lines = paragraph.split("\n");
// let previous;
// for (const line of lines) {
// const singleLine = tokenizeLine(line, previous);
// if (singleLine) {
// if (singleLine !== previous) {
// openP.content.push(singleLine);
// }
// previous = singleLine;
// }
// }
// }
// return blockTokens.filter((b) => !b.parent);
// };