"use client";

import { zipArrays } from "../zip";
import { TokenIdentifiers } from "./TokenIdentifiers";

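// Rough sketch of the shapes this module relies on, inferred purely from the
// usage below; the real Token / tokenMarker definitions live elsewhere in the
// project and may differ:
//
//   interface Token {
//     type: string;
//     content: string;
//     raw: string;
//     metadata: Record<string, unknown>;
//     uuid: string;
//     children?: Token[];
//     parent?: Token;
//     rendersContentOnly?: boolean;
//   }
//
//   interface tokenMarker {
//     start: number;
//     end: number;
//     type: string;
//     token?: Token;
//     parent?: tokenMarker;
//   }
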
export const createElements = (body: string): [Token[], number] => {
  // Detect the indentation style used in the body: index 0 = two spaces,
  // 1 = four spaces, 2 = a single tab.
  const tabOptions = [
    /^\s{2}(?!\s|\t)/m,
    /^\s{4}(?!\s|\t)/m,
    /^\t(?!\s|\t)/m,
  ];
  let tabSpacing = 0;

  for (const [i, tabOption] of tabOptions.entries()) {
    if (body.match(tabOption)) {
      tabSpacing = i;
      break;
    }
  }
  const tokens = tokenize(body);
  return [buildAbstractSyntaxTree(tokens, body), tabSpacing];
};

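// Example of how createElements is intended to be called (a sketch; the
// resulting token types depend entirely on what TokenIdentifiers defines):
//
//   const [tree, tabSize] = createElements("# Title\n\nSome paragraph text.");
//   // `tree` holds the root-level tokens with their nested `children`;
//   // `tabSize` is the index of the detected indentation style
//   // (0 = two spaces, 1 = four spaces, 2 = tab).
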
const tokenize = (body: string) => {
  const tokenizedBody: tokenMarker[] = [];

  const addToken = (thing: tokenMarker) => {
    tokenizedBody.push(thing);
  };

  for (const [type, token] of TokenIdentifiers.entries()) {
    // Copy the identifier's pattern so each scan starts with a fresh lastIndex.
    const rx = new RegExp(token.rx);
    let match;
    while ((match = rx.exec(body)) !== null) {
      const start = match.index;
      const end = rx.lastIndex;

      // A "p" marker is only added if no previously found marker starts at
      // the same offset.
      if (type !== "p" || !tokenizedBody.find((i) => i.start === start)) {
        addToken({
          start,
          end,
          type,
        });
      }
    }
  }
  return tokenizedBody;
};

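// Sketch of tokenize's output, assuming TokenIdentifiers holds a heading rule
// ("h1") and the catch-all paragraph rule ("p") — both names hypothetical:
//
//   tokenize("# Hi\n\nplain text")
//   // ≈ [
//   //   { start: 0, end: 4, type: "h1" },
//   //   { start: 6, end: 16, type: "p" },
//   // ]
//
// A "p" marker that would start at the same offset as an already-found marker
// (offset 0 above) is skipped, so more specific matches win over the
// paragraph fallback.
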
export const buildAbstractSyntaxTree = (
  markers: tokenMarker[],
  body: string,
): Token[] => {
  ensureNoOrphans(markers);

  // Order markers by start offset; when two markers start at the same offset,
  // put the paragraph first so it is parsed before the markers nested in it.
  markers.sort((a, b) => {
    if (a.start === b.start) {
      if (a.type === "p") return -1;
      if (b.type === "p") return 1;
    }
    // if (a.type === "p" && a.start === b.start) return -1;
    // if (b.type === "p" && a.start === b.start) return 1;
    return a.start - b.start;
  });

  for (const marker of markers) {
    marker.token = TokenIdentifiers.get(marker.type)?.parse(
      body.substring(marker.start, marker.end),
    );
    // if (marker.type === "p" && marker.parent && marker.parent?.type !== "p") {
    //   marker.parent = undefined;
    //   continue;
    // }
    if (!marker.token) {
      throw new Error("Failed to parse token. Token type not found?");
    }
    if (!marker.parent) continue;

    if (!marker.parent.token) {
      // debugger;
      throw new Error("Failed to parse token. Child tokenized before parent");
    }

    // Attach the parsed token to its parent's children, building the tree.
    marker.parent.token.children = marker.parent.token.children || [];
    marker.parent.token.children.push(marker.token);
    // marker.token.parent = marker.parent.token;
  }

  // Keep every non-paragraph marker; keep a paragraph marker only when it
  // shares a start or end offset with some other marker.
  const tokens = markers.filter((m) =>
    markers.filter((a) => a !== m && (a.end === m.end || a.start === m.start))
      .length || m.type !== "p"
  ).map((t) => t.token!);

  for (const token of tokens) {
    contentToChildren(token);
  }

  return tokens.filter((t) => !t.parent);
};

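// Sketch of the parent/child wiring done in the loop above, for two
// hypothetical markers where one encloses the other:
//
//   const quote: tokenMarker = { start: 0, end: 20, type: "blockquote" };
//   const em: tokenMarker = { start: 5, end: 9, type: "em", parent: quote };
//
// After the loop, em's parsed token has been pushed onto
// quote.token.children, so nesting by offset in the source text becomes
// nesting in the returned tree.
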
const ensureNoOrphansOld = (tokens: tokenMarker[]) => {
  for (const token of tokens) {
    const parentPs = tokens.filter((t) => (
      t.type === "p" && (
        // any p that fully encapsulates the token
        (t.start <= token.start && t.end >= token.end) ||
        // any p that contains the start of the token
        (t.start <= token.start && t.end >= token.start) ||
        // any p that contains the end of the token
        (t.start <= token.end && t.end >= token.end)
      )
    )).sort((a, b) => (a.start - b.start));

    // Merge overlapping paragraphs into the first one and re-point any
    // children of the discarded paragraphs at it.
    if (parentPs.length > 1) {
      parentPs[0].end = parentPs.at(-1)!.end;
      const remainingParents = parentPs.slice(1);
      for (const token of tokens) {
        if (token.parent && remainingParents.includes(token.parent)) {
          token.parent = parentPs[0];
        }
      }
      if (parentPs[0] && parentPs[0].end < token.end) {
        parentPs[0].end = token.end;
      }
      tokens = tokens.filter((t) => !remainingParents.includes(t));
    }

    // Candidate parents: any marker that strictly encloses this one, or any
    // paragraph that at least spans it.
    const potentialParents = tokens.filter((t) =>
      (t.start < token.start && t.end > token.end) ||
      (t.type === "p" && t.start <= token.start &&
        t.end >= token.end && t !== token)
    ).sort((a, b) => b.start - a.start); // innermost (closest start) first

    // Prefer the closest non-paragraph ancestor; fall back to the closest paragraph.
    token.parent = potentialParents.find((p) => p.type !== "p") ??
      potentialParents[0];
  }
};

const ensureNoOrphans = (tokens: tokenMarker[]) => {
  ensureNoOrphansOld(tokens);
};

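// Parent selection in ensureNoOrphansOld, with hypothetical markers:
//
//   p:     { start: 0,  end: 40, type: "p" }
//   quote: { start: 0,  end: 40, type: "blockquote" }
//   em:    { start: 10, end: 16, type: "em" }
//
// Both p and quote span em, so both are candidates; non-"p" candidates are
// preferred, so em.parent becomes the blockquote rather than the paragraph.
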
const contentToChildren = (token: Token) => {
  const children: Token[] = [];
  let part, content = token.content;

  // for (const child of token.children || []) {
  //   if (!content) continue;
  //   [part, content] = content.split(child.raw);
  //   part && children.push({
  //     content: part.trim(),
  //     metadata: {},
  //     raw: part,
  //     type: "text",
  //     uuid: crypto.randomUUID(),
  //   });
  //   children.push(child);
  // }

  // if (content) {
  //   children.push({
  //     content: content.trim(),
  //     metadata: {},
  //     raw: content,
  //     type: "text",
  //     uuid: crypto.randomUUID(),
  //   });
  // }

  // Replace each child's raw text with a placeholder, split the remaining
  // plain text on it, and interleave the pieces back with the children so
  // everything stays in reading order.
  const splitMarker = "{{^^}}";
  for (const child of token.children || []) {
    content = content.replace(child.raw, splitMarker);
  }

  token.children = zipArrays(
    content.split(splitMarker).map((c): Token => ({
      content: c.trim(),
      metadata: {},
      raw: c,
      type: "text",
      uuid: crypto.randomUUID(),
      rendersContentOnly: true,
    })),
    token.children || [],
  ).filter((c) => c.children?.length || (c.rendersContentOnly && c.content));
};

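// Worked example of the split-and-zip step above (values illustrative,
// assuming zipArrays interleaves its two arguments): for a token whose
// content is "some *bold* text" with one child whose raw text is "*bold*",
// the replace produces "some {{^^}} text", which splits into
// ["some ", " text"]. Interleaving those plain-text pieces with the original
// children keeps everything in reading order, and the trailing filter drops
// pieces that are empty after trimming.
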
// const tokenize = (body: string) => {
//   body = body.replace(/\n?<!--(.*?)-->\n?/gs, "");

//   const paragraphs = body.split("\n\n");

//   const blockTokens: BlockToken[] = [];
//   const paragraphTokens: ParagraphToken[] = [];

//   for (const paragraph of paragraphs) {
//     const block = tokenizeBlock(paragraph);
//     let openBT = blockTokens.findLast((bt) => !bt.closed);
//     if (block) {
//       if (typeof block === "string") {
//         if (openBT) {
//           openBT.closed = true;
//         }
//         continue;
//       }

//       if (openBT) {
//         openBT.children.push(block);
//         block.parent = openBT.type;
//       }
//       blockTokens.push(block);
//       continue;
//     }

//     if (!openBT) {
//       openBT = {
//         children: [],
//         closed: false,
//         metadata: {},
//         type: "block",
//         uuid: crypto.randomUUID(),
//       };
//       blockTokens.push(openBT);
//     }

//     const multiline = tokenizeParagraph(paragraph);
//     let openP = paragraphTokens.findLast((p) => !p.closed);
//     if (multiline) {
//       if (Array.isArray(multiline)) {
//         if (openP) {
//           openP.closed = true;
//           openP.content = openP.content.concat(multiline);
//         }
//         continue;
//       }

//       openBT.children.push(multiline);
//       paragraphTokens.push(multiline);
//       continue;
//     } else if (openP && !openP?.allowsInline) {
//       openP.content.push({
//         line: paragraph,
//         raw: paragraph,
//         type: "text",
//         uuid: crypto.randomUUID(),
//       });
//     }

//     // I don't think the closed check is necessary, but just in case
//     // if (openP && !openP.closed && !openP.allowsInline) continue;
//     if (!openP) {
//       openP = {
//         allowsInline: true,
//         closed: true,
//         content: [],
//         metadata: {},
//         type: "p",
//         uuid: crypto.randomUUID(),
//       };
//       openBT.children.push(openP);
//       paragraphTokens.push(openP);
//     }

//     const lines = paragraph.split("\n");
//     let previous;
//     for (const line of lines) {
//       const singleLine = tokenizeLine(line, previous);

//       if (singleLine) {
//         if (singleLine !== previous) {
//           openP.content.push(singleLine);
//         }
//         previous = singleLine;
//       }
//     }
//   }

//   return blockTokens.filter((b) => !b.parent);
// };