I'm really sick of not making any progress

2024-03-12 04:53:54 -06:00
parent ed4497b991
commit 3c8f5bb8ba
16 changed files with 863 additions and 167 deletions


@@ -0,0 +1,192 @@
export const TokenIdentifiers = new Map<string, {
rx: RegExp;
parse: (s: string) => Token;
}>();
// TokenIdentifiers.set("p", {
// rx: /\n{2,}((?:.|\n)*?)\n{2,}/g,
// parse(s) {
// const [_, content] = s.match(new RegExp(this.rx, ""))!;
// return {
// // content,
// content,
// raw: s,
// metadata: {},
// type: "p",
// uuid: crypto.randomUUID(),
// };
// },
// });
const rendersContentOnly = true;
const rendersChildrenOnly = true;
TokenIdentifiers.set("card", {
rx: /\[{2}\n+((?:.|\n)*?)\n+\]{2}/g,
parse(s) {
return {
content: s.match(new RegExp(this.rx, ""))?.at(1) ||
"Unable to parse card",
raw: s,
metadata: {},
type: "card",
uuid: crypto.randomUUID(),
};
},
});
TokenIdentifiers.set("code", {
rx: /`{3}\n+((?:.|\n)*?)\n+`{3}/g,
parse(s) {
return {
content: s.match(new RegExp(this.rx, ""))?.at(1) ||
"Unable to parse code",
raw: s,
metadata: {},
type: "code",
uuid: crypto.randomUUID(),
rendersContentOnly,
};
},
});
TokenIdentifiers.set("grid", {
rx: /(?:\[\])+\n+((?:.|\n)*?)\n+\/\[\]/g,
parse(s) {
return {
content: s.match(new RegExp(this.rx, ""))?.at(1) ||
"Unable to parse grid",
raw: s,
metadata: {
columns: s.split("\n").at(0)?.match(/\[\]/g)?.length.toString() || "1",
},
type: "grid",
uuid: crypto.randomUUID(),
rendersChildrenOnly,
};
},
});
TokenIdentifiers.set("heading", {
rx: /^#+\s(.*?)$/gm,
parse(s) {
return {
content: s.match(new RegExp(this.rx, ""))?.at(1) ||
"Unable to parse heading",
raw: s,
metadata: {
strength: s.match(/#/g)?.length.toString() || "1",
},
type: "heading",
uuid: crypto.randomUUID(),
rendersContentOnly,
};
},
});
TokenIdentifiers.set("image", {
rx: /\!\[(.*?)\]\((.*?)\)/g,
parse(s) {
const [_, title, src] = s.match(new RegExp(this.rx, ""))!;
return {
// content: inline,
content: title.trim(),
raw: s,
metadata: {
src,
},
type: "image",
uuid: crypto.randomUUID(),
rendersContentOnly,
};
},
});
TokenIdentifiers.set("anchor", {
rx: /(?<![\!^])\[(.*?)\]\((.*?)\)/g,
parse(s) {
let preset, [_, title, href] = s.match(new RegExp(this.rx, ""))!;
const match = title.match(/`{3}(cta|button)?(.*)/);
if (match) {
[_, preset, title] = match;
}
const classes = {
button: "btn-primary inline-block",
cta: "btn-secondary inline-block uppercase",
};
return {
// content: inline,
content: title.trim(),
raw: s,
metadata: {
href,
classes: classes[preset as keyof typeof classes],
},
type: "anchor",
uuid: crypto.randomUUID(),
rendersContentOnly,
};
},
});
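To make the preset branch above concrete: a title that opens with a triple backtick and a preset name picks one of the class presets. A hypothetical input and my reading of the fields it parses to (illustrative, not part of the commit):

// "[```button Try it](/signup)" → parse() yields roughly:
// {
//   content: "Try it",            // preset stripped, then trimmed
//   metadata: {
//     href: "/signup",
//     classes: "btn-primary inline-block", // classes["button"]
//   },
//   type: "anchor",
// }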
TokenIdentifiers.set("inline-code", {
rx: /\s?`(.{3,}|[a-z0-9]*?)`[^`a-z0-9\n]/gi,
parse(s) {
return {
// content: inline,
content: s.match(new RegExp(this.rx, "i"))?.at(1) ||
"Unable to parse inline-code",
raw: s,
metadata: {},
type: "inline-code",
uuid: crypto.randomUUID(),
rendersContentOnly,
};
},
});
TokenIdentifiers.set("popover", {
rx: /\^\[(.*?)\]\<<(.*?)\>>/g,
parse(s) {
const [_, title, content] = s.match(new RegExp(this.rx, ""))!;
return {
// content,
content,
raw: s,
metadata: { title },
type: "popover",
uuid: crypto.randomUUID(),
rendersContentOnly,
};
},
});
TokenIdentifiers.set("accordion", {
rx: /\[accordion(\s.*?)?]\n+((?:.|\n)*?)\n+\[\/accordion\]/g,
parse(s) {
const [_, title, content] = s.match(new RegExp(this.rx, ""))!;
return {
// content,
content,
raw: s,
metadata: { title },
type: "accordion",
uuid: crypto.randomUUID(),
};
},
});
TokenIdentifiers.set("p", {
rx: /(?<=\n\n)([\s\S]*?)(?=\n\n)/g,
parse(s) {
// const [_, content] = s.match(new RegExp(this.rx, ""))!;
return {
// content,
content: s,
raw: s,
metadata: {},
type: "p",
uuid: crypto.randomUUID(),
};
},
});
// const p = TokenIdentifiers.get("p");
// TokenIdentifiers.clear();
// p && TokenIdentifiers.set("p", p);
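A minimal sketch of how this map is meant to be consumed (it mirrors the tokenize pass in the next file; `Token` is the ambient type this commit assumes):

const scan = (body: string): Token[] => {
  const found: Token[] = [];
  for (const [, entry] of TokenIdentifiers.entries()) {
    // clone the /g regex so its lastIndex never leaks between scans
    const rx = new RegExp(entry.rx);
    let m: RegExpExecArray | null;
    while ((m = rx.exec(body)) !== null) {
      // parse() re-runs the pattern flagless via new RegExp(this.rx, "")
      found.push(entry.parse(m[0]));
    }
  }
  return found;
};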


@@ -1,103 +1,286 @@
"use client";
import { zipArrays } from "../zip";
import { tokenizeLine } from "./tokenizeLine";
import { tokenizeBlock } from "./tokenizeBlock";
import { tokenizeParagraph } from "./tokenizeParagraph";
import { TokenIdentifiers } from "./TokenIdentifiers";
-export const createElements = (body: string) => {
+export const createElements = (body: string): [Token[], number] => {
+  const tabOptions = [
+    /^\s{2}(?!\s|\t)/m,
+    /^\s{4}(?!\s|\t)/m,
+    /^\t(?!\s|\t)/m,
+  ];
+  let tabSpacing = 0;
+  for (const [i, tabOption] of tabOptions.entries()) {
+    if (body.match(tabOption)) {
+      tabSpacing = i;
+      break;
+    }
+  }
   const tokens = tokenize(body);
-  return tokens;
+  return [buildAbstractSyntaxTree(tokens, body), tabSpacing];
};
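A hypothetical call site for the new signature (`source` is a placeholder name, not one from the commit):

declare const source: string; // placeholder markdown input
const [tree, tabSpacing] = createElements(source);
// tree: the top-level Tokens from buildAbstractSyntaxTree
// tabSpacing: index into tabOptions (0 = 2-space, 1 = 4-space, 2 = tab)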
const tokenize = (body: string) => {
body = body.replace(/\n?<!--(.*?)-->\n?/gs, "");
+  const tokenizedBody: tokenMarker[] = [];
-  const paragraphs = body.split("\n\n");
+  const addToken = (thing: tokenMarker) => {
+    tokenizedBody.push(thing);
+  };
-  const blockTokens: BlockToken[] = [];
-  const paragraphTokens: ParagraphToken[] = [];
+  for (const [type, token] of TokenIdentifiers.entries()) {
+    const rx = new RegExp(token.rx);
+    let match;
+    while ((match = rx.exec(body)) !== null) {
+      const start = match.index;
+      const end = rx.lastIndex;
-  for (const paragraph of paragraphs) {
-    const block = tokenizeBlock(paragraph);
-    let openBT = blockTokens.findLast((bt) => !bt.closed);
-    if (block) {
-      if (typeof block === "string") {
-        if (openBT) {
-          openBT.closed = true;
-        }
-        continue;
-      }
-      if (openBT) {
-        openBT.children.push(block);
-        block.parent = openBT.type;
-      }
-      blockTokens.push(block);
-      continue;
-    }
-    if (!openBT) {
-      openBT = {
-        children: [],
-        closed: false,
-        metadata: {},
-        type: "block",
-        uuid: crypto.randomUUID(),
-      };
-      blockTokens.push(openBT);
-    }
-    const multiline = tokenizeParagraph(paragraph);
-    let openP = paragraphTokens.findLast((p) => !p.closed);
-    if (multiline) {
-      if (Array.isArray(multiline)) {
-        if (openP) {
-          openP.closed = true;
-          openP.content = openP.content.concat(multiline);
-        }
-        continue;
-      }
-      openBT.children.push(multiline);
-      paragraphTokens.push(multiline);
-      continue;
-    } else if (openP && !openP?.allowsInline) {
-      openP.content.push({
-        line: paragraph,
-        raw: paragraph,
-        type: "text",
-        uuid: crypto.randomUUID(),
-      });
-    }
-    // I don't think the closed check is necessary, but just in case
-    // if (openP && !openP.closed && !openP.allowsInline) continue;
-    if (!openP) {
-      openP = {
-        allowsInline: true,
-        closed: true,
-        content: [],
-        metadata: {},
-        type: "p",
-        uuid: crypto.randomUUID(),
-      };
-      openBT.children.push(openP);
-      paragraphTokens.push(openP);
-    }
-    const lines = paragraph.split("\n");
-    let previous;
-    for (const line of lines) {
-      const singleLine = tokenizeLine(line, previous);
-      if (singleLine) {
-        if (singleLine !== previous) {
-          openP.content.push(singleLine);
-        }
-        previous = singleLine;
+      if (type !== "p" || !tokenizedBody.find((i) => i.start === start)) {
+        addToken({
+          start,
+          end,
+          type,
+        });
+      }
}
}
-  return blockTokens.filter((b) => !b.parent);
+  return tokenizedBody;
};
export const buildAbstractSyntaxTree = (
markers: tokenMarker[],
body: string,
): Token[] => {
ensureNoOrphans(markers);
markers.sort((a, b) => {
if (a.start === b.start) {
console.log(a, b);
if (a.type === "p") return -1;
if (b.type === "p") return 1;
}
// if (a.type === "p" && a.start === b.start) return -1;
// if (b.type === "p" && a.start === b.start) return 1;
return a.start - b.start;
});
for (const marker of markers) {
marker.token = TokenIdentifiers.get(marker.type)?.parse(
body.substring(marker.start, marker.end),
);
// if (marker.type === "p" && marker.parent && marker.parent?.type !== "p") {
// marker.parent = undefined;
// continue;
// }
if (!marker.token) {
throw new Error("Failed to parse token. Token type not found?");
}
if (!marker.parent) continue;
if (!marker.parent.token) {
// debugger;
throw new Error("Failed to parse token. Child tokenized before parent");
}
marker.parent.token.children = marker.parent.token.children || [];
marker.parent.token.children.push(marker.token);
// marker.token.parent = marker.parent.token;
}
const tokens = markers.filter((m) =>
markers.filter((a) => a !== m && (a.end === m.end || a.start === m.start))
.length || m.type !== "p"
).map((t) => t.token!);
for (const token of tokens) {
contentToChildren(token);
}
return tokens.filter((t) => !t.parent);
};
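A worked trace of tokenize feeding this function, as I read the code (not output captured from the commit):

const body = "\n\n## Title\n\n";
const markers = tokenize(body);
// the heading rx matches "## Title" at [2, 10); the later "p" pass
// skips it because a marker with the same start already exists
const tokens = buildAbstractSyntaxTree(markers, body);
// → one heading token: content "Title", metadata.strength "2"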
const ensureNoOrphansOld = (tokens: tokenMarker[]) => {
for (const token of tokens) {
const parentPs = tokens.filter((t) => (
t.type === "p" && (
// any p that fully encapsulates the token
(t.start <= token.start && t.end >= token.end) ||
// any p that contains the start of the token
(t.start <= token.start && t.end >= token.start) ||
// any p that contains the end of the token
(t.start <= token.end && t.end >= token.end)
)
)).sort((a, b) => (a.start - b.start));
if (parentPs.length > 1) {
parentPs[0].end = parentPs.at(-1)!.end;
const remainingParents = parentPs.slice(1);
for (const token of tokens) {
if (token.parent && remainingParents.includes(token.parent)) {
token.parent = parentPs[0];
}
}
if (parentPs[0] && parentPs[0].end < token.end) {
parentPs[0].end = token.end;
}
tokens = tokens.filter((t) => !remainingParents.includes(t));
}
const potentialParents = tokens.filter((t) =>
(t.start < token.start && t.end > token.end) ||
(t.type === "p" && t.start <= token.start &&
t.end >= token.end && t !== token)
).sort((a, b) => {
if (token.start - a.start < token.start - b.start) return -1;
return 1;
});
token.parent = potentialParents.find((p) => p.type !== "p") ??
potentialParents[0];
if (token.type === "grid") {
debugger;
}
}
};
const ensureNoOrphans = (tokens: tokenMarker[]) => {
ensureNoOrphansOld(tokens);
};
const contentToChildren = (token: Token) => {
const children: Token[] = [];
let part, content = token.content;
// for (const child of token.children || []) {
// if (!content) continue;
// [part, content] = content.split(child.raw);
// part && children.push({
// content: part.trim(),
// metadata: {},
// raw: part,
// type: "text",
// uuid: crypto.randomUUID(),
// });
// children.push(child);
// }
// if (content) {
// children.push({
// content: content.trim(),
// metadata: {},
// raw: content,
// type: "text",
// uuid: crypto.randomUUID(),
// });
// }
const splitMarker = "{{^^}}";
for (const child of token.children || []) {
content = content.replace(child.raw, splitMarker);
}
token.children = zipArrays(
content.split(splitMarker).map((c): Token => ({
content: c.trim(),
metadata: {},
raw: c,
type: "text",
uuid: crypto.randomUUID(),
rendersContentOnly: true,
})),
token.children || [],
).filter((c) => c.children?.length || (c.rendersContentOnly && c.content));
};
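The replace-and-zip step above, traced on a small input and assuming zipArrays interleaves its two arguments (a[0], b[0], a[1], ...):

// token.content: "see [docs](/d) now", one anchor child, raw "[docs](/d)"
// 1. content.replace(child.raw, "{{^^}}") → "see {{^^}} now"
// 2. split + map → text tokens "see" and "now"
// 3. zipArrays(texts, children) → [text "see", anchor, text "now"]
// 4. the final filter drops empty text runs (e.g. a child at position 0)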
// const tokenize = (body: string) => {
// body = body.replace(/\n?<!--(.*?)-->\n?/gs, "");
// const paragraphs = body.split("\n\n");
// const blockTokens: BlockToken[] = [];
// const paragraphTokens: ParagraphToken[] = [];
// for (const paragraph of paragraphs) {
// const block = tokenizeBlock(paragraph);
// let openBT = blockTokens.findLast((bt) => !bt.closed);
// if (block) {
// if (typeof block === "string") {
// if (openBT) {
// openBT.closed = true;
// }
// continue;
// }
// if (openBT) {
// openBT.children.push(block);
// block.parent = openBT.type;
// }
// blockTokens.push(block);
// continue;
// }
// if (!openBT) {
// openBT = {
// children: [],
// closed: false,
// metadata: {},
// type: "block",
// uuid: crypto.randomUUID(),
// };
// blockTokens.push(openBT);
// }
// const multiline = tokenizeParagraph(paragraph);
// let openP = paragraphTokens.findLast((p) => !p.closed);
// if (multiline) {
// if (Array.isArray(multiline)) {
// if (openP) {
// openP.closed = true;
// openP.content = openP.content.concat(multiline);
// }
// continue;
// }
// openBT.children.push(multiline);
// paragraphTokens.push(multiline);
// continue;
// } else if (openP && !openP?.allowsInline) {
// openP.content.push({
// line: paragraph,
// raw: paragraph,
// type: "text",
// uuid: crypto.randomUUID(),
// });
// }
// // I don't think the closed check is necessary, but just in case
// // if (openP && !openP.closed && !openP.allowsInline) continue;
// if (!openP) {
// openP = {
// allowsInline: true,
// closed: true,
// content: [],
// metadata: {},
// type: "p",
// uuid: crypto.randomUUID(),
// };
// openBT.children.push(openP);
// paragraphTokens.push(openP);
// }
// const lines = paragraph.split("\n");
// let previous;
// for (const line of lines) {
// const singleLine = tokenizeLine(line, previous);
// if (singleLine) {
// if (singleLine !== previous) {
// openP.content.push(singleLine);
// }
// previous = singleLine;
// }
// }
// }
// return blockTokens.filter((b) => !b.parent);
// };


@@ -44,8 +44,8 @@ const blockTokens: {
},
},
{
-    rx: /\[accordion\s?([a-z\s]*)\]/,
-    closeRx: /\[\/accordion\]/,
+    rx: /^\[accordion\s?([a-z\s]*)\]/i,
+    closeRx: /^\[\/accordion\]/,
create(line) {
const title = line.match(this.rx)?.at(1);
return {


@@ -61,6 +61,21 @@ export const inlineTokens: {
) => void;
replace: (line: string) => string;
}[] = [
+  {
+    rx: /\s?`(.*?)`[^a-z0-9`]\s?/gi,
+    create(content, start, end, tokens) {
+      tokens.push({
+        content: this.replace(content[0]),
+        type: "inline-code",
+        end,
+        start,
+        uuid: crypto.randomUUID(),
+      });
+    },
+    replace(l) {
+      return l.replace(this.rx, (...all) => all[1]);
+    },
+  },
{
rx: /(\*\*)(.*?)(\*\*)/g,
create(content, start, end, tokens) {


@@ -1,37 +1,37 @@
export const tokenizeParagraph = (paragraph: string) => {
-  for (const block of blockTokens) {
-    const openTest = block.rx.test(paragraph),
-      closeTest = block.closeRx.test(paragraph);
+  for (const pgraph of paragraphTokens) {
+    const openTest = pgraph.rx.test(paragraph),
+      closeTest = pgraph.closeRx.test(paragraph);
if (openTest && closeTest) {
-      const p = block.create(paragraph);
+      const p = pgraph.create(paragraph);
p.closed = true;
return p;
}
-    if (closeTest) return block.create(paragraph).content;
+    if (closeTest) return pgraph.create(paragraph).content;
if (openTest) {
-      return block.create(paragraph);
+      return pgraph.create(paragraph);
}
}
};
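My reading of the three return shapes: both fences in one paragraph → a token with closed: true; only the closing fence → the token's content array, which the caller concatenates onto the still-open paragraph; only the opening fence → an unclosed token the caller keeps filling. A hypothetical trace:

tokenizeParagraph("```ts\nlet x = 1;\n```");
// open and close both test true → a "code" ParagraphToken, closed: true
tokenizeParagraph("plain prose");
// nothing matches → undefined; the caller falls back to line tokenizing

One pitfall worth flagging in the code entry below: match() with a /g regex returns whole matches rather than capture groups, so `line.match(/```(.*?)\n```/g)?.at(1)` is the second full match (usually absent) and the `|| line` fallback generally wins.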
-const blockTokens: {
+const paragraphTokens: {
rx: RegExp;
closeRx: RegExp;
create: (line: string) => ParagraphToken;
}[] = [
{
-    rx: /^```/g,
+    rx: /\n```/g,
closeRx: /\n```/g,
create(line) {
return {
type: "code",
metadata: {
-        language: line.split("\n").at(0)!.replace(this.rx, ""),
+        // language: line.split("\n").at(0)!.replace(this.rx, ""),
},
closed: false,
content: [{
-        line: line.replace(/```.*?\n/g, "").replace(/\n```/, ""),
+        line: line.match(/```(.*?)\n```/g)?.at(1) || line,
type: "text",
raw: line,
uuid: crypto.randomUUID(),