226 lines
5.5 KiB
TypeScript

import { zipArrays } from "../zip";
import { inlineTokens } from "./inlineTokens";
import { singleLineTokens } from "./singleLineTokens";
import { tokenizeBlock } from "./tokenizeBlock";
import { tokenizeParagraph } from "./tokenizeParagraph";
/**
 * Public entry point: tokenizes `body` and returns the result of `tokenize`
 * unchanged (the top-level block tokens).
 *
 * @param body - Raw source text to tokenize.
 * @returns Whatever `tokenize(body)` returns.
 */
export const createElements = (body: string) => tokenize(body);
/**
 * Splits `body` into paragraphs (separated by a blank line, i.e. "\n\n") and
 * folds them into block tokens whose `children` hold paragraph tokens.
 *
 * Each paragraph is offered first to `tokenizeBlock`, then to
 * `tokenizeParagraph`, and finally tokenized line by line with `tokenizeLine`.
 *
 * @param body - Raw source text.
 * @returns The block tokens that have no `parent` set, i.e. the top-level ones.
 */
const tokenize = (body: string) => {
const paragraphs = body.split("\n\n");
// Flat list of every block token created, nested ones included; nested blocks
// are filtered out again in the final `return`.
const blockTokens: BlockToken[] = [];
// Flat list of every paragraph token created, used to find the open one.
const paragraphTokens: ParagraphToken[] = [];
for (const paragraph of paragraphs) {
const block = tokenizeBlock(paragraph);
// Most recently created block that has not been closed yet, if any.
let openBT = blockTokens.findLast((bt) => !bt.closed);
if (block) {
// A string result closes the currently open block (if there is one) and the
// paragraph is otherwise dropped.
// NOTE(review): presumably the string is a block-closing marker — confirm
// against tokenizeBlock.
if (typeof block === "string") {
if (openBT) {
openBT.closed = true;
}
continue;
}
// A token result is a new block: nest it under the open block when one exists.
if (openBT) {
openBT.children.push(block);
block.parent = openBT.type;
}
blockTokens.push(block);
continue;
}
// Not a block paragraph: make sure there is a block to attach content to.
// The implicit block is created open, so later paragraphs keep reusing it
// until a string result from tokenizeBlock closes it.
if (!openBT) {
openBT = {
children: [],
closed: false,
metadata: {},
type: "block",
};
blockTokens.push(openBT);
}
const multiline = tokenizeParagraph(paragraph);
// Most recently created paragraph token that has not been closed yet, if any.
let openP = paragraphTokens.findLast((p) => !p.closed);
if (multiline) {
// An array result closes the open paragraph and is appended to its content;
// with no open paragraph the array is discarded.
if (Array.isArray(multiline)) {
if (openP) {
openP.closed = true;
openP.content = openP.content.concat(multiline);
}
continue;
}
// A token result starts a new paragraph token inside the open block.
openBT.children.push(multiline);
paragraphTokens.push(multiline);
continue;
} else if (openP && !openP?.allowsInline) {
// Inside an open paragraph that does not allow inline tokens the paragraph is
// stored verbatim as a single text entry.
// NOTE(review): execution then falls through to the per-line loop below,
// which pushes into the same `openP` again — possible duplicate content. The
// commented-out `continue` below would prevent that; confirm the intent.
openP.content.push({
line: paragraph,
raw: paragraph,
type: "text",
});
}
// I don't think the closed check is necessary, but just in case
// if (openP && !openP.closed && !openP.allowsInline) continue;
// No open paragraph: create an implicit "p" token. It is created already
// closed, so the `findLast` above never picks it up for a later paragraph.
if (!openP) {
openP = {
allowsInline: true,
closed: true,
content: [],
metadata: {},
type: "p",
};
openBT.children.push(openP);
paragraphTokens.push(openP);
}
const lines = paragraph.split("\n");
let previous;
for (const line of lines) {
const singleLine = tokenizeLine(line, previous);
if (singleLine) {
// tokenizeLine returns the `previous` object itself when it merged this line
// into it; only push tokens that are new objects.
if (singleLine !== previous) {
openP.content.push(singleLine);
}
previous = singleLine;
}
}
}
// Nested blocks are reachable through their parent's `children`; return only
// the blocks without a parent.
return blockTokens.filter((b) => !b.parent);
};
// const __tokenize = (md: string) => {
// const tokens: (Token)[] = [];
// // md = md.replace(/(?<=[a-z])\n(?=[a-z])/g, " ");
// const lines = md.split("\n");
// let preserveEmpty = false;
// let multilineLines;
// let tokenSettings;
// for (let line of lines) {
// if (!line && !preserveEmpty) continue;
// let foundLine = false;
// if (!multilineLines) {
// token:
// for (const token of multilineTokens) {
// if (!token.rx.test(line)) continue token;
// tokenSettings = token;
// multilineLines = token.create(tokens);
// preserveEmpty = true;
// foundLine = true;
// multilineLines.push({
// type: "text",
// line: token.replace(line),
// });
// }
// } else {
// foundLine = true;
// if (tokenSettings?.closeRx?.test(line) || tokenSettings?.rx.test(line)) {
// tokenSettings = undefined;
// multilineLines = undefined;
// preserveEmpty = false;
// } else {
// multilineLines.push({
// type: "text",
// line,
// });
// }
// }
// if (!multilineLines) {
// token:
// for (const token of singleLineTokens) {
// if (!token.rx.test(line)) continue token;
// foundLine = true;
// line = line.replace(token.replaceRx, "").trim();
// const lineContent = tokenizeInline(line);
// token.create(lineContent, tokens);
// }
// }
// if (foundLine) continue;
// tokens.push({
// type: "text",
// line: tokenizeInline(line),
// });
// }
// return tokens;
// };
/**
 * Tokenizes a single line of a paragraph.
 *
 * Resolution order:
 * 1. The first entry of `singleLineTokens` whose `rx` matches `line` creates
 *    the token via `create(line)`; its `line` is set to the inline-tokenized
 *    text with `replaceRx` stripped.
 * 2. Otherwise, if `previous` is flagged `mends`, the line is appended to
 *    `previous.raw` (joined by a single space), `previous.line` is recomputed
 *    and the same `previous` object is returned (it is mutated in place).
 * 3. Otherwise a plain `"text"` token is returned.
 *
 * @param line - One line of text, without the trailing newline.
 * @param previous - Token produced for the preceding line, if any.
 * @returns A new token, or `previous` itself when the line was merged into it.
 */
const tokenizeLine = (
  line: string,
  previous?: SingleLineToken,
): SingleLineToken => {
  for (const token of singleLineTokens) {
    // `test` on a global or sticky regex resumes from `lastIndex`; reset it so
    // state left by an earlier call cannot make a matching line be skipped.
    token.rx.lastIndex = 0;
    if (!token.rx.test(line)) continue;
    const created = token.create(line);
    created.line = tokenizeInline(line.replace(token.replaceRx, ""));
    return created;
  }
  if (previous?.mends) {
    previous.raw += " " + line;
    // Strip the token's own pattern from the merged text when its config is
    // available; without a config the merged text is used as-is instead of
    // throwing on an undefined `cfg`.
    // NOTE(review): this strips with `cfg.rx` while the creation path above
    // strips with `replaceRx` — confirm the difference is intended.
    const stripRx = previous.cfg?.rx;
    const merged = stripRx ? previous.raw.replace(stripRx, "") : previous.raw;
    previous.line = tokenizeInline(merged);
    return previous;
  }
  return {
    line: tokenizeInline(line),
    type: "text",
    raw: line,
  };
};
/**
 * Splits a line into inline tokens.
 *
 * Every regex in `inlineTokens` is run over the trimmed line and each match is
 * handed to the token's `create`, which receives the shared `tokens` array to
 * record its insert. The matched spans are then overwritten with a marker
 * character, the remaining text is split on runs of two or more markers, and
 * the text pieces are combined with the inserts through `zipArrays`
 * (presumably interleaving them — confirm against `zipArrays`). Entries with
 * falsy `content` are dropped.
 *
 * Limits of the marker approach: a one-character insert produces a single
 * marker, which the `{2,}` split does not treat as a separator, and input that
 * already contains two consecutive marker characters is split there as well.
 *
 * @param line - Text to tokenize; leading and trailing whitespace is trimmed.
 * @returns The trimmed string itself when no inline token was recorded,
 * otherwise the array of text and inline tokens.
 */
const tokenizeInline = (line: string) => {
  line = line.trim();
  const originalLine = line;
  const insertMarker = "\u{03A9}";
  const tokens: InlineTokenInsert[] = [];
  for (const token of inlineTokens) {
    const rx = token.rx;
    rx.lastIndex = 0;
    // Without the `g` or `y` flag `exec` ignores `lastIndex` and would return
    // the same match forever, so such a regex is only executed once.
    const advances = rx.global || rx.sticky;
    let match: RegExpExecArray | null;
    while ((match = rx.exec(line)) !== null) {
      if (match[0].length > 0) {
        const tokenStart = match.index;
        const tokenEnd = match.index + match[0].length;
        token.create(match, tokenStart, tokenEnd, tokens);
      } else {
        // An empty match leaves `lastIndex` where it was; step past it so the
        // loop terminates. An empty span has nothing to replace, so no token
        // is created for it.
        rx.lastIndex++;
      }
      if (!advances) break;
    }
  }
  if (tokens.length === 0) {
    return originalLine;
  }
  // Blank out each matched span with marker characters of the same length so
  // the start/end offsets of the remaining inserts stay valid.
  for (const insert of tokens) {
    line = line.slice(0, insert.start) +
      "".padStart(insert.end - insert.start, insertMarker) +
      line.slice(insert.end);
  }
  const textParts = line
    .split(new RegExp(insertMarker + "{2,}"))
    .map((t): InlineToken => ({
      content: t,
      type: "text",
    }));
  return zipArrays(textParts, tokens).filter((t) => t.content);
};