import { zipArrays } from "../zip";
import { inlineTokens } from "./inlineTokens";
import { singleLineTokens } from "./singleLineTokens";
import { tokenizeBlock } from "./tokenizeBlock";
import { tokenizeParagraph } from "./tokenizeParagraph";

// Entry point: tokenizes the body into a tree of block, paragraph, and line/inline tokens.
export const createElements = (body: string) => {
  const tokens = tokenize(body);

  return tokens;
};
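
// Usage sketch (hypothetical input; the exact token shapes come from the
// sibling token modules):
//
//   const tree = createElements("# Title\n\nSome *text* here.");
//   // tree is an array of top-level block tokens; paragraph tokens and their
//   // line/inline tokens hang off each block's `children`.
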
// Splits the body into blank-line-separated paragraphs, then builds a tree of
// block tokens containing paragraph tokens, which in turn hold line tokens.
const tokenize = (body: string) => {
  const paragraphs = body.split("\n\n");

  const blockTokens: BlockToken[] = [];
  const paragraphTokens: ParagraphToken[] = [];

  for (const paragraph of paragraphs) {
    const block = tokenizeBlock(paragraph);
    let openBT = blockTokens.findLast((bt) => !bt.closed);

    if (block) {
      // A string result is the closing delimiter of the currently open block.
      if (typeof block === "string") {
        if (openBT) {
          openBT.closed = true;
        }
        continue;
      }

      // Nest the new block under the open one, if any.
      if (openBT) {
        openBT.children.push(block);
        block.parent = openBT.type;
      }
      blockTokens.push(block);
      continue;
    }

    // Make sure there is always an open block to attach paragraphs to.
    if (!openBT) {
      openBT = {
        children: [],
        closed: false,
        metadata: {},
        type: "block",
      };
      blockTokens.push(openBT);
    }

    const multiline = tokenizeParagraph(paragraph);
    let openP = paragraphTokens.findLast((p) => !p.closed);

    if (multiline) {
      // An array result closes the open multiline paragraph with its final content.
      if (Array.isArray(multiline)) {
        if (openP) {
          openP.closed = true;
          openP.content = openP.content.concat(multiline);
        }
        continue;
      }

      openBT.children.push(multiline);
      paragraphTokens.push(multiline);
      continue;
    } else if (openP && !openP.allowsInline) {
      // An open paragraph that doesn't allow inline parsing keeps the raw text.
      openP.content.push({
        line: paragraph,
        raw: paragraph,
        type: "text",
      });
    }

    // I don't think the closed check is necessary, but just in case
    // if (openP && !openP.closed && !openP.allowsInline) continue;
    if (!openP) {
      openP = {
        allowsInline: true,
        closed: true,
        content: [],
        metadata: {},
        type: "p",
      };
      openBT.children.push(openP);
      paragraphTokens.push(openP);
    }

    const lines = paragraph.split("\n");
    let previous: SingleLineToken | undefined;
    for (const line of lines) {
      const singleLine = tokenizeLine(line, previous);

      if (singleLine) {
        // Getting the previous token back means the line was merged ("mended")
        // into it, so don't push it again.
        if (singleLine !== previous) {
          openP.content.push(singleLine);
        }
        previous = singleLine;
      }
    }
  }

  // Only top-level blocks are returned; nested blocks are reachable via children.
  return blockTokens.filter((b) => !b.parent);
};
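
// Rough shape of the output (illustrative only): for a body with no block
// delimiters, every paragraph is wrapped in the fallback root block, e.g.
//
//   [{ type: "block", closed: false, metadata: {}, children: [
//       { type: "p", allowsInline: true, closed: true, metadata: {}, content: [/* line tokens */] },
//       // ...one "p" token per paragraph
//     ] }]
//
// Blocks opened by tokenizeBlock get their own entries; any block nested under
// another carries a `parent` field and is filtered out of the returned array.
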
// const __tokenize = (md: string) => {
//   const tokens: (Token)[] = [];
//   // md = md.replace(/(?<=[a-z])\n(?=[a-z])/g, " ");
//   const lines = md.split("\n");
//   let preserveEmpty = false;
//   let multilineLines;
//   let tokenSettings;

//   for (let line of lines) {
//     if (!line && !preserveEmpty) continue;
//     let foundLine = false;

//     if (!multilineLines) {
//       token:
//       for (const token of multilineTokens) {
//         if (!token.rx.test(line)) continue token;
//         tokenSettings = token;
//         multilineLines = token.create(tokens);
//         preserveEmpty = true;
//         foundLine = true;
//         multilineLines.push({
//           type: "text",
//           line: token.replace(line),
//         });
//       }
//     } else {
//       foundLine = true;
//       if (tokenSettings?.closeRx?.test(line) || tokenSettings?.rx.test(line)) {
//         tokenSettings = undefined;
//         multilineLines = undefined;
//         preserveEmpty = false;
//       } else {
//         multilineLines.push({
//           type: "text",
//           line,
//         });
//       }
//     }

//     if (!multilineLines) {
//       token:
//       for (const token of singleLineTokens) {
//         if (!token.rx.test(line)) continue token;
//         foundLine = true;
//         line = line.replace(token.replaceRx, "").trim();

//         const lineContent = tokenizeInline(line);
//         token.create(lineContent, tokens);
//       }
//     }

//     if (foundLine) continue;

//     tokens.push({
//       type: "text",
//       line: tokenizeInline(line),
//     });
//   }

//   return tokens;
// };

// Matches a line against the single-line token definitions, falling back to a
// plain text token. A previous token that "mends" absorbs the line instead.
const tokenizeLine = (
  line: string,
  previous?: SingleLineToken,
): SingleLineToken => {
  for (const token of singleLineTokens) {
    if (!token.rx.test(line)) continue;

    const t = token.create(line);

    t.line = tokenizeInline(line.replace(token.replaceRx, ""));
    return t;
  }

  // No token matched: if the previous token mends across lines, merge this
  // line into it and re-tokenize its inline content.
  if (previous?.mends) {
    previous.raw += " " + line;
    previous.line = tokenizeInline(previous.raw.replace(previous.cfg!.rx, ""));
    return previous;
  }

  return {
    line: tokenizeInline(line),
    type: "text",
    raw: line,
  };
};
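
// "Mending" sketch (hypothetical token config): if a single-line token is
// created with `mends` set -- say a quote-style token whose text may wrap onto
// the next source line -- a following plain line is appended to its `raw`, its
// inline content is re-tokenized, and the same object is returned so the
// caller in tokenize() skips pushing a duplicate entry.
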
// Finds inline tokens in a line, blanks out each matched span with a marker
// character, then zips the remaining text segments with the collected tokens
// so everything stays in source order.
const tokenizeInline = (line: string) => {
  line = line.trim();
  const originalLine = line;
  const insertMarker = "\u{03A9}";
  const tokens: InlineTokenInsert[] = [];

  for (const token of inlineTokens) {
    token.rx.lastIndex = 0;
    let match;
    while ((match = token.rx.exec(line)) !== null) {
      const tokenStart = match.index;
      const tokenEnd = match.index + match[0].length;

      token.create(match, tokenStart, tokenEnd, tokens);
    }
  }

  if (tokens.length) {
    // Overwrite each matched span with marker characters so the surrounding
    // text keeps its original offsets.
    for (const insert of tokens) {
      line = line.slice(0, insert.start) +
        "".padStart(insert.end - insert.start, insertMarker) +
        line.slice(insert.end, line.length);
    }

    // Split on marker runs and interleave the plain-text pieces with the
    // inline tokens, dropping empty segments.
    return zipArrays(
      line.split(new RegExp(insertMarker + "{2,}")).map((t): InlineToken => ({
        content: t,
        type: "text",
      })),
      tokens,
    ).filter((t) => t.content);
  }
  return originalLine;
};
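
// Marker/zip illustration (assuming an inline token whose rx matches *...*):
//
//   "some *bold* text"
//     -> inline token recorded for the span of "*bold*"
//     -> line blanked to "some \u03A9\u03A9\u03A9\u03A9\u03A9\u03A9 text"
//     -> split on marker runs: ["some ", " text"]
//     -> zipped with the recorded tokens:
//        [{ type: "text", content: "some " }, /* bold token */, { type: "text", content: " text" }]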