marked/src/Lexer.ts
Tony Brix a990c54e0c
fix: fix more types (#2893)
Co-authored-by: Steven <steven@ceriously.com>
2023-08-19 16:55:56 -06:00

517 lines
14 KiB
TypeScript

import { _Tokenizer } from './Tokenizer.ts';
import { _defaults } from './defaults.ts';
import { block, inline } from './rules.ts';
import type { Token, TokensList, Tokens } from './Tokens.ts';
import type { MarkedOptions, TokenizerExtension } from './MarkedOptions.ts';
import type { Rules } from './rules.ts';
/**
 * Typographic ("smartypants") text replacement.
 *
 * Rewrites ASCII dashes, straight quotes and triple dots into their
 * typographic Unicode counterparts and returns the resulting string.
 */
function smartypants(text: string) {
  // Applied strictly in order:
  //  - `---` must be handled before `--`, otherwise an em-dash would be
  //    consumed as an en-dash followed by a hyphen;
  //  - opening quotes must be handled before the catch-all closing-quote
  //    rules, which convert every quote that is still straight.
  const substitutions: [RegExp, string][] = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];

  let result = text;
  for (const [pattern, replacement] of substitutions) {
    result = result.replace(pattern, replacement);
  }
  return result;
}
/**
 * mangle email addresses
 *
 * Encodes every character of `text` as an HTML numeric character reference,
 * choosing at random (per character) between the hexadecimal form `&#x..;`
 * and the decimal form `&#..;`. The output is therefore not deterministic,
 * but always decodes back to `text`.
 *
 * @param text - the string to obfuscate
 * @returns the concatenated character references ('' for empty input)
 */
function mangle(text: string): string {
  let out = '';
  // Iterate by code point rather than by UTF-16 code unit: a character
  // outside the BMP must become ONE reference to its code point. Emitting its
  // two surrogate halves separately yields references that HTML parsers
  // reject and replace with U+FFFD.
  for (const char of text) {
    // `char` is never empty inside for..of, so codePointAt(0) is defined.
    const codePoint = char.codePointAt(0) as number;
    const ch = Math.random() > 0.5
      ? 'x' + codePoint.toString(16)
      : codePoint.toString();
    out += '&#' + ch + ';';
  }
  return out;
}
/**
 * Block Lexer
 *
 * Converts markdown source into tokens in two passes: `blockTokens` splits
 * the source into block-level tokens, then `lex` runs `inlineTokens` over
 * every `{ src, tokens }` entry that was queued through `inline()`.
 * All pattern matching is delegated to `this.tokenizer`; this class decides
 * the order in which tokenizer methods are tried and how adjacent tokens are
 * merged.
 */
export class _Lexer {
  /**
   * Top-level token list returned by `lex`. Its `links` map collects the
   * reference definitions accepted from the `def` tokenizer in `blockTokens`.
   */
  tokens: TokensList;
  /** Options in effect. The constructor stores the tokenizer on this object. */
  options: MarkedOptions;
  /**
   * Lexing flags. Within this class `top` gates the paragraph tokenizer in
   * `blockTokens` and `inLink` suppresses the gfm url tokenizer in
   * `inlineTokens`; `inRawBlock` is not read here.
   * NOTE(review): presumably the tokenizer toggles these through
   * `tokenizer.lexer` - confirm in Tokenizer.ts.
   */
  state: {
    inLink: boolean;
    inRawBlock: boolean;
    top: boolean;
  };

  private tokenizer: _Tokenizer;
  /** Pending inline work: sources still to be lexed into their `tokens` array. */
  private inlineQueue: {src: string, tokens: Token[]}[];

  /**
   * @param options - lexer options. When omitted, the imported `_defaults`
   *   object is used directly (not copied), so the tokenizer assignment below
   *   is written onto that object.
   */
  constructor(options?: MarkedOptions) {
    // TokenList cannot be created in one go
    // @ts-expect-error
    this.tokens = [];
    this.tokens.links = Object.create(null);
    this.options = options || _defaults;
    this.options.tokenizer = this.options.tokenizer || new _Tokenizer();
    this.tokenizer = this.options.tokenizer;
    // Give the tokenizer access to the options and to this lexer.
    this.tokenizer.options = this.options;
    this.tokenizer.lexer = this;
    this.inlineQueue = [];
    this.state = {
      inLink: false,
      inRawBlock: false,
      top: true
    };

    // Pick the rule set: `pedantic` wins over `gfm`; `breaks` is only
    // consulted when `gfm` is enabled.
    const rules = {
      block: block.normal,
      inline: inline.normal
    };

    if (this.options.pedantic) {
      rules.block = block.pedantic;
      rules.inline = inline.pedantic;
    } else if (this.options.gfm) {
      rules.block = block.gfm;
      if (this.options.breaks) {
        rules.inline = inline.breaks;
      } else {
        rules.inline = inline.gfm;
      }
    }
    this.tokenizer.rules = rules;
  }

  /**
   * Expose Rules
   *
   * @returns the imported `block` and `inline` rule collections.
   */
  static get rules(): Rules {
    return {
      block,
      inline
    };
  }

  /**
   * Static Lex Method
   *
   * Creates a lexer with `options` and returns the result of `lex(src)`.
   */
  static lex(src: string, options?: MarkedOptions) {
    const lexer = new _Lexer(options);
    return lexer.lex(src);
  }

  /**
   * Static Lex Inline Method
   *
   * Creates a lexer with `options` and returns the result of
   * `inlineTokens(src)`; no block-level lexing is performed.
   */
  static lexInline(src: string, options?: MarkedOptions) {
    const lexer = new _Lexer(options);
    return lexer.inlineTokens(src);
  }

  /**
   * Preprocessing
   *
   * Normalizes `\r\n` and lone `\r` to `\n`, runs the block pass into
   * `this.tokens`, then drains `inlineQueue` in FIFO order.
   *
   * @returns `this.tokens`
   */
  lex(src: string) {
    src = src
      .replace(/\r\n|\r/g, '\n');

    this.blockTokens(src, this.tokens);

    let next;
    while (next = this.inlineQueue.shift()) {
      this.inlineTokens(next.src, next.tokens);
    }

    return this.tokens;
  }

  /**
   * Lexing
   *
   * Splits `src` into block-level tokens. On each iteration the block
   * extensions and then the tokenizer methods are tried in a fixed order; the
   * first one that returns a token consumes `token.raw.length` characters
   * from the front of `src`.
   *
   * @param src - markdown source
   * @param tokens - array to append to (a new array by default)
   * @returns the `tokens` array
   * @throws Error when nothing matches the remaining source and
   *   `options.silent` is falsy; with `silent` the message is logged via
   *   `console.error` and lexing stops.
   */
  blockTokens(src: string, tokens?: Token[]): Token[];
  blockTokens(src: string, tokens?: TokensList): TokensList;
  blockTokens(src: string, tokens: Token[] = []) {
    // A tab counts as four columns of indentation (CommonMark tab stop), so
    // it has to be expanded to four spaces - a single space would leave
    // tab-indented content short of the indentation that indented code
    // requires. Built with repeat() so the width is explicit.
    const tabAsSpaces = ' '.repeat(4);
    if (this.options.pedantic) {
      // Expand every tab and blank out lines that contain only spaces.
      src = src.replace(/\t/g, tabAsSpaces).replace(/^ +$/gm, '');
    } else {
      // Only expand tabs that follow the leading spaces of a line.
      src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
        return leading + tabAsSpaces.repeat(tabs.length);
      });
    }

    let token: Tokens.Generic | undefined;
    let lastToken;
    let cutSrc;
    let lastParagraphClipped;

    while (src) {
      // extensions get the first chance at the remaining source
      if (this.options.extensions
        && this.options.extensions.block
        && this.options.extensions.block.some((extTokenizer: TokenizerExtension['tokenizer']) => {
          if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // newline
      if (token = this.tokenizer.space(src)) {
        src = src.substring(token.raw.length);
        if (token.raw.length === 1 && tokens.length > 0) {
          // if there's a single \n as a spacer, it's terminating the last line,
          // so move it there so that we don't get unnecessary paragraph tags
          tokens[tokens.length - 1].raw += '\n';
        } else {
          tokens.push(token);
        }
        continue;
      }

      // code
      if (token = this.tokenizer.code(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        // An indented code block cannot interrupt a paragraph.
        if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          // Keep the last queued inline source in sync with the merged text.
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // fences
      if (token = this.tokenizer.fences(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // heading
      if (token = this.tokenizer.heading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // hr
      if (token = this.tokenizer.hr(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // blockquote
      if (token = this.tokenizer.blockquote(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // list
      if (token = this.tokenizer.list(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // html
      if (token = this.tokenizer.html(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // def
      // A definition is never pushed onto `tokens`: it is either folded into
      // the preceding paragraph/text token (using its raw text) or recorded
      // in `this.tokens.links`, where the first definition of a tag wins.
      if (token = this.tokenizer.def(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.raw;
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else if (!this.tokens.links[token.tag]) {
          this.tokens.links[token.tag] = {
            href: token.href,
            title: token.title
          };
        }
        continue;
      }

      // table (gfm)
      if (token = this.tokenizer.table(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // lheading
      if (token = this.tokenizer.lheading(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // top-level paragraph
      // prevent paragraph consuming extensions by clipping 'src' to extension start
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startBlock) {
        let startIndex = Infinity;
        // Search from the second character (and add 1 back below) so the
        // clipped source always keeps at least one character.
        const tempSrc = src.slice(1);
        let tempStart;
        this.options.extensions.startBlock.forEach((getStartIndex) => {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) {
        lastToken = tokens[tokens.length - 1];
        // If the previous paragraph was cut short at an extension start and
        // another paragraph follows directly, they are one paragraph: merge.
        // `lastToken` is guarded like in the other merge branches.
        if (lastParagraphClipped && lastToken && lastToken.type === 'paragraph') {
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          // Drop the newest queue entry (presumably the one queued for
          // `token` by the tokenizer) and update the previous one instead.
          this.inlineQueue.pop();
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        lastParagraphClipped = (cutSrc.length !== src.length);
        src = src.substring(token.raw.length);
        continue;
      }

      // text
      if (token = this.tokenizer.text(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          // Consecutive text tokens are joined into one.
          lastToken.raw += '\n' + token.raw;
          lastToken.text += '\n' + token.text;
          this.inlineQueue.pop();
          this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // Nothing consumed any input: bail out instead of looping forever.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    // NOTE(review): `top` is unconditionally set back to true here;
    // presumably a caller set it to false for nested content - confirm.
    this.state.top = true;
    return tokens;
  }

  /**
   * Queues `src` for inline lexing into `tokens`. Nothing is lexed here; the
   * queue is drained by `lex`.
   *
   * @returns the same `tokens` array, still unfilled at this point
   */
  inline(src: string, tokens: Token[] = []) {
    this.inlineQueue.push({ src, tokens });
    return tokens;
  }

  /**
   * Lexing/Compiling
   *
   * Splits `src` into inline tokens. The inline extensions and then the
   * tokenizer methods are tried in a fixed order on each iteration; the first
   * one that returns a token consumes `token.raw.length` characters.
   *
   * @param src - inline markdown source
   * @param tokens - array to append to (a new array by default)
   * @returns the `tokens` array
   * @throws Error when nothing matches the remaining source and
   *   `options.silent` is falsy; with `silent` the message is logged via
   *   `console.error` and lexing stops.
   */
  inlineTokens(src: string, tokens: Token[] = []): Token[] {
    let token, lastToken, cutSrc;

    // String with links masked to avoid interference with em and strong
    let maskedSrc = src;
    let match;
    // `prevChar` is handed to emStrong; it survives into the next iteration
    // only when the previous token was inline text (`keepPrevChar`).
    let keepPrevChar, prevChar;

    // Mask out reflinks
    // Only references whose label is a known link tag are masked. The
    // replacement `[aaa…]` has the same length as the match, so offsets in
    // `maskedSrc` keep lining up with `src`.
    if (this.tokens.links) {
      const links = Object.keys(this.tokens.links);
      if (links.length > 0) {
        while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) {
          if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) {
            maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex);
          }
        }
      }
    }
    // Mask out other blocks
    while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '[' + 'a'.repeat(match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex);
    }

    // Mask out escaped characters
    while ((match = this.tokenizer.rules.inline.anyPunctuation.exec(maskedSrc)) != null) {
      maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.anyPunctuation.lastIndex);
    }

    while (src) {
      if (!keepPrevChar) {
        prevChar = '';
      }
      keepPrevChar = false;

      // extensions
      if (this.options.extensions
        && this.options.extensions.inline
        && this.options.extensions.inline.some((extTokenizer) => {
          if (token = extTokenizer.call({ lexer: this }, src, tokens)) {
            src = src.substring(token.raw.length);
            tokens.push(token);
            return true;
          }
          return false;
        })) {
        continue;
      }

      // escape
      if (token = this.tokenizer.escape(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // tag
      if (token = this.tokenizer.tag(src)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        // A tag that came back as plain text is appended to a preceding
        // text token instead of starting a new one.
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // link
      if (token = this.tokenizer.link(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // reflink, nolink
      if (token = this.tokenizer.reflink(src, this.tokens.links)) {
        src = src.substring(token.raw.length);
        lastToken = tokens[tokens.length - 1];
        if (lastToken && token.type === 'text' && lastToken.type === 'text') {
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // em & strong
      if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // code
      if (token = this.tokenizer.codespan(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // br
      if (token = this.tokenizer.br(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // del (gfm)
      if (token = this.tokenizer.del(src)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // autolink
      if (token = this.tokenizer.autolink(src, mangle)) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // url (gfm)
      if (!this.state.inLink && (token = this.tokenizer.url(src, mangle))) {
        src = src.substring(token.raw.length);
        tokens.push(token);
        continue;
      }

      // text
      // prevent inlineText consuming extensions by clipping 'src' to extension start
      cutSrc = src;
      if (this.options.extensions && this.options.extensions.startInline) {
        let startIndex = Infinity;
        // Search from the second character (and add 1 back below) so the
        // clipped source always keeps at least one character.
        const tempSrc = src.slice(1);
        let tempStart;
        this.options.extensions.startInline.forEach((getStartIndex) => {
          tempStart = getStartIndex.call({ lexer: this }, tempSrc);
          if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); }
        });
        if (startIndex < Infinity && startIndex >= 0) {
          cutSrc = src.substring(0, startIndex + 1);
        }
      }
      if (token = this.tokenizer.inlineText(cutSrc, smartypants)) {
        src = src.substring(token.raw.length);
        if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started
          prevChar = token.raw.slice(-1);
        }
        keepPrevChar = true;
        lastToken = tokens[tokens.length - 1];
        if (lastToken && lastToken.type === 'text') {
          // Adjacent text tokens are concatenated (no separator).
          lastToken.raw += token.raw;
          lastToken.text += token.text;
        } else {
          tokens.push(token);
        }
        continue;
      }

      // Nothing consumed any input: bail out instead of looping forever.
      if (src) {
        const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
        if (this.options.silent) {
          console.error(errMsg);
          break;
        } else {
          throw new Error(errMsg);
        }
      }
    }

    return tokens;
  }
}