marked/src/Lexer.js

435 lines
10 KiB
JavaScript
Raw Normal View History

2020-04-06 23:25:33 -05:00
const Tokenizer = require('./Tokenizer.js');
2019-11-06 11:11:06 -06:00
const { defaults } = require('./defaults.js');
const { block, inline } = require('./rules.js');
/**
 * smartypants text replacement
 *
 * Swaps plain ASCII punctuation for its typographic equivalent: dashes,
 * curly single/double quotes and the ellipsis character.
 *
 * @param {string} text - Text to convert.
 * @returns {string} The text with typographic punctuation substituted.
 */
function smartypants(text) {
  // Ordered [pattern, replacement] pairs. The order is significant: em-dashes
  // must be consumed before en-dashes, and each "opening" quote rule must run
  // before the catch-all "closing" rule for the same quote character.
  const substitutions = [
    // em-dashes
    [/---/g, '\u2014'],
    // en-dashes
    [/--/g, '\u2013'],
    // opening singles
    [/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018'],
    // closing singles & apostrophes
    [/'/g, '\u2019'],
    // opening doubles
    [/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c'],
    // closing doubles
    [/"/g, '\u201d'],
    // ellipses
    [/\.{3}/g, '\u2026']
  ];

  let result = text;
  for (const [pattern, replacement] of substitutions) {
    result = result.replace(pattern, replacement);
  }
  return result;
}
/**
 * mangle email addresses
 *
 * Encodes every character of `text` as an HTML numeric character reference.
 * For each character a coin flip (`Math.random() > 0.5`) picks between the
 * hexadecimal form (`&#x68;`) and the decimal form (`&#104;`), so the output
 * differs from call to call.
 *
 * @param {string} text - Text to obfuscate.
 * @returns {string} Concatenated numeric character references, one per
 *   Unicode code point of `text`.
 */
function mangle(text) {
  let out = '';
  // String iteration walks code points, not UTF-16 code units, so a character
  // outside the BMP yields one valid reference (e.g. `&#x1f600;`) instead of
  // two surrogate references, which HTML does not accept.
  for (const char of text) {
    const codePoint = char.codePointAt(0);
    const reference = Math.random() > 0.5
      ? 'x' + codePoint.toString(16)
      : String(codePoint);
    out += '&#' + reference + ';';
  }
  return out;
}
/**
* Block Lexer
*/
2019-11-06 11:11:06 -06:00
module.exports = class Lexer {
constructor(options) {
this.tokens = [];
this.tokens.links = Object.create(null);
this.options = options || defaults;
2020-04-06 23:25:33 -05:00
this.options.tokenizer = this.options.tokenizer || new Tokenizer();
this.tokenizer = this.options.tokenizer;
this.tokenizer.options = this.options;
const rules = {
block: block.normal,
inline: inline.normal
};
if (this.options.pedantic) {
rules.block = block.pedantic;
rules.inline = inline.pedantic;
} else if (this.options.gfm) {
rules.block = block.gfm;
if (this.options.breaks) {
rules.inline = inline.breaks;
} else {
rules.inline = inline.gfm;
}
}
this.tokenizer.rules = rules;
2019-11-06 11:11:06 -06:00
}
2019-11-06 11:11:06 -06:00
/**
* Expose Rules
2019-11-06 11:11:06 -06:00
*/
static get rules() {
return {
block,
inline
};
}
2019-11-06 11:11:06 -06:00
/**
* Static Lex Method
*/
static lex(src, options) {
const lexer = new Lexer(options);
return lexer.lex(src);
2020-04-02 00:23:29 -05:00
}
2019-11-06 11:11:06 -06:00
/**
* Preprocessing
*/
lex(src) {
src = src
.replace(/\r\n|\r/g, '\n')
.replace(/\t/g, ' ');
2020-04-06 23:25:33 -05:00
this.blockTokens(src, this.tokens, true);
2020-04-02 00:23:29 -05:00
2020-04-06 22:28:47 -05:00
this.inline(this.tokens);
2020-04-02 00:23:29 -05:00
return this.tokens;
}
2019-11-06 11:11:06 -06:00
/**
* Lexing
*/
2020-04-06 23:25:33 -05:00
blockTokens(src, tokens = [], top = true) {
2019-11-06 11:11:06 -06:00
src = src.replace(/^ +$/gm, '');
2020-04-23 23:00:10 -05:00
let token, i, l, lastToken;
2019-11-06 11:11:06 -06:00
while (src) {
// newline
if (token = this.tokenizer.space(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
if (token.type) {
tokens.push(token);
2019-11-06 11:11:06 -06:00
}
2020-04-06 23:25:33 -05:00
continue;
2019-11-06 11:11:06 -06:00
}
2019-11-06 11:11:06 -06:00
// code
if (token = this.tokenizer.code(src, tokens)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
2020-04-23 23:00:10 -05:00
if (token.type) {
tokens.push(token);
} else {
lastToken = tokens[tokens.length - 1];
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
}
2019-11-06 11:11:06 -06:00
continue;
}
2019-11-06 11:11:06 -06:00
// fences
if (token = this.tokenizer.fences(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2019-11-06 11:11:06 -06:00
continue;
}
2019-11-06 11:11:06 -06:00
// heading
if (token = this.tokenizer.heading(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2019-11-06 11:11:06 -06:00
continue;
}
2019-11-06 11:11:06 -06:00
// table no leading pipe (gfm)
if (token = this.tokenizer.nptable(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}
2019-11-06 11:11:06 -06:00
// hr
if (token = this.tokenizer.hr(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2019-11-06 11:11:06 -06:00
continue;
}
2019-11-06 11:11:06 -06:00
// blockquote
if (token = this.tokenizer.blockquote(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
token.tokens = this.blockTokens(token.text, [], top);
2020-04-06 23:25:33 -05:00
tokens.push(token);
continue;
}
2019-11-06 11:11:06 -06:00
// list
if (token = this.tokenizer.list(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
l = token.items.length;
for (i = 0; i < l; i++) {
token.items[i].tokens = this.blockTokens(token.items[i].text, [], false);
}
2020-04-06 23:25:33 -05:00
tokens.push(token);
2019-11-06 11:11:06 -06:00
continue;
}
2019-11-06 11:11:06 -06:00
// html
if (token = this.tokenizer.html(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2019-11-06 11:11:06 -06:00
continue;
}
2019-11-06 11:11:06 -06:00
// def
if (top && (token = this.tokenizer.def(src))) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
if (!this.tokens.links[token.tag]) {
this.tokens.links[token.tag] = {
href: token.href,
title: token.title
2019-11-06 11:11:06 -06:00
};
}
2019-11-06 11:11:06 -06:00
continue;
}
2019-11-06 11:11:06 -06:00
// table (gfm)
if (token = this.tokenizer.table(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
continue;
2019-11-06 11:11:06 -06:00
}
2019-11-06 11:11:06 -06:00
// lheading
if (token = this.tokenizer.lheading(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
continue;
}
2019-11-06 11:11:06 -06:00
// top-level paragraph
if (top && (token = this.tokenizer.paragraph(src))) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2019-11-06 11:11:06 -06:00
continue;
}
2019-11-06 11:11:06 -06:00
// text
2020-04-23 23:00:10 -05:00
if (token = this.tokenizer.text(src, tokens)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
2020-04-23 23:00:10 -05:00
if (token.type) {
tokens.push(token);
} else {
lastToken = tokens[tokens.length - 1];
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
}
2019-11-06 11:11:06 -06:00
continue;
}
2019-11-06 11:11:06 -06:00
if (src) {
2020-04-02 00:23:29 -05:00
const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
if (this.options.silent) {
console.error(errMsg);
2020-04-06 23:25:33 -05:00
break;
2020-04-02 00:23:29 -05:00
} else {
throw new Error(errMsg);
}
2019-11-06 11:11:06 -06:00
}
}
2020-04-02 00:23:29 -05:00
return tokens;
}
2020-04-06 22:28:47 -05:00
inline(tokens) {
2020-04-02 00:23:29 -05:00
let i,
j,
k,
l2,
row,
token;
const l = tokens.length;
for (i = 0; i < l; i++) {
token = tokens[i];
switch (token.type) {
case 'paragraph':
case 'text':
case 'heading': {
token.tokens = [];
2020-04-06 22:28:47 -05:00
this.inlineTokens(token.text, token.tokens);
2020-04-02 00:23:29 -05:00
break;
}
case 'table': {
token.tokens = {
header: [],
cells: []
};
// header
l2 = token.header.length;
for (j = 0; j < l2; j++) {
token.tokens.header[j] = [];
2020-04-06 22:28:47 -05:00
this.inlineTokens(token.header[j], token.tokens.header[j]);
2020-04-02 00:23:29 -05:00
}
// cells
l2 = token.cells.length;
for (j = 0; j < l2; j++) {
row = token.cells[j];
token.tokens.cells[j] = [];
for (k = 0; k < row.length; k++) {
token.tokens.cells[j][k] = [];
2020-04-06 22:28:47 -05:00
this.inlineTokens(row[k], token.tokens.cells[j][k]);
2020-04-02 00:23:29 -05:00
}
}
break;
}
case 'blockquote': {
2020-04-06 22:28:47 -05:00
this.inline(token.tokens);
2020-04-02 00:23:29 -05:00
break;
}
case 'list': {
l2 = token.items.length;
for (j = 0; j < l2; j++) {
2020-04-06 22:28:47 -05:00
this.inline(token.items[j].tokens);
2020-04-02 00:23:29 -05:00
}
break;
}
default: {
// do nothing
}
}
}
return tokens;
}
/**
* Lexing/Compiling
*/
inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
2020-04-06 23:25:33 -05:00
let token;
2020-04-02 00:23:29 -05:00
while (src) {
// escape
if (token = this.tokenizer.escape(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// tag
if (token = this.tokenizer.tag(src, inLink, inRawBlock)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
inLink = token.inLink;
inRawBlock = token.inRawBlock;
2020-04-06 23:25:33 -05:00
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// link
if (token = this.tokenizer.link(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
if (token.type === 'link') {
token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
}
2020-04-06 23:25:33 -05:00
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// reflink, nolink
if (token = this.tokenizer.reflink(src, this.tokens.links)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
if (token.type === 'link') {
token.tokens = this.inlineTokens(token.text, [], true, inRawBlock);
}
2020-04-06 23:25:33 -05:00
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// strong
if (token = this.tokenizer.strong(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
2020-04-06 23:25:33 -05:00
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// em
if (token = this.tokenizer.em(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
2020-04-06 23:25:33 -05:00
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// code
if (token = this.tokenizer.codespan(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// br
if (token = this.tokenizer.br(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// del (gfm)
if (token = this.tokenizer.del(src)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
2020-04-06 23:25:33 -05:00
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// autolink
if (token = this.tokenizer.autolink(src, mangle)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// url (gfm)
if (!inLink && (token = this.tokenizer.url(src, mangle))) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
// text
if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
2020-04-06 23:25:33 -05:00
src = src.substring(token.raw.length);
tokens.push(token);
2020-04-02 00:23:29 -05:00
continue;
}
if (src) {
const errMsg = 'Infinite loop on byte: ' + src.charCodeAt(0);
if (this.options.silent) {
console.error(errMsg);
2020-04-06 23:25:33 -05:00
break;
2020-04-02 00:23:29 -05:00
} else {
throw new Error(errMsg);
}
}
}
2020-04-06 23:25:33 -05:00
return tokens;
2020-04-02 00:23:29 -05:00
}
};