marked/src/Tokenizer.js
2020-05-03 15:19:53 -04:00

633 lines
15 KiB
JavaScript

const { defaults } = require('./defaults.js');
const {
rtrim,
splitCells,
escape,
findClosingBracket
} = require('./helpers.js');
function outputLink(cap, link, raw) {
const href = link.href;
const title = link.title ? escape(link.title) : null;
if (cap[0].charAt(0) !== '!') {
return {
type: 'link',
raw,
href,
title,
text: cap[1]
};
} else {
return {
type: 'image',
raw,
text: escape(cap[1]),
href,
title
};
}
}
function indentCodeCompensation(raw, text) {
const matchIndentToCode = raw.match(/^(\s+)(?:```)/);
if (matchIndentToCode === null) {
return text;
}
const indentToCode = matchIndentToCode[1];
return text
.split('\n')
.map(node => {
const matchIndentInNode = node.match(/^\s+/);
if (matchIndentInNode === null) {
return node;
}
const [indentInNode] = matchIndentInNode;
if (indentInNode.length >= indentToCode.length) {
return node.slice(indentToCode.length);
}
return node;
})
.join('\n');
}
/**
* Tokenizer
*/
module.exports = class Tokenizer {
constructor(options) {
this.options = options || defaults;
}
space(src) {
const cap = this.rules.block.newline.exec(src);
if (cap) {
if (cap[0].length > 1) {
return {
type: 'space',
raw: cap[0]
};
}
return { raw: '\n' };
}
}
code(src, tokens) {
const cap = this.rules.block.code.exec(src);
if (cap) {
const lastToken = tokens[tokens.length - 1];
// An indented code block cannot interrupt a paragraph.
if (lastToken && lastToken.type === 'paragraph') {
return {
raw: cap[0],
text: cap[0].trimRight()
};
}
const text = cap[0].replace(/^ {4}/gm, '');
return {
type: 'code',
raw: cap[0],
codeBlockStyle: 'indented',
text: !this.options.pedantic
? rtrim(text, '\n')
: text
};
}
}
fences(src) {
const cap = this.rules.block.fences.exec(src);
if (cap) {
const raw = cap[0];
const text = indentCodeCompensation(raw, cap[3] || '');
return {
type: 'code',
raw,
lang: cap[2] ? cap[2].trim() : cap[2],
text
};
}
}
heading(src) {
const cap = this.rules.block.heading.exec(src);
if (cap) {
return {
type: 'heading',
raw: cap[0],
depth: cap[1].length,
text: cap[2]
};
}
}
nptable(src) {
const cap = this.rules.block.nptable.exec(src);
if (cap) {
const item = {
type: 'table',
header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [],
raw: cap[0]
};
if (item.header.length === item.align.length) {
let l = item.align.length;
let i;
for (i = 0; i < l; i++) {
if (/^ *-+: *$/.test(item.align[i])) {
item.align[i] = 'right';
} else if (/^ *:-+: *$/.test(item.align[i])) {
item.align[i] = 'center';
} else if (/^ *:-+ *$/.test(item.align[i])) {
item.align[i] = 'left';
} else {
item.align[i] = null;
}
}
l = item.cells.length;
for (i = 0; i < l; i++) {
item.cells[i] = splitCells(item.cells[i], item.header.length);
}
return item;
}
}
}
hr(src) {
const cap = this.rules.block.hr.exec(src);
if (cap) {
return {
type: 'hr',
raw: cap[0]
};
}
}
blockquote(src) {
const cap = this.rules.block.blockquote.exec(src);
if (cap) {
const text = cap[0].replace(/^ *> ?/gm, '');
return {
type: 'blockquote',
raw: cap[0],
text
};
}
}
list(src) {
const cap = this.rules.block.list.exec(src);
if (cap) {
let raw = cap[0];
const bull = cap[2];
const isordered = bull.length > 1;
const list = {
type: 'list',
raw,
ordered: isordered,
start: isordered ? +bull : '',
loose: false,
items: []
};
// Get each top-level item.
const itemMatch = cap[0].match(this.rules.block.item);
let next = false,
item,
space,
b,
addBack,
loose,
istask,
ischecked;
const l = itemMatch.length;
for (let i = 0; i < l; i++) {
item = itemMatch[i];
raw = item;
// Remove the list item's bullet
// so it is seen as the next token.
space = item.length;
item = item.replace(/^ *([*+-]|\d+\.) */, '');
// Outdent whatever the
// list item contains. Hacky.
if (~item.indexOf('\n ')) {
space -= item.length;
item = !this.options.pedantic
? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
: item.replace(/^ {1,4}/gm, '');
}
// Determine whether the next list item belongs here.
// Backpedal if it does not belong in this list.
if (i !== l - 1) {
b = this.rules.block.bullet.exec(itemMatch[i + 1])[0];
if (bull.length > 1 ? b.length === 1
: (b.length > 1 || (this.options.smartLists && b !== bull))) {
addBack = itemMatch.slice(i + 1).join('\n');
list.raw = list.raw.substring(0, list.raw.length - addBack.length);
i = l - 1;
}
}
// Determine whether item is loose or not.
// Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
// for discount behavior.
loose = next || /\n\n(?!\s*$)/.test(item);
if (i !== l - 1) {
next = item.charAt(item.length - 1) === '\n';
if (!loose) loose = next;
}
if (loose) {
list.loose = true;
}
// Check for task list items
istask = /^\[[ xX]\] /.test(item);
ischecked = undefined;
if (istask) {
ischecked = item[1] !== ' ';
item = item.replace(/^\[[ xX]\] +/, '');
}
list.items.push({
raw,
task: istask,
checked: ischecked,
loose: loose,
text: item
});
}
return list;
}
}
html(src) {
const cap = this.rules.block.html.exec(src);
if (cap) {
return {
type: this.options.sanitize
? 'paragraph'
: 'html',
raw: cap[0],
pre: !this.options.sanitizer
&& (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]
};
}
}
def(src) {
const cap = this.rules.block.def.exec(src);
if (cap) {
if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1);
const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
return {
tag,
raw: cap[0],
href: cap[2],
title: cap[3]
};
}
}
table(src) {
const cap = this.rules.block.table.exec(src);
if (cap) {
const item = {
type: 'table',
header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')),
align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */),
cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : []
};
if (item.header.length === item.align.length) {
item.raw = cap[0];
let l = item.align.length;
let i;
for (i = 0; i < l; i++) {
if (/^ *-+: *$/.test(item.align[i])) {
item.align[i] = 'right';
} else if (/^ *:-+: *$/.test(item.align[i])) {
item.align[i] = 'center';
} else if (/^ *:-+ *$/.test(item.align[i])) {
item.align[i] = 'left';
} else {
item.align[i] = null;
}
}
l = item.cells.length;
for (i = 0; i < l; i++) {
item.cells[i] = splitCells(
item.cells[i].replace(/^ *\| *| *\| *$/g, ''),
item.header.length);
}
return item;
}
}
}
lheading(src) {
const cap = this.rules.block.lheading.exec(src);
if (cap) {
return {
type: 'heading',
raw: cap[0],
depth: cap[2].charAt(0) === '=' ? 1 : 2,
text: cap[1]
};
}
}
paragraph(src) {
const cap = this.rules.block.paragraph.exec(src);
if (cap) {
return {
type: 'paragraph',
raw: cap[0],
text: cap[1].charAt(cap[1].length - 1) === '\n'
? cap[1].slice(0, -1)
: cap[1]
};
}
}
text(src, tokens) {
const cap = this.rules.block.text.exec(src);
if (cap) {
const lastToken = tokens[tokens.length - 1];
if (lastToken && lastToken.type === 'text') {
return {
raw: cap[0],
text: cap[0]
};
}
return {
type: 'text',
raw: cap[0],
text: cap[0]
};
}
}
escape(src) {
const cap = this.rules.inline.escape.exec(src);
if (cap) {
return {
type: 'escape',
raw: cap[0],
text: escape(cap[1])
};
}
}
tag(src, inLink, inRawBlock) {
const cap = this.rules.inline.tag.exec(src);
if (cap) {
if (!inLink && /^<a /i.test(cap[0])) {
inLink = true;
} else if (inLink && /^<\/a>/i.test(cap[0])) {
inLink = false;
}
if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
inRawBlock = true;
} else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
inRawBlock = false;
}
return {
type: this.options.sanitize
? 'text'
: 'html',
raw: cap[0],
inLink,
inRawBlock,
text: this.options.sanitize
? (this.options.sanitizer
? this.options.sanitizer(cap[0])
: escape(cap[0]))
: cap[0]
};
}
}
link(src) {
const cap = this.rules.inline.link.exec(src);
if (cap) {
const lastParenIndex = findClosingBracket(cap[2], '()');
if (lastParenIndex > -1) {
const start = cap[0].indexOf('!') === 0 ? 5 : 4;
const linkLen = start + cap[1].length + lastParenIndex;
cap[2] = cap[2].substring(0, lastParenIndex);
cap[0] = cap[0].substring(0, linkLen).trim();
cap[3] = '';
}
let href = cap[2];
let title = '';
if (this.options.pedantic) {
const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
if (link) {
href = link[1];
title = link[3];
} else {
title = '';
}
} else {
title = cap[3] ? cap[3].slice(1, -1) : '';
}
href = href.trim().replace(/^<([\s\S]*)>$/, '$1');
const token = outputLink(cap, {
href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
title: title ? title.replace(this.rules.inline._escapes, '$1') : title
}, cap[0]);
return token;
}
}
reflink(src, links) {
let cap;
if ((cap = this.rules.inline.reflink.exec(src))
|| (cap = this.rules.inline.nolink.exec(src))) {
let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
link = links[link.toLowerCase()];
if (!link || !link.href) {
const text = cap[0].charAt(0);
return {
type: 'text',
raw: text,
text
};
}
const token = outputLink(cap, link, cap[0]);
return token;
}
}
strong(src) {
const cap = this.rules.inline.strong.exec(src);
if (cap) {
return {
type: 'strong',
raw: cap[0],
text: cap[4] || cap[3] || cap[2] || cap[1]
};
}
}
em(src) {
const cap = this.rules.inline.em.exec(src);
if (cap) {
return {
type: 'em',
raw: cap[0],
text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1]
};
}
}
codespan(src) {
const cap = this.rules.inline.code.exec(src);
if (cap) {
let text = cap[2].replace(/\n/g, ' ');
const hasNonSpaceChars = /[^ ]/.test(text);
const hasSpaceCharsOnBothEnds = text.startsWith(' ') && text.endsWith(' ');
if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
text = text.substring(1, text.length - 1);
}
text = escape(text, true);
return {
type: 'codespan',
raw: cap[0],
text
};
}
}
br(src) {
const cap = this.rules.inline.br.exec(src);
if (cap) {
return {
type: 'br',
raw: cap[0]
};
}
}
del(src) {
const cap = this.rules.inline.del.exec(src);
if (cap) {
return {
type: 'del',
raw: cap[0],
text: cap[1]
};
}
}
autolink(src, mangle) {
const cap = this.rules.inline.autolink.exec(src);
if (cap) {
let text, href;
if (cap[2] === '@') {
text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]);
href = 'mailto:' + text;
} else {
text = escape(cap[1]);
href = text;
}
return {
type: 'link',
raw: cap[0],
text,
href,
tokens: [
{
type: 'text',
raw: text,
text
}
]
};
}
}
url(src, mangle) {
let cap;
if (cap = this.rules.inline.url.exec(src)) {
let text, href;
if (cap[2] === '@') {
text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]);
href = 'mailto:' + text;
} else {
// do extended autolink path validation
let prevCapZero;
do {
prevCapZero = cap[0];
cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
} while (prevCapZero !== cap[0]);
text = escape(cap[0]);
if (cap[1] === 'www.') {
href = 'http://' + text;
} else {
href = text;
}
}
return {
type: 'link',
raw: cap[0],
text,
href,
tokens: [
{
type: 'text',
raw: text,
text
}
]
};
}
}
inlineText(src, inRawBlock, smartypants) {
const cap = this.rules.inline.text.exec(src);
if (cap) {
let text;
if (inRawBlock) {
text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0];
} else {
text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]);
}
return {
type: 'text',
raw: cap[0],
text
};
}
}
};