const { defaults } = require('./defaults.js'); const { rtrim, splitCells, escape, findClosingBracket } = require('./helpers.js'); const { block, inline } = require('./rules.js'); function outputLink(cap, link, tokens, raw, lexer) { const href = link.href; const title = link.title ? escape(link.title) : null; if (cap[0].charAt(0) !== '!') { return { type: 'link', raw, href, title, tokens: lexer.inlineTokens(cap[1]) }; } else { return { type: 'image', raw, text: escape(cap[1]), href, title }; } } /** * Tokenizer */ module.exports = class Tokenizer { constructor(options) { this.options = options || defaults; this.initialize(); } initialize() { this.inLink = false; this.inRawBlock = false; this.rules = { block: block.normal, inline: inline.normal }; if (this.options.pedantic) { this.rules.block = block.pedantic; this.rules.inline = inline.pedantic; } else if (this.options.gfm) { this.rules.block = block.gfm; if (this.options.breaks) { this.rules.inline = inline.breaks; } else { this.rules.inline = inline.gfm; } } } /** * Expose Block Rules */ static get rules() { return { block, inline }; } space(lexer, src, tokens, top) { const cap = this.rules.block.newline.exec(src); if (cap) { if (cap[0].length > 1) { return { type: 'space', raw: cap[0] }; } return { raw: '\n' }; } } code(lexer, src, tokens, top) { const cap = this.rules.block.code.exec(src); if (cap) { const lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph. if (lastToken && lastToken.type === 'paragraph') { tokens.pop(); lastToken.text += '\n' + cap[0].trimRight(); lastToken.raw += '\n' + cap[0]; return lastToken; } else { const text = cap[0].replace(/^ {4}/gm, ''); return { type: 'code', raw: cap[0], codeBlockStyle: 'indented', text: !this.options.pedantic ? rtrim(text, '\n') : text }; } } } fences(lexer, src, tokens, top) { const cap = this.rules.block.fences.exec(src); if (cap) { return { type: 'code', raw: cap[0], lang: cap[2] ? cap[2].trim() : cap[2], text: cap[3] || '' }; } } heading(lexer, src, tokens, top) { const cap = this.rules.block.heading.exec(src); if (cap) { return { type: 'heading', raw: cap[0], depth: cap[1].length, text: cap[2] }; } } nptable(lexer, src, tokens, top) { const cap = this.rules.block.nptable.exec(src); if (cap) { const item = { type: 'table', header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [], raw: cap[0] }; if (item.header.length === item.align.length) { let l = item.align.length; let i; for (i = 0; i < l; i++) { if (/^ *-+: *$/.test(item.align[i])) { item.align[i] = 'right'; } else if (/^ *:-+: *$/.test(item.align[i])) { item.align[i] = 'center'; } else if (/^ *:-+ *$/.test(item.align[i])) { item.align[i] = 'left'; } else { item.align[i] = null; } } l = item.cells.length; for (i = 0; i < l; i++) { item.cells[i] = splitCells(item.cells[i], item.header.length); } return item; } } } hr(lexer, src, tokens, top) { const cap = this.rules.block.hr.exec(src); if (cap) { return { type: 'hr', raw: cap[0] }; } } blockquote(lexer, src, tokens, top) { const cap = this.rules.block.blockquote.exec(src); if (cap) { const text = cap[0].replace(/^ *> ?/gm, ''); return { type: 'blockquote', raw: cap[0], tokens: lexer.blockTokens(text, [], top) }; } } list(lexer, src, tokens, top) { const cap = this.rules.block.list.exec(src); if (cap) { let raw = cap[0]; const bull = cap[2]; const isordered = bull.length > 1; const list = { type: 'list', raw, ordered: isordered, start: isordered ? +bull : '', loose: false, items: [] }; // Get each top-level item. const itemMatch = cap[0].match(this.rules.block.item); let next = false, item, space, b, addBack, loose, istask, ischecked; const l = itemMatch.length; for (let i = 0; i < l; i++) { item = itemMatch[i]; raw = item; // Remove the list item's bullet // so it is seen as the next token. space = item.length; item = item.replace(/^ *([*+-]|\d+\.) */, ''); // Outdent whatever the // list item contains. Hacky. if (~item.indexOf('\n ')) { space -= item.length; item = !this.options.pedantic ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '') : item.replace(/^ {1,4}/gm, ''); } // Determine whether the next list item belongs here. // Backpedal if it does not belong in this list. if (i !== l - 1) { b = this.rules.block.bullet.exec(itemMatch[i + 1])[0]; if (bull.length > 1 ? b.length === 1 : (b.length > 1 || (this.options.smartLists && b !== bull))) { addBack = itemMatch.slice(i + 1).join('\n'); list.raw = list.raw.substring(0, list.raw.length - addBack.length); i = l - 1; } } // Determine whether item is loose or not. // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/ // for discount behavior. loose = next || /\n\n(?!\s*$)/.test(item); if (i !== l - 1) { next = item.charAt(item.length - 1) === '\n'; if (!loose) loose = next; } if (loose) { list.loose = true; } // Check for task list items istask = /^\[[ xX]\] /.test(item); ischecked = undefined; if (istask) { ischecked = item[1] !== ' '; item = item.replace(/^\[[ xX]\] +/, ''); } list.items.push({ raw, task: istask, checked: ischecked, loose: loose, tokens: lexer.blockTokens(item, [], false) }); } return list; } } html(lexer, src, tokens, top) { const cap = this.rules.block.html.exec(src); if (cap) { return { type: this.options.sanitize ? 'paragraph' : 'html', raw: cap[0], pre: !this.options.sanitizer && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'), text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0] }; } } def(lexer, src, tokens, top) { const cap = this.rules.block.def.exec(src); if (cap) { if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1); const tag = cap[1].toLowerCase().replace(/\s+/g, ' '); return { tag, raw: cap[0], href: cap[2], title: cap[3] }; } } table(lexer, src, tokens, top) { const cap = this.rules.block.table.exec(src); if (cap) { const item = { type: 'table', header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [] }; if (item.header.length === item.align.length) { item.raw = cap[0]; let l = item.align.length; let i; for (i = 0; i < l; i++) { if (/^ *-+: *$/.test(item.align[i])) { item.align[i] = 'right'; } else if (/^ *:-+: *$/.test(item.align[i])) { item.align[i] = 'center'; } else if (/^ *:-+ *$/.test(item.align[i])) { item.align[i] = 'left'; } else { item.align[i] = null; } } l = item.cells.length; for (i = 0; i < l; i++) { item.cells[i] = splitCells( item.cells[i].replace(/^ *\| *| *\| *$/g, ''), item.header.length); } return item; } } } lheading(lexer, src, tokens, top) { const cap = this.rules.block.lheading.exec(src); if (cap) { return { type: 'heading', raw: cap[0], depth: cap[2].charAt(0) === '=' ? 1 : 2, text: cap[1] }; } } paragraph(lexer, src, tokens, top) { const cap = this.rules.block.paragraph.exec(src); if (cap) { return { type: 'paragraph', raw: cap[0], text: cap[1].charAt(cap[1].length - 1) === '\n' ? cap[1].slice(0, -1) : cap[1] }; } } text(lexer, src, tokens, top) { const cap = this.rules.block.text.exec(src); if (cap) { return { type: 'text', raw: cap[0], text: cap[0] }; } } escape(lexer, src, tokens, top) { const cap = this.rules.inline.escape.exec(src); if (cap) { return { type: 'escape', raw: cap[0], text: escape(cap[1]) }; } } tag(lexer, src, tokens, top) { const cap = this.rules.inline.tag.exec(src); if (cap) { if (!this.inLink && /^/i.test(cap[0])) { this.inLink = false; } if (!this.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { this.inRawBlock = true; } else if (this.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { this.inRawBlock = false; } return { type: this.options.sanitize ? 'text' : 'html', raw: cap[0], text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0] }; } } link(lexer, src, tokens, top) { const cap = this.rules.inline.link.exec(src); if (cap) { const lastParenIndex = findClosingBracket(cap[2], '()'); if (lastParenIndex > -1) { const start = cap[0].indexOf('!') === 0 ? 5 : 4; const linkLen = start + cap[1].length + lastParenIndex; cap[2] = cap[2].substring(0, lastParenIndex); cap[0] = cap[0].substring(0, linkLen).trim(); cap[3] = ''; } this.inLink = true; let href = cap[2]; let title = ''; if (this.options.pedantic) { const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href); if (link) { href = link[1]; title = link[3]; } else { title = ''; } } else { title = cap[3] ? cap[3].slice(1, -1) : ''; } href = href.trim().replace(/^<([\s\S]*)>$/, '$1'); const token = outputLink(cap, { href: href ? href.replace(this.rules.inline._escapes, '$1') : href, title: title ? title.replace(this.rules.inline._escapes, '$1') : title }, tokens, cap[0], lexer); this.inLink = false; return token; } } reflink(lexer, src, tokens, top) { let cap; if ((cap = this.rules.inline.reflink.exec(src)) || (cap = this.rules.inline.nolink.exec(src))) { let link = (cap[2] || cap[1]).replace(/\s+/g, ' '); link = lexer.tokens.links[link.toLowerCase()]; if (!link || !link.href) { const text = cap[0].charAt(0); return { type: 'text', raw: text, text }; } this.inLink = true; const token = outputLink(cap, link, tokens, cap[0], lexer); this.inLink = false; return token; } } strong(lexer, src, tokens, top) { const cap = this.rules.inline.strong.exec(src); if (cap) { return { type: 'strong', raw: cap[0], tokens: lexer.inlineTokens(cap[4] || cap[3] || cap[2] || cap[1]) }; } } em(lexer, src, tokens, top) { const cap = this.rules.inline.em.exec(src); if (cap) { return { type: 'em', raw: cap[0], tokens: lexer.inlineTokens(cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1]) }; } } codespan(lexer, src, tokens, top) { const cap = this.rules.inline.code.exec(src); if (cap) { return { type: 'codespan', raw: cap[0], text: escape(cap[2].trim(), true) }; } } br(lexer, src, tokens, top) { const cap = this.rules.inline.br.exec(src); if (cap) { return { type: 'br', raw: cap[0] }; } } del(lexer, src, tokens, top) { const cap = this.rules.inline.del.exec(src); if (cap) { return { type: 'del', raw: cap[0], tokens: lexer.inlineTokens(cap[1]) }; } } autolink(lexer, src, tokens, top) { const cap = this.rules.inline.autolink.exec(src); if (cap) { let text, href; if (cap[2] === '@') { text = escape(this.options.mangle ? this.mangle(cap[1]) : cap[1]); href = 'mailto:' + text; } else { text = escape(cap[1]); href = text; } return { type: 'link', raw: cap[0], text, href, tokens: [ { type: 'text', raw: text, text } ] }; } } url(lexer, src, tokens, top) { let cap; if (!this.inLink && (cap = this.rules.inline.url.exec(src))) { let text, href; if (cap[2] === '@') { text = escape(this.options.mangle ? this.mangle(cap[0]) : cap[0]); href = 'mailto:' + text; } else { // do extended autolink path validation let prevCapZero; do { prevCapZero = cap[0]; cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]; } while (prevCapZero !== cap[0]); text = escape(cap[0]); if (cap[1] === 'www.') { href = 'http://' + text; } else { href = text; } } return { type: 'link', raw: cap[0], text, href, tokens: [ { type: 'text', raw: text, text } ] }; } } inlineText(lexer, src, tokens, top) { const cap = this.rules.inline.text.exec(src); if (cap) { let text; if (this.inRawBlock) { text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]; } else { text = escape(this.options.smartypants ? this.smartypants(cap[0]) : cap[0]); } return { type: 'text', raw: cap[0], text }; } } /** * Smartypants Transformations */ smartypants(text) { return text // em-dashes .replace(/---/g, '\u2014') // en-dashes .replace(/--/g, '\u2013') // opening singles .replace(/(^|[-\u2014/(\[{"\s])'/g, '$1\u2018') // closing singles & apostrophes .replace(/'/g, '\u2019') // opening doubles .replace(/(^|[-\u2014/(\[{\u2018\s])"/g, '$1\u201c') // closing doubles .replace(/"/g, '\u201d') // ellipses .replace(/\.{3}/g, '\u2026'); } /** * Mangle Links */ mangle(text) { let out = '', i, ch; const l = text.length; for (i = 0; i < l; i++) { ch = text.charCodeAt(i); if (Math.random() > 0.5) { ch = 'x' + ch.toString(16); } out += '&#' + ch + ';'; } return out; } };