const { defaults } = require('./defaults.js'); const { rtrim, splitCells, escape, findClosingBracket } = require('./helpers.js'); function outputLink(cap, link, raw) { const href = link.href; const title = link.title ? escape(link.title) : null; if (cap[0].charAt(0) !== '!') { return { type: 'link', raw, href, title, text: cap[1] }; } else { return { type: 'image', raw, text: escape(cap[1]), href, title }; } } /** * Tokenizer */ module.exports = class Tokenizer { constructor(options) { this.options = options || defaults; } space(src) { const cap = this.rules.block.newline.exec(src); if (cap) { if (cap[0].length > 1) { return { type: 'space', raw: cap[0] }; } return { raw: '\n' }; } } code(src, tokens) { const cap = this.rules.block.code.exec(src); if (cap) { const lastToken = tokens[tokens.length - 1]; // An indented code block cannot interrupt a paragraph. if (lastToken && lastToken.type === 'paragraph') { return { raw: cap[0], text: cap[0].trimRight() }; } const text = cap[0].replace(/^ {4}/gm, ''); return { type: 'code', raw: cap[0], codeBlockStyle: 'indented', text: !this.options.pedantic ? rtrim(text, '\n') : text }; } } fences(src) { const cap = this.rules.block.fences.exec(src); if (cap) { return { type: 'code', raw: cap[0], lang: cap[2] ? cap[2].trim() : cap[2], text: cap[3] || '' }; } } heading(src) { const cap = this.rules.block.heading.exec(src); if (cap) { return { type: 'heading', raw: cap[0], depth: cap[1].length, text: cap[2] }; } } nptable(src) { const cap = this.rules.block.nptable.exec(src); if (cap) { const item = { type: 'table', header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [], raw: cap[0] }; if (item.header.length === item.align.length) { let l = item.align.length; let i; for (i = 0; i < l; i++) { if (/^ *-+: *$/.test(item.align[i])) { item.align[i] = 'right'; } else if (/^ *:-+: *$/.test(item.align[i])) { item.align[i] = 'center'; } else if (/^ *:-+ *$/.test(item.align[i])) { item.align[i] = 'left'; } else { item.align[i] = null; } } l = item.cells.length; for (i = 0; i < l; i++) { item.cells[i] = splitCells(item.cells[i], item.header.length); } return item; } } } hr(src) { const cap = this.rules.block.hr.exec(src); if (cap) { return { type: 'hr', raw: cap[0] }; } } blockquote(src) { const cap = this.rules.block.blockquote.exec(src); if (cap) { const text = cap[0].replace(/^ *> ?/gm, ''); return { type: 'blockquote', raw: cap[0], text }; } } list(src) { const cap = this.rules.block.list.exec(src); if (cap) { let raw = cap[0]; const bull = cap[2]; const isordered = bull.length > 1; const list = { type: 'list', raw, ordered: isordered, start: isordered ? +bull : '', loose: false, items: [] }; // Get each top-level item. const itemMatch = cap[0].match(this.rules.block.item); let next = false, item, space, b, addBack, loose, istask, ischecked; const l = itemMatch.length; for (let i = 0; i < l; i++) { item = itemMatch[i]; raw = item; // Remove the list item's bullet // so it is seen as the next token. space = item.length; item = item.replace(/^ *([*+-]|\d+\.) */, ''); // Outdent whatever the // list item contains. Hacky. if (~item.indexOf('\n ')) { space -= item.length; item = !this.options.pedantic ? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '') : item.replace(/^ {1,4}/gm, ''); } // Determine whether the next list item belongs here. // Backpedal if it does not belong in this list. if (i !== l - 1) { b = this.rules.block.bullet.exec(itemMatch[i + 1])[0]; if (bull.length > 1 ? b.length === 1 : (b.length > 1 || (this.options.smartLists && b !== bull))) { addBack = itemMatch.slice(i + 1).join('\n'); list.raw = list.raw.substring(0, list.raw.length - addBack.length); i = l - 1; } } // Determine whether item is loose or not. // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/ // for discount behavior. loose = next || /\n\n(?!\s*$)/.test(item); if (i !== l - 1) { next = item.charAt(item.length - 1) === '\n'; if (!loose) loose = next; } if (loose) { list.loose = true; } // Check for task list items istask = /^\[[ xX]\] /.test(item); ischecked = undefined; if (istask) { ischecked = item[1] !== ' '; item = item.replace(/^\[[ xX]\] +/, ''); } list.items.push({ raw, task: istask, checked: ischecked, loose: loose, text: item }); } return list; } } html(src) { const cap = this.rules.block.html.exec(src); if (cap) { return { type: this.options.sanitize ? 'paragraph' : 'html', raw: cap[0], pre: !this.options.sanitizer && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'), text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0] }; } } def(src) { const cap = this.rules.block.def.exec(src); if (cap) { if (cap[3]) cap[3] = cap[3].substring(1, cap[3].length - 1); const tag = cap[1].toLowerCase().replace(/\s+/g, ' '); return { tag, raw: cap[0], href: cap[2], title: cap[3] }; } } table(src) { const cap = this.rules.block.table.exec(src); if (cap) { const item = { type: 'table', header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [] }; if (item.header.length === item.align.length) { item.raw = cap[0]; let l = item.align.length; let i; for (i = 0; i < l; i++) { if (/^ *-+: *$/.test(item.align[i])) { item.align[i] = 'right'; } else if (/^ *:-+: *$/.test(item.align[i])) { item.align[i] = 'center'; } else if (/^ *:-+ *$/.test(item.align[i])) { item.align[i] = 'left'; } else { item.align[i] = null; } } l = item.cells.length; for (i = 0; i < l; i++) { item.cells[i] = splitCells( item.cells[i].replace(/^ *\| *| *\| *$/g, ''), item.header.length); } return item; } } } lheading(src) { const cap = this.rules.block.lheading.exec(src); if (cap) { return { type: 'heading', raw: cap[0], depth: cap[2].charAt(0) === '=' ? 1 : 2, text: cap[1] }; } } paragraph(src) { const cap = this.rules.block.paragraph.exec(src); if (cap) { return { type: 'paragraph', raw: cap[0], text: cap[1].charAt(cap[1].length - 1) === '\n' ? cap[1].slice(0, -1) : cap[1] }; } } text(src, tokens) { const cap = this.rules.block.text.exec(src); if (cap) { const lastToken = tokens[tokens.length - 1]; if (lastToken && lastToken.type === 'text') { return { raw: cap[0], text: cap[0] }; } return { type: 'text', raw: cap[0], text: cap[0] }; } } escape(src) { const cap = this.rules.inline.escape.exec(src); if (cap) { return { type: 'escape', raw: cap[0], text: escape(cap[1]) }; } } tag(src, inLink, inRawBlock) { const cap = this.rules.inline.tag.exec(src); if (cap) { if (!inLink && /^/i.test(cap[0])) { inLink = false; } if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { inRawBlock = true; } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { inRawBlock = false; } return { type: this.options.sanitize ? 'text' : 'html', raw: cap[0], inLink, inRawBlock, text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0] }; } } link(src) { const cap = this.rules.inline.link.exec(src); if (cap) { const lastParenIndex = findClosingBracket(cap[2], '()'); if (lastParenIndex > -1) { const start = cap[0].indexOf('!') === 0 ? 5 : 4; const linkLen = start + cap[1].length + lastParenIndex; cap[2] = cap[2].substring(0, lastParenIndex); cap[0] = cap[0].substring(0, linkLen).trim(); cap[3] = ''; } let href = cap[2]; let title = ''; if (this.options.pedantic) { const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href); if (link) { href = link[1]; title = link[3]; } else { title = ''; } } else { title = cap[3] ? cap[3].slice(1, -1) : ''; } href = href.trim().replace(/^<([\s\S]*)>$/, '$1'); const token = outputLink(cap, { href: href ? href.replace(this.rules.inline._escapes, '$1') : href, title: title ? title.replace(this.rules.inline._escapes, '$1') : title }, cap[0]); return token; } } reflink(src, links) { let cap; if ((cap = this.rules.inline.reflink.exec(src)) || (cap = this.rules.inline.nolink.exec(src))) { let link = (cap[2] || cap[1]).replace(/\s+/g, ' '); link = links[link.toLowerCase()]; if (!link || !link.href) { const text = cap[0].charAt(0); return { type: 'text', raw: text, text }; } const token = outputLink(cap, link, cap[0]); return token; } } strong(src) { const cap = this.rules.inline.strong.exec(src); if (cap) { return { type: 'strong', raw: cap[0], text: cap[4] || cap[3] || cap[2] || cap[1] }; } } em(src) { const cap = this.rules.inline.em.exec(src); if (cap) { return { type: 'em', raw: cap[0], text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1] }; } } codespan(src) { const cap = this.rules.inline.code.exec(src); if (cap) { const text = escape(cap[2].trim(), true); return { type: 'codespan', raw: cap[0], text: !this.options.pedantic ? text.replace(/\n/g, ' ') : text }; } } br(src) { const cap = this.rules.inline.br.exec(src); if (cap) { return { type: 'br', raw: cap[0] }; } } del(src) { const cap = this.rules.inline.del.exec(src); if (cap) { return { type: 'del', raw: cap[0], text: cap[1] }; } } autolink(src, mangle) { const cap = this.rules.inline.autolink.exec(src); if (cap) { let text, href; if (cap[2] === '@') { text = escape(this.options.mangle ? mangle(cap[1]) : cap[1]); href = 'mailto:' + text; } else { text = escape(cap[1]); href = text; } return { type: 'link', raw: cap[0], text, href, tokens: [ { type: 'text', raw: text, text } ] }; } } url(src, mangle) { let cap; if (cap = this.rules.inline.url.exec(src)) { let text, href; if (cap[2] === '@') { text = escape(this.options.mangle ? mangle(cap[0]) : cap[0]); href = 'mailto:' + text; } else { // do extended autolink path validation let prevCapZero; do { prevCapZero = cap[0]; cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]; } while (prevCapZero !== cap[0]); text = escape(cap[0]); if (cap[1] === 'www.') { href = 'http://' + text; } else { href = text; } } return { type: 'link', raw: cap[0], text, href, tokens: [ { type: 'text', raw: text, text } ] }; } } inlineText(src, inRawBlock, smartypants) { const cap = this.rules.inline.text.exec(src); if (cap) { let text; if (inRawBlock) { text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]; } else { text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]); } return { type: 'text', raw: cap[0], text }; } } };