const { noopTest, edit, merge } = require('./helpers.js'); /** * Block-Level Grammar */ const block = { newline: /^\n+/, code: /^( {4}[^\n]+\n*)+/, fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/, hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/, heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/, html: '^ {0,3}(?:' // optional indentation + '<(script|pre|style)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)' // (1) + '|comment[^\\n]*(\\n+|$)' // (2) + '|<\\?[\\s\\S]*?\\?>\\n*' // (3) + '|\\n*' // (4) + '|\\n*' // (5) + '|)[\\s\\S]*?(?:\\n{2,}|$)' // (6) + '|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)' // (7) open tag + '|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)' // (7) closing tag + ')', def: /^ {0,3}\[(label)\]: *\n? *]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/, nptable: noopTest, table: noopTest, lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/, // regex template, placeholders will be replaced according to different paragraph // interruption rules of commonmark and the original markdown spec: _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html)[^\n]+)*)/, text: /^[^\n]+/ }; block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/; block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/; block.def = edit(block.def) .replace('label', block._label) .replace('title', block._title) .getRegex(); block.bullet = /(?:[*+-]|\d{1,9}\.)/; block.item = /^( *)(bull) ?[^\n]*(?:\n(?!\1bull ?)[^\n]*)*/; block.item = edit(block.item, 'gm') .replace(/bull/g, block.bullet) .getRegex(); block.list = edit(block.list) .replace(/bull/g, block.bullet) .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))') .replace('def', '\\n+(?=' + block.def.source + ')') .getRegex(); block._tag = 'address|article|aside|base|basefont|blockquote|body|caption' + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption' + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe' + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option' + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr' + '|track|ul'; block._comment = //; block.html = edit(block.html, 'i') .replace('comment', block._comment) .replace('tag', block._tag) .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/) .getRegex(); block.paragraph = edit(block._paragraph) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs .replace('blockquote', ' {0,3}>') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', ')|<(?:script|pre|style|!--)') .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks .getRegex(); block.blockquote = edit(block.blockquote) .replace('paragraph', block.paragraph) .getRegex(); /** * Normal Block Grammar */ block.normal = merge({}, block); /** * GFM Block Grammar */ block.gfm = merge({}, block.normal, { nptable: '^ *([^|\\n ].*\\|.*)\\n' // Header + ' *([-:]+ *\\|[-| :]*)' // Align + '(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)', // Cells table: '^ *\\|(.+)\\n' // Header + ' *\\|?( *[-:]+[-| :]*)' // Align + '(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells }); block.gfm.nptable = edit(block.gfm.nptable) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('blockquote', ' {0,3}>') .replace('code', ' {4}[^\\n]') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', ')|<(?:script|pre|style|!--)') .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks .getRegex(); block.gfm.table = edit(block.gfm.table) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('blockquote', ' {0,3}>') .replace('code', ' {4}[^\\n]') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', ')|<(?:script|pre|style|!--)') .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks .getRegex(); /** * Pedantic grammar (original John Gruber's loose markdown specification) */ block.pedantic = merge({}, block.normal, { html: edit( '^ *(?:comment *(?:\\n|\\s*$)' + '|<(tag)[\\s\\S]+? *(?:\\n{2,}|\\s*$)' // closed tag + '|\\s]*)*?/?> *(?:\\n{2,}|\\s*$))') .replace('comment', block._comment) .replace(/tag/g, '(?!(?:' + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b') .getRegex(), def: /^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/, heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/, fences: noopTest, // fences not supported paragraph: edit(block.normal._paragraph) .replace('hr', block.hr) .replace('heading', ' *#{1,6} *[^\n]') .replace('lheading', block.lheading) .replace('blockquote', ' {0,3}>') .replace('|fences', '') .replace('|list', '') .replace('|html', '') .getRegex() }); /** * Inline-Level Grammar */ const inline = { escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/, autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, url: noopTest, tag: '^comment' + '|^' // self-closing tag + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. + '|^' // declaration, e.g. + '|^', // CDATA section link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/, reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/, nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/, strong: /^__([^\s_])__(?!_)|^\*\*([^\s*])\*\*(?!\*)|^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)/, // (1) returns if starts w/ punctuation | (2) ⬐Check groups to skip over ⬐ skip if needed ⬐repeat logic for inner *'s (must be in pairs)⬎ ⬐last char can't be punct OR ⬐final * must also be followed by punct (or endline) | (3) Underscores | (4) Underscores | (5) Underscores em: /^(?:(\*(?=[`\]punctuation]))|\*)(?![\*\s])((?:(?:(?!emSkip)(?:[^\*]|[\\\s]\*)|emSkip)|(?:(?:(?!emSkip)(?:[^\*]|[\\\s]\*)|emSkip)*?(??@\\[^_{|}~'; inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex(); // sequences em should skip over [reflink], [title][reflink], [title](link), `code`, inline._emSkip = '\\[reflink\\]|\\[.*?\\]\\[reflink\\]|\\[.*?\\]\\(.*?\\)|`.*?`|<.*?>'; inline.em = edit(inline.em) .replace(/punctuation/g, inline._punctuation) .replace(/emSkip/g, inline._emSkip) .getRegex(); inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g; inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/; inline.autolink = edit(inline.autolink) .replace('scheme', inline._scheme) .replace('email', inline._email) .getRegex(); inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; inline.tag = edit(inline.tag) .replace('comment', block._comment) .replace('attribute', inline._attribute) .getRegex(); inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/; inline._href = /<(?:\\[<>]?|[^\s<>\\])*>|[^\s\x00-\x1f]*/; inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/; inline.link = edit(inline.link) .replace('label', inline._label) .replace('href', inline._href) .replace('title', inline._title) .getRegex(); inline.reflink = edit(inline.reflink) .replace('label', inline._label) .getRegex(); /** * Normal Inline Grammar */ inline.normal = merge({}, inline); /** * Pedantic Inline Grammar */ inline.pedantic = merge({}, inline.normal, { strong: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, em: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/, link: edit(/^!?\[(label)\]\((.*?)\)/) .replace('label', inline._label) .getRegex(), reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) .replace('label', inline._label) .getRegex() }); /** * GFM Inline Grammar */ inline.gfm = merge({}, inline.normal, { escape: edit(inline.escape).replace('])', '~|])').getRegex(), _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/, url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/, del: /^~+(?=\S)([\s\S]*?\S)~+/, text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\