// Helpers used throughout this file:
//  - edit:     wraps a regex/string template; placeholders are substituted via
//              chained .replace(name, value) calls and compiled by .getRegex().
//  - merge:    combines rule objects into a new grammar variant.
//  - noopTest: stand-in for rules that are disabled in a given grammar.
const {
  noopTest,
  edit,
  merge
} = require('./helpers.js');
/**
 * Block-Level Grammar
 *
 * Several entries are templates rather than finished patterns: bare words such
 * as `bull`, `hr`, `def`, `label`, `title`, `comment`, `tag`, `attribute` and
 * `paragraph` are placeholders that are substituted below through
 * edit(...).replace(...).getRegex().
 */
const block = {
  newline: /^\n+/,
  code: /^( {4}[^\n]+\n*)+/,
  fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/,
  hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
  heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/,
  blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
  list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/,
  html: '^ {0,3}(?:' // optional indentation
    + '<(script|pre|style)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
    + '|comment[^\\n]*(\\n+|$)' // (2)
    + '|<\\?[\\s\\S]*?\\?>\\n*' // (3)
    + '|<![A-Z][\\s\\S]*?>\\n*' // (4)
    + '|<!\\[CDATA\\[[\\s\\S]*?\\]\\]>\\n*' // (5)
    + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:\\n{2,}|$)' // (6)
    + '|<(?!script|pre|style)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)' // (7) open tag
    + '|</(?!script|pre|style)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:\\n{2,}|$)' // (7) closing tag
    + ')',
  def: /^ {0,3}\[(label)\]: *\n? *<?([^\s>]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/,
  // tables are disabled in the base grammar; the GFM grammar overrides both
  nptable: noopTest,
  table: noopTest,
  lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/,
  // regex template, placeholders will be replaced according to different paragraph
  // interruption rules of commonmark and the original markdown spec:
  _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html)[^\n]+)*)/,
  text: /^[^\n]+/
};

// Link reference definitions: fill in the `label` and `title` placeholders.
block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/;
block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;
block.def = edit(block.def)
  .replace('label', block._label)
  .replace('title', block._title)
  .getRegex();

// Lists: `bull` is a bullet character or an ordered marker of up to 9 digits.
block.bullet = /(?:[*+-]|\d{1,9}[.)])/;
block.item = /^( *)(bull) ?[^\n]*(?:\n(?!\1bull ?)[^\n]*)*/;
block.item = edit(block.item, 'gm')
  .replace(/bull/g, block.bullet)
  .getRegex();

// Must run after block.def has been compiled: its source is embedded here.
block.list = edit(block.list)
  .replace(/bull/g, block.bullet)
  .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))')
  .replace('def', '\\n+(?=' + block.def.source + ')')
  .getRegex();

// Tag names substituted for the `tag` placeholder of HTML block type (6).
block._tag = 'address|article|aside|base|basefont|blockquote|body|caption'
  + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption'
  + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe'
  + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option'
  + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr'
  + '|track|ul';
block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;
block.html = edit(block.html, 'i')
  .replace('comment', block._comment)
  .replace('tag', block._tag)
  .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/)
  .getRegex();

// Paragraph: each placeholder in the negative lookahead becomes the pattern of
// a construct that is allowed to interrupt a paragraph.
block.paragraph = edit(block._paragraph)
  .replace('hr', block.hr)
  .replace('heading', ' {0,3}#{1,6} ')
  .replace('|lheading', '') // setext headings don't interrupt commonmark paragraphs
  .replace('blockquote', ' {0,3}>')
  .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
  .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
  .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)')
  .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
  .getRegex();

// Must run after block.paragraph has been compiled: it is embedded here.
block.blockquote = edit(block.blockquote)
  .replace('paragraph', block.paragraph)
  .getRegex();
/**
 * Normal Block Grammar
 */
block.normal = merge({}, block);

/**
 * GFM Block Grammar
 *
 * Overrides the two table rules of the normal grammar. The `hr`, `heading`,
 * `blockquote`, `code`, `fences`, `list` and `html` placeholders inside the
 * "Cells" lookahead are filled in below with the constructs that end a table.
 */
block.gfm = merge({}, block.normal, {
  nptable: '^ *([^|\\n ].*\\|.*)\\n' // Header
    + ' *([-:]+ *\\|[-| :]*)' // Align
    + '(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)', // Cells
  table: '^ *\\|(.+)\\n' // Header
    + ' *\\|?( *[-:]+[-| :]*)' // Align
    + '(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
});

block.gfm.nptable = edit(block.gfm.nptable)
  .replace('hr', block.hr)
  .replace('heading', ' {0,3}#{1,6} ')
  .replace('blockquote', ' {0,3}>')
  .replace('code', ' {4}[^\\n]')
  .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
  .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
  .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)')
  .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
  .getRegex();

block.gfm.table = edit(block.gfm.table)
  .replace('hr', block.hr)
  .replace('heading', ' {0,3}#{1,6} ')
  .replace('blockquote', ' {0,3}>')
  .replace('code', ' {4}[^\\n]')
  .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
  .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
  .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|!--)')
  .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
  .getRegex();
/**
 * Pedantic grammar (original John Gruber's loose markdown specification)
 *
 * Overrides html, def, heading, fences and paragraph of the normal grammar.
 */
block.pedantic = merge({}, block.normal, {
  html: edit(
    '^ *(?:comment *(?:\\n|\\s*$)'
    + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
    + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
    .replace('comment', block._comment)
    // `tag` here is any word that is not one of the listed inline element names
    .replace(/tag/g, '(?!(?:'
      + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
      + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
      + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
    .getRegex(),
  def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
  heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/,
  fences: noopTest, // fences not supported
  paragraph: edit(block.normal._paragraph)
    .replace('hr', block.hr)
    .replace('heading', ' *#{1,6} *[^\n]')
    .replace('lheading', block.lheading)
    .replace('blockquote', ' {0,3}>')
    // fenced code, lists and html do not interrupt a pedantic paragraph
    .replace('|fences', '')
    .replace('|list', '')
    .replace('|html', '')
    .getRegex()
});
/**
 * Inline-Level Grammar
 *
 * As in the block grammar, bare words such as `scheme`, `email`, `comment`,
 * `attribute`, `label`, `href`, `title`, `reflink`, `nolink`, `punctuation`
 * and `overlapSkip` are placeholders that are substituted further down via
 * edit(...).replace(...).getRegex().
 */
const inline = {
  escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
  autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
  url: noopTest,
  tag: '^comment'
    + '|^</[a-zA-Z][\\w:-]*\\s*>' // closing tag
    + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
    + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
    + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
    + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section
  link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
  reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
  nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
  reflinkSearch: 'reflink|nolink(?!\\()',
  strong: {
    start: /^(?:(\*\*(?=[*punctuation]))|\*\*)(?![\s])|__/, // (1) returns if starts w/ punctuation
    middle: /^\*\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*\*$|^__(?![\s])((?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?)__$/,
    endAst: /[^punctuation\s]\*\*(?!\*)|[punctuation]\*\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline)
    endUnd: /[^\s]__(?!_)(?:(?=[punctuation\s])|$)/ // last char can't be a space, and final _ must precede punct or \s (or endline)
  },
  em: {
    start: /^(?:(\*(?=[punctuation]))|\*)(?![*\s])|_/, // (1) returns if starts w/ punctuation
    middle: /^\*(?:(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)|\*(?:(?!overlapSkip)(?:[^*]|\\\*)|overlapSkip)*?\*)+?\*$|^_(?![_\s])(?:(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)|_(?:(?!overlapSkip)(?:[^_]|\\_)|overlapSkip)*?_)+?_$/,
    endAst: /[^punctuation\s]\*(?!\*)|[punctuation]\*(?!\*)(?:(?=[punctuation\s]|$))/, // last char can't be punct, or final * must also be followed by punct (or endline)
    endUnd: /[^\s]_(?!_)(?:(?=[punctuation\s])|$)/ // last char can't be a space, and final _ must precede punct or \s (or endline)
  },
  code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
  br: /^( {2,}|\\)\n(?!\s*$)/,
  del: noopTest,
  text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n))|(?= {2,}\n))/,
  punctuation: /^([\s*punctuation])/
};

// list of punctuation marks from common mark spec
// without * and _ to workaround cases with double emphasis
inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';
inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();

// sequences em should skip over [title](link), `code`, <html>
inline._blockSkip = '\\[[^\\]]*?\\]\\([^\\)]*?\\)|`[^`]*?`|<[^>]*?>';
// A complete __...__ or **...** run that em/strong matching treats as one unit.
// The second alternative uses a real character class ([^*]); escaping its
// brackets would make it match the literal text "[^*" instead.
inline._overlapSkip = '__[^_]*?__|\\*\\*[^\\*]*?\\*\\*';

inline.em.start = edit(inline.em.start)
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

inline.em.middle = edit(inline.em.middle)
  .replace(/punctuation/g, inline._punctuation)
  .replace(/overlapSkip/g, inline._overlapSkip)
  .getRegex();

inline.em.endAst = edit(inline.em.endAst, 'g')
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

inline.em.endUnd = edit(inline.em.endUnd, 'g')
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

inline.strong.start = edit(inline.strong.start)
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

// The strong.middle template contains `overlapSkip` placeholders (same as
// em.middle), so that is the name that has to be substituted here; otherwise
// the literal word "overlapSkip" would remain in the compiled pattern.
inline.strong.middle = edit(inline.strong.middle)
  .replace(/punctuation/g, inline._punctuation)
  .replace(/overlapSkip/g, inline._overlapSkip)
  .getRegex();

inline.strong.endAst = edit(inline.strong.endAst, 'g')
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

inline.strong.endUnd = edit(inline.strong.endUnd, 'g')
  .replace(/punctuation/g, inline._punctuation)
  .getRegex();

// Global (stateful) versions of the skip patterns; callers using exec/test on
// these need to manage lastIndex themselves.
inline.blockSkip = edit(inline._blockSkip, 'g')
  .getRegex();

inline.overlapSkip = edit(inline._overlapSkip, 'g')
  .getRegex();
// Global matcher for backslash escapes (same character set as inline.escape).
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

// Autolinks: <scheme:...> or <email>.
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;
inline.autolink = edit(inline.autolink)
  .replace('scheme', inline._scheme)
  .replace('email', inline._email)
  .getRegex();

// Inline HTML: reuses the block-level comment pattern.
inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;
inline.tag = edit(inline.tag)
  .replace('comment', block._comment)
  .replace('attribute', inline._attribute)
  .getRegex();

// Links: [label](href "title") and [label][ref].
inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
inline._href = /<(?:\\[<>]?|[^\s<>\\])*>|[^\s\x00-\x1f]*/;
inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;

inline.link = edit(inline.link)
  .replace('label', inline._label)
  .replace('href', inline._href)
  .replace('title', inline._title)
  .getRegex();

inline.reflink = edit(inline.reflink)
  .replace('label', inline._label)
  .getRegex();

// Must run after inline.reflink has been compiled: it is embedded here.
inline.reflinkSearch = edit(inline.reflinkSearch, 'g')
  .replace('reflink', inline.reflink)
  .replace('nolink', inline.nolink)
  .getRegex();
/**
 * Normal Inline Grammar
 */
inline.normal = merge({}, inline);

/**
 * Pedantic Inline Grammar
 *
 * Overrides strong, em, link and reflink of the normal grammar with looser
 * patterns.
 */
inline.pedantic = merge({}, inline.normal, {
  strong: {
    start: /^__|\*\*/,
    middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
    endAst: /\*\*(?!\*)/g,
    endUnd: /__(?!_)/g
  },
  em: {
    start: /^_|\*/,
    middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
    endAst: /\*(?!\*)/g,
    endUnd: /_(?!_)/g
  },
  link: edit(/^!?\[(label)\]\((.*?)\)/)
    .replace('label', inline._label)
    .getRegex(),
  reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
    .replace('label', inline._label)
    .getRegex()
});
/**
 * GFM Inline Grammar
 *
 * Adds bare URL / email autolinking and strikethrough (`del`) on top of the
 * normal grammar, and extends `escape` and `text` so that `~` is recognised.
 */
inline.gfm = merge({}, inline.normal, {
  escape: edit(inline.escape).replace('])', '~|])').getRegex(),
  _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
  url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
  _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/,
  del: /^~+(?=\S)([\s\S]*?\S)~+/,
  text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*~]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@))|(?= {2,}\n|[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@))/
});

// Fill in the `email` placeholder of the url rule; matching is case-insensitive.
inline.gfm.url = edit(inline.gfm.url, 'i')
  .replace('email', inline.gfm._extended_email)
  .getRegex();

/**
 * GFM + Line Breaks Inline Grammar
 *
 * Derived from the GFM grammar by relaxing every "{2,}" quantifier to "*", so
 * a line break no longer requires two or more preceding spaces.
 */
inline.breaks = merge({}, inline.gfm, {
  br: edit(inline.br).replace('{2,}', '*').getRegex(),
  text: edit(inline.gfm.text)
    .replace('\\b_', '\\b_| {2,}\\n')
    .replace(/\{2,\}/g, '*')
    .getRegex()
});
2019-11-06 11:11:06 -06:00
module . exports = {
block ,
inline
} ;