From 7d19665167ae389bfaf272a8266de82aba231e7d Mon Sep 17 00:00:00 2001 From: rossipedia Date: Sun, 10 Apr 2022 18:37:04 -0600 Subject: [PATCH] fix: only convert leading tabs to spaces (#1559) (#2434) * fix: non leading-tabs in markdown content (#1559) Only replaces tabs at the beginning of a block construct. Tabs in the middle of the item are unaffected. All tests passing. Tabs in both GFM and CommonMark at 100% fixes #1559 * update new/html_comments.html to preserve tab * combine redundant if condition * add test for tab immediately after blockquote character --- src/Lexer.js | 10 +++++++--- src/Tokenizer.js | 4 ++-- src/rules.js | 4 ++-- test/specs/new/html_comments.html | 2 +- test/specs/new/tab_after_blockquote.html | 1 + test/specs/new/tab_after_blockquote.md | 1 + 6 files changed, 14 insertions(+), 8 deletions(-) create mode 100644 test/specs/new/tab_after_blockquote.html create mode 100644 test/specs/new/tab_after_blockquote.md diff --git a/src/Lexer.js b/src/Lexer.js index eb182bd4..3c0a2c7e 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -115,8 +115,7 @@ export class Lexer { */ lex(src) { src = src - .replace(/\r\n|\r/g, '\n') - .replace(/\t/g, ' '); + .replace(/\r\n|\r/g, '\n'); this.blockTokens(src, this.tokens); @@ -133,8 +132,13 @@ export class Lexer { */ blockTokens(src, tokens = []) { if (this.options.pedantic) { - src = src.replace(/^ +$/gm, ''); + src = src.replace(/\t/g, ' ').replace(/^ +$/gm, ''); + } else { + src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => { + return leading + ' '.repeat(tabs.length); + }); } + let token, lastToken, cutSrc, lastParagraphClipped; while (src) { diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 2efb3b7e..0eec752c 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -151,7 +151,7 @@ export class Tokenizer { blockquote(src) { const cap = this.rules.block.blockquote.exec(src); if (cap) { - const text = cap[0].replace(/^ *> ?/gm, ''); + const text = cap[0].replace(/^ *>[ \t]?/gm, ''); return { type: 'blockquote', @@ -187,7 +187,7 @@ export class Tokenizer { } // Get next list item - const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*)?(?:\\n|$))`); + const itemRegex = new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`); // Check if current bullet point can start a new List Item while (src) { diff --git a/src/rules.js b/src/rules.js index 58b917d2..25d14153 100644 --- a/src/rules.js +++ b/src/rules.js @@ -11,10 +11,10 @@ export const block = { newline: /^(?: *(?:\n|$))+/, code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/, fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/, - hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/, + hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/, heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/, blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/, - list: /^( {0,3}bull)( [^\n]+?)?(?:\n|$)/, + list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/, html: '^ {0,3}(?:' // optional indentation + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)' // (1) + '|comment[^\\n]*(\\n+|$)' // (2) diff --git a/test/specs/new/html_comments.html b/test/specs/new/html_comments.html index 745d823b..a1c1f1b7 100644 --- a/test/specs/new/html_comments.html +++ b/test/specs/new/html_comments.html @@ -37,7 +37,7 @@

Example 10

diff --git a/test/specs/new/tab_after_blockquote.html b/test/specs/new/tab_after_blockquote.html new file mode 100644 index 00000000..73aab0bf --- /dev/null +++ b/test/specs/new/tab_after_blockquote.html @@ -0,0 +1 @@ +

test

\ No newline at end of file diff --git a/test/specs/new/tab_after_blockquote.md b/test/specs/new/tab_after_blockquote.md new file mode 100644 index 00000000..4371a12d --- /dev/null +++ b/test/specs/new/tab_after_blockquote.md @@ -0,0 +1 @@ +> test \ No newline at end of file