From 7d19665167ae389bfaf272a8266de82aba231e7d Mon Sep 17 00:00:00 2001
From: rossipedia <git@rossipedia.com>
Date: Sun, 10 Apr 2022 18:37:04 -0600
Subject: [PATCH] fix: only convert leading tabs to spaces (#1559) (#2434)

* fix: preserve non-leading tabs in markdown content (#1559)

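Only replaces tabs at the beginning of a line of a block construct
(after any leading spaces), converting each tab to four spaces. Tabs in
the middle of a line are unaffected. Pedantic mode still converts every
tab to spaces.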

All tests passing. Tabs in both GFM and CommonMark at 100%

fixes #1559

* update new/html_comments.html to preserve tab

* combine redundant if condition

* add test for tab immediately after blockquote character
---
 src/Lexer.js                             | 10 +++++++---
 src/Tokenizer.js                         |  4 ++--
 src/rules.js                             |  4 ++--
 test/specs/new/html_comments.html        |  2 +-
 test/specs/new/tab_after_blockquote.html |  1 +
 test/specs/new/tab_after_blockquote.md   |  1 +
 6 files changed, 14 insertions(+), 8 deletions(-)
 create mode 100644 test/specs/new/tab_after_blockquote.html
 create mode 100644 test/specs/new/tab_after_blockquote.md
diff --git a/src/Lexer.js b/src/Lexer.js
index eb182bd4..3c0a2c7e 100644
--- a/src/Lexer.js
+++ b/src/Lexer.js
@@ -115,8 +115,7 @@ export class Lexer {
    */
   lex(src) {
     src = src
-      .replace(/\r\n|\r/g, '\n')
-      .replace(/\t/g, '    ');
+      .replace(/\r\n|\r/g, '\n');
 
     this.blockTokens(src, this.tokens);
 
@@ -133,8 +132,13 @@ export class Lexer {
    */
   blockTokens(src, tokens = []) {
     if (this.options.pedantic) {
-      src = src.replace(/^ +$/gm, '');
+      src = src.replace(/\t/g, '    ').replace(/^ +$/gm, '');
+    } else {
+      src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
+        return leading + '    '.repeat(tabs.length);
+      });
     }
+
     let token, lastToken, cutSrc, lastParagraphClipped;
 
     while (src) {
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 2efb3b7e..0eec752c 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -151,7 +151,7 @@ export class Tokenizer {
   blockquote(src) {
     const cap = this.rules.block.blockquote.exec(src);
     if (cap) {
-      const text = cap[0].replace(/^ *> ?/gm, '');
+      const text = cap[0].replace(/^ *>[ \t]?/gm, '');
 
       return {
         type: 'blockquote',
@@ -187,7 +187,7 @@ export class Tokenizer {
       }
 
       // Get next list item
-      const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*)?(?:\\n|$))`);
+      const itemRegex = new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`);
 
       // Check if current bullet point can start a new List Item
       while (src) {
diff --git a/src/rules.js b/src/rules.js
index 58b917d2..25d14153 100644
--- a/src/rules.js
+++ b/src/rules.js
@@ -11,10 +11,10 @@ export const block = {
   newline: /^(?: *(?:\n|$))+/,
   code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
   fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
-  hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
+  hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,
   heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
   blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
-  list: /^( {0,3}bull)( [^\n]+?)?(?:\n|$)/,
+  list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/,
   html: '^ {0,3}(?:' // optional indentation
     + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
     + '|comment[^\\n]*(\\n+|$)' // (2)
diff --git a/test/specs/new/html_comments.html b/test/specs/new/html_comments.html
index 745d823b..a1c1f1b7 100644
--- a/test/specs/new/html_comments.html
+++ b/test/specs/new/html_comments.html
@@ -37,7 +37,7 @@
 <h3 id="example-10">Example 10</h3>
 
 <!-- multi
-line    
+line	
 comment
 -->
 
diff --git a/test/specs/new/tab_after_blockquote.html b/test/specs/new/tab_after_blockquote.html
new file mode 100644
index 00000000..73aab0bf
--- /dev/null
+++ b/test/specs/new/tab_after_blockquote.html
@@ -0,0 +1 @@
+<blockquote><p>test</p></blockquote>
\ No newline at end of file
diff --git a/test/specs/new/tab_after_blockquote.md b/test/specs/new/tab_after_blockquote.md
new file mode 100644
index 00000000..4371a12d
--- /dev/null
+++ b/test/specs/new/tab_after_blockquote.md
@@ -0,0 +1 @@
+>	test
\ No newline at end of file