fix: Don't replace tabs with spaces (#3438)

* fix: don't convert tabs to spaces

* test exact

* save nextLineWithoutTabs

* fix code
This commit is contained in:
Tony Brix 2024-09-03 18:06:32 -06:00 committed by GitHub
parent 2ff0547e87
commit 9ed6456a37
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 28 additions and 20 deletions

View File

@ -106,10 +106,6 @@ export class _Lexer {
blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) { blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) {
if (this.options.pedantic) { if (this.options.pedantic) {
src = src.replace(/\t/g, ' ').replace(/^ +$/gm, ''); src = src.replace(/\t/g, ' ').replace(/^ +$/gm, '');
} else {
src = src.replace(/^( *)(\t+)/gm, (_, leading, tabs) => {
return leading + ' '.repeat(tabs.length);
});
} }
let token: Tokens.Generic | undefined; let token: Tokens.Generic | undefined;

View File

@ -90,7 +90,7 @@ export class _Tokenizer {
code(src: string): Tokens.Code | undefined { code(src: string): Tokens.Code | undefined {
const cap = this.rules.block.code.exec(src); const cap = this.rules.block.code.exec(src);
if (cap) { if (cap) {
const text = cap[0].replace(/^ {1,4}/gm, ''); const text = cap[0].replace(/^(?: {1,4}| {0,3}\t)/gm, '');
return { return {
type: 'code', type: 'code',
raw: cap[0], raw: cap[0],
@ -294,7 +294,7 @@ export class _Tokenizer {
indent += cap[1].length; indent += cap[1].length;
} }
if (blankLine && /^ *$/.test(nextLine)) { // Items begin with at most one blank line if (blankLine && /^[ \t]*$/.test(nextLine)) { // Items begin with at most one blank line
raw += nextLine + '\n'; raw += nextLine + '\n';
src = src.substring(nextLine.length + 1); src = src.substring(nextLine.length + 1);
endEarly = true; endEarly = true;
@ -309,11 +309,15 @@ export class _Tokenizer {
// Check if following lines should be included in List Item // Check if following lines should be included in List Item
while (src) { while (src) {
const rawLine = src.split('\n', 1)[0]; const rawLine = src.split('\n', 1)[0];
let nextLineWithoutTabs;
nextLine = rawLine; nextLine = rawLine;
// Re-align to follow commonmark nesting rules // Re-align to follow commonmark nesting rules
if (this.options.pedantic) { if (this.options.pedantic) {
nextLine = nextLine.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' '); nextLine = nextLine.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
nextLineWithoutTabs = nextLine;
} else {
nextLineWithoutTabs = nextLine.replace(/\t/g, '    ');
} }
// End list item if found code fences // End list item if found code fences
@ -332,12 +336,12 @@ export class _Tokenizer {
} }
// Horizontal rule found // Horizontal rule found
if (hrRegex.test(src)) { if (hrRegex.test(nextLine)) {
break; break;
} }
if (nextLine.search(/[^ ]/) >= indent || !nextLine.trim()) { // Dedent if possible if (nextLineWithoutTabs.search(/[^ ]/) >= indent || !nextLine.trim()) { // Dedent if possible
itemContents += '\n' + nextLine.slice(indent); itemContents += '\n' + nextLineWithoutTabs.slice(indent);
} else { } else {
// not enough indentation // not enough indentation
if (blankLine) { if (blankLine) {
@ -345,7 +349,7 @@ export class _Tokenizer {
} }
// paragraph continuation unless last line was a different block level element // paragraph continuation unless last line was a different block level element
if (line.search(/[^ ]/) >= 4) { // indented code block if (line.replace(/\t/g, '    ').search(/[^ ]/) >= 4) { // indented code block
break; break;
} }
if (fencesBeginRegex.test(line)) { if (fencesBeginRegex.test(line)) {
@ -367,7 +371,7 @@ export class _Tokenizer {
raw += rawLine + '\n'; raw += rawLine + '\n';
src = src.substring(rawLine.length + 1); src = src.substring(rawLine.length + 1);
line = nextLine.slice(indent); line = nextLineWithoutTabs.slice(indent);
} }
} }
@ -375,7 +379,7 @@ export class _Tokenizer {
// If the previous item ended with a blank line, the list is loose // If the previous item ended with a blank line, the list is loose
if (endsWithBlankLine) { if (endsWithBlankLine) {
list.loose = true; list.loose = true;
} else if (/\n *\n *$/.test(raw)) { } else if (/\n[ \t]*\n[ \t]*$/.test(raw)) {
endsWithBlankLine = true; endsWithBlankLine = true;
} }
} }

View File

@ -6,15 +6,15 @@ import {
* Block-Level Grammar * Block-Level Grammar
*/ */
const newline = /^(?: *(?:\n|$))+/; const newline = /^(?:[ \t]*(?:\n|$))+/;
const blockCode = /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/; const blockCode = /^((?: {4}| {0,3}\t)[^\n]+(?:\n(?:[ \t]*(?:\n|$))*)?)+/;
const fences = /^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/; const fences = /^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/;
const hr = /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/; const hr = /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/;
const heading = /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/; const heading = /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/;
const bullet = /(?:[*+-]|\d{1,9}[.)])/; const bullet = /(?:[*+-]|\d{1,9}[.)])/;
const lheading = edit(/^(?!bull |blockCode|fences|blockquote|heading|html)((?:.|\n(?!\s*?\n|bull |blockCode|fences|blockquote|heading|html))+?)\n {0,3}(=+|-+) *(?:\n+|$)/) const lheading = edit(/^(?!bull |blockCode|fences|blockquote|heading|html)((?:.|\n(?!\s*?\n|bull |blockCode|fences|blockquote|heading|html))+?)\n {0,3}(=+|-+) *(?:\n+|$)/)
.replace(/bull/g, bullet) // lists can interrupt .replace(/bull/g, bullet) // lists can interrupt
.replace(/blockCode/g, / {4}/) // indented code blocks can interrupt .replace(/blockCode/g, /(?: {4}| {0,3}\t)/) // indented code blocks can interrupt
.replace(/fences/g, / {0,3}(?:`{3,}|~{3,})/) // fenced code blocks can interrupt .replace(/fences/g, / {0,3}(?:`{3,}|~{3,})/) // fenced code blocks can interrupt
.replace(/blockquote/g, / {0,3}>/) // blockquote can interrupt .replace(/blockquote/g, / {0,3}>/) // blockquote can interrupt
.replace(/heading/g, / {0,3}#{1,6}/) // ATX heading can interrupt .replace(/heading/g, / {0,3}#{1,6}/) // ATX heading can interrupt
@ -23,7 +23,7 @@ const lheading = edit(/^(?!bull |blockCode|fences|blockquote|heading|html)((?:.|
const _paragraph = /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/; const _paragraph = /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/;
const blockText = /^[^\n]+/; const blockText = /^[^\n]+/;
const _blockLabel = /(?!\s*\])(?:\\.|[^\[\]\\])+/; const _blockLabel = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
const def = edit(/^ {0,3}\[(label)\]: *(?:\n *)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/) const def = edit(/^ {0,3}\[(label)\]: *(?:\n[ \t]*)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n[ \t]*)?| *\n[ \t]*)(title))? *(?:\n+|$)/)
.replace('label', _blockLabel) .replace('label', _blockLabel)
.replace('title', /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/) .replace('title', /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/)
.getRegex(); .getRegex();
@ -46,9 +46,9 @@ const html = edit(
+ '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3) + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
+ '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4) + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
+ '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5) + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
+ '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6) + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (6)
+ '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) open tag
+ '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag + '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n[ \t]*)+\\n|$)' // (7) closing tag
+ ')', 'i') + ')', 'i')
.replace('comment', _comment) .replace('comment', _comment)
.replace('tag', _tag) .replace('tag', _tag)
@ -104,7 +104,7 @@ const gfmTable = edit(
.replace('hr', hr) .replace('hr', hr)
.replace('heading', ' {0,3}#{1,6}(?:\\s|$)') .replace('heading', ' {0,3}#{1,6}(?:\\s|$)')
.replace('blockquote', ' {0,3}>') .replace('blockquote', ' {0,3}>')
.replace('code', ' {4}[^\\n]') .replace('code', '(?: {4}| {0,3}\t)[^\\n]')
.replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
.replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
.replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)') .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')

View File

@ -0,0 +1,2 @@
<pre><code> tab
</code></pre>

View File

@ -0,0 +1,6 @@
---
renderExact: true
---
```
tab
```