fix: Move all regexps to rules (#3519)
This commit is contained in:
parent
58d66e59d1
commit
1f88deb58a
@ -1,6 +1,6 @@
|
||||
import { _Tokenizer } from './Tokenizer.ts';
|
||||
import { _defaults } from './defaults.ts';
|
||||
import { block, inline } from './rules.ts';
|
||||
import { other, block, inline } from './rules.ts';
|
||||
import type { Token, TokensList, Tokens } from './Tokens.ts';
|
||||
import type { MarkedOptions, TokenizerExtension } from './MarkedOptions.ts';
|
||||
|
||||
@ -36,6 +36,7 @@ export class _Lexer {
|
||||
};
|
||||
|
||||
const rules = {
|
||||
other,
|
||||
block: block.normal,
|
||||
inline: inline.normal,
|
||||
};
|
||||
@ -85,7 +86,7 @@ export class _Lexer {
|
||||
*/
|
||||
lex(src: string) {
|
||||
src = src
|
||||
.replace(/\r\n|\r/g, '\n');
|
||||
.replace(other.carriageReturn, '\n');
|
||||
|
||||
this.blockTokens(src, this.tokens);
|
||||
|
||||
@ -105,7 +106,7 @@ export class _Lexer {
|
||||
blockTokens(src: string, tokens?: TokensList, lastParagraphClipped?: boolean): TokensList;
|
||||
blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) {
|
||||
if (this.options.pedantic) {
|
||||
src = src.replace(/\t/g, ' ').replace(/^ +$/gm, '');
|
||||
src = src.replace(other.tabCharGlobal, ' ').replace(other.spaceLine, '');
|
||||
}
|
||||
|
||||
let token: Tokens.Generic | undefined;
|
||||
|
@ -3,6 +3,7 @@ import {
|
||||
cleanUrl,
|
||||
escape,
|
||||
} from './helpers.ts';
|
||||
import { other } from './rules.ts';
|
||||
import type { MarkedOptions } from './MarkedOptions.ts';
|
||||
import type { Tokens } from './Tokens.ts';
|
||||
import type { _Parser } from './Parser.ts';
|
||||
@ -22,9 +23,9 @@ export class _Renderer {
|
||||
}
|
||||
|
||||
code({ text, lang, escaped }: Tokens.Code): string {
|
||||
const langString = (lang || '').match(/^\S*/)?.[0];
|
||||
const langString = (lang || '').match(other.notSpaceStart)?.[0];
|
||||
|
||||
const code = text.replace(/\n$/, '') + '\n';
|
||||
const code = text.replace(other.endingNewline, '') + '\n';
|
||||
|
||||
if (!langString) {
|
||||
return '<pre><code>'
|
||||
|
106
src/Tokenizer.ts
106
src/Tokenizer.ts
@ -9,10 +9,10 @@ import type { _Lexer } from './Lexer.ts';
|
||||
import type { Links, Tokens, Token } from './Tokens.ts';
|
||||
import type { MarkedOptions } from './MarkedOptions.ts';
|
||||
|
||||
function outputLink(cap: string[], link: Pick<Tokens.Link, 'href' | 'title'>, raw: string, lexer: _Lexer): Tokens.Link | Tokens.Image {
|
||||
function outputLink(cap: string[], link: Pick<Tokens.Link, 'href' | 'title'>, raw: string, lexer: _Lexer, rules: Rules): Tokens.Link | Tokens.Image {
|
||||
const href = link.href;
|
||||
const title = link.title || null;
|
||||
const text = cap[1].replace(/\\([\[\]])/g, '$1');
|
||||
const text = cap[1].replace(rules.other.outputLinkReplace, '$1');
|
||||
|
||||
if (cap[0].charAt(0) !== '!') {
|
||||
lexer.state.inLink = true;
|
||||
@ -36,8 +36,8 @@ function outputLink(cap: string[], link: Pick<Tokens.Link, 'href' | 'title'>, ra
|
||||
};
|
||||
}
|
||||
|
||||
function indentCodeCompensation(raw: string, text: string) {
|
||||
const matchIndentToCode = raw.match(/^(\s+)(?:```)/);
|
||||
function indentCodeCompensation(raw: string, text: string, rules: Rules) {
|
||||
const matchIndentToCode = raw.match(rules.other.indentCodeCompensation);
|
||||
|
||||
if (matchIndentToCode === null) {
|
||||
return text;
|
||||
@ -48,7 +48,7 @@ function indentCodeCompensation(raw: string, text: string) {
|
||||
return text
|
||||
.split('\n')
|
||||
.map(node => {
|
||||
const matchIndentInNode = node.match(/^\s+/);
|
||||
const matchIndentInNode = node.match(rules.other.beginningSpace);
|
||||
if (matchIndentInNode === null) {
|
||||
return node;
|
||||
}
|
||||
@ -89,7 +89,7 @@ export class _Tokenizer {
|
||||
code(src: string): Tokens.Code | undefined {
|
||||
const cap = this.rules.block.code.exec(src);
|
||||
if (cap) {
|
||||
const text = cap[0].replace(/^(?: {1,4}| {0,3}\t)/gm, '');
|
||||
const text = cap[0].replace(this.rules.other.codeRemoveIndent, '');
|
||||
return {
|
||||
type: 'code',
|
||||
raw: cap[0],
|
||||
@ -105,7 +105,7 @@ export class _Tokenizer {
|
||||
const cap = this.rules.block.fences.exec(src);
|
||||
if (cap) {
|
||||
const raw = cap[0];
|
||||
const text = indentCodeCompensation(raw, cap[3] || '');
|
||||
const text = indentCodeCompensation(raw, cap[3] || '', this.rules);
|
||||
|
||||
return {
|
||||
type: 'code',
|
||||
@ -122,11 +122,11 @@ export class _Tokenizer {
|
||||
let text = cap[2].trim();
|
||||
|
||||
// remove trailing #s
|
||||
if (/#$/.test(text)) {
|
||||
if (this.rules.other.endingHash.test(text)) {
|
||||
const trimmed = rtrim(text, '#');
|
||||
if (this.options.pedantic) {
|
||||
text = trimmed.trim();
|
||||
} else if (!trimmed || / $/.test(trimmed)) {
|
||||
} else if (!trimmed || this.rules.other.endingSpaceChar.test(trimmed)) {
|
||||
// CommonMark requires space before trailing #s
|
||||
text = trimmed.trim();
|
||||
}
|
||||
@ -167,7 +167,7 @@ export class _Tokenizer {
|
||||
let i;
|
||||
for (i = 0; i < lines.length; i++) {
|
||||
// get lines up to a continuation
|
||||
if (/^ {0,3}>/.test(lines[i])) {
|
||||
if (this.rules.other.blockquoteStart.test(lines[i])) {
|
||||
currentLines.push(lines[i]);
|
||||
inBlockquote = true;
|
||||
} else if (!inBlockquote) {
|
||||
@ -181,8 +181,8 @@ export class _Tokenizer {
|
||||
const currentRaw = currentLines.join('\n');
|
||||
const currentText = currentRaw
|
||||
// precede setext continuation with 4 spaces so it isn't a setext
|
||||
.replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n $1')
|
||||
.replace(/^ {0,3}>[ \t]?/gm, '');
|
||||
.replace(this.rules.other.blockquoteSetextReplace, '\n $1')
|
||||
.replace(this.rules.other.blockquoteSetextReplace2, '');
|
||||
raw = raw ? `${raw}\n${currentRaw}` : currentRaw;
|
||||
text = text ? `${text}\n${currentText}` : currentText;
|
||||
|
||||
@ -258,7 +258,7 @@ export class _Tokenizer {
|
||||
}
|
||||
|
||||
// Get next list item
|
||||
const itemRegex = new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`);
|
||||
const itemRegex = this.rules.other.listItemRegex(bull);
|
||||
let endsWithBlankLine = false;
|
||||
// Check if current bullet point can start a new List Item
|
||||
while (src) {
|
||||
@ -276,7 +276,7 @@ export class _Tokenizer {
|
||||
raw = cap[0];
|
||||
src = src.substring(raw.length);
|
||||
|
||||
let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length));
|
||||
let line = cap[2].split('\n', 1)[0].replace(this.rules.other.listReplaceTabs, (t: string) => ' '.repeat(3 * t.length));
|
||||
let nextLine = src.split('\n', 1)[0];
|
||||
let blankLine = !line.trim();
|
||||
|
||||
@ -287,24 +287,24 @@ export class _Tokenizer {
|
||||
} else if (blankLine) {
|
||||
indent = cap[1].length + 1;
|
||||
} else {
|
||||
indent = cap[2].search(/[^ ]/); // Find first non-space char
|
||||
indent = cap[2].search(this.rules.other.nonSpaceChar); // Find first non-space char
|
||||
indent = indent > 4 ? 1 : indent; // Treat indented code blocks (> 4 spaces) as having only 1 indent
|
||||
itemContents = line.slice(indent);
|
||||
indent += cap[1].length;
|
||||
}
|
||||
|
||||
if (blankLine && /^[ \t]*$/.test(nextLine)) { // Items begin with at most one blank line
|
||||
if (blankLine && this.rules.other.blankLine.test(nextLine)) { // Items begin with at most one blank line
|
||||
raw += nextLine + '\n';
|
||||
src = src.substring(nextLine.length + 1);
|
||||
endEarly = true;
|
||||
}
|
||||
|
||||
if (!endEarly) {
|
||||
const nextBulletRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`);
|
||||
const hrRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`);
|
||||
const fencesBeginRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:\`\`\`|~~~)`);
|
||||
const headingBeginRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}#`);
|
||||
const htmlBeginRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}<(?:[a-z].*>|!--)`, 'i');
|
||||
const nextBulletRegex = this.rules.other.nextBulletRegex(indent);
|
||||
const hrRegex = this.rules.other.hrRegex(indent);
|
||||
const fencesBeginRegex = this.rules.other.fencesBeginRegex(indent);
|
||||
const headingBeginRegex = this.rules.other.headingBeginRegex(indent);
|
||||
const htmlBeginRegex = this.rules.other.htmlBeginRegex(indent);
|
||||
|
||||
// Check if following lines should be included in List Item
|
||||
while (src) {
|
||||
@ -314,10 +314,10 @@ export class _Tokenizer {
|
||||
|
||||
// Re-align to follow commonmark nesting rules
|
||||
if (this.options.pedantic) {
|
||||
nextLine = nextLine.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
|
||||
nextLine = nextLine.replace(this.rules.other.listReplaceNesting, ' ');
|
||||
nextLineWithoutTabs = nextLine;
|
||||
} else {
|
||||
nextLineWithoutTabs = nextLine.replace(/\t/g, ' ');
|
||||
nextLineWithoutTabs = nextLine.replace(this.rules.other.tabCharGlobal, ' ');
|
||||
}
|
||||
|
||||
// End list item if found code fences
|
||||
@ -345,7 +345,7 @@ export class _Tokenizer {
|
||||
break;
|
||||
}
|
||||
|
||||
if (nextLineWithoutTabs.search(/[^ ]/) >= indent || !nextLine.trim()) { // Dedent if possible
|
||||
if (nextLineWithoutTabs.search(this.rules.other.nonSpaceChar) >= indent || !nextLine.trim()) { // Dedent if possible
|
||||
itemContents += '\n' + nextLineWithoutTabs.slice(indent);
|
||||
} else {
|
||||
// not enough indentation
|
||||
@ -354,7 +354,7 @@ export class _Tokenizer {
|
||||
}
|
||||
|
||||
// paragraph continuation unless last line was a different block level element
|
||||
if (line.replace(/\t/g, ' ').search(/[^ ]/) >= 4) { // indented code block
|
||||
if (line.replace(this.rules.other.tabCharGlobal, ' ').search(this.rules.other.nonSpaceChar) >= 4) { // indented code block
|
||||
break;
|
||||
}
|
||||
if (fencesBeginRegex.test(line)) {
|
||||
@ -384,7 +384,7 @@ export class _Tokenizer {
|
||||
// If the previous item ended with a blank line, the list is loose
|
||||
if (endsWithBlankLine) {
|
||||
list.loose = true;
|
||||
} else if (/\n[ \t]*\n[ \t]*$/.test(raw)) {
|
||||
} else if (this.rules.other.doubleBlankLine.test(raw)) {
|
||||
endsWithBlankLine = true;
|
||||
}
|
||||
}
|
||||
@ -393,10 +393,10 @@ export class _Tokenizer {
|
||||
let ischecked: boolean | undefined;
|
||||
// Check for task list items
|
||||
if (this.options.gfm) {
|
||||
istask = /^\[[ xX]\] /.exec(itemContents);
|
||||
istask = this.rules.other.listIsTask.exec(itemContents);
|
||||
if (istask) {
|
||||
ischecked = istask[0] !== '[ ] ';
|
||||
itemContents = itemContents.replace(/^\[[ xX]\] +/, '');
|
||||
itemContents = itemContents.replace(this.rules.other.listReplaceTask, '');
|
||||
}
|
||||
}
|
||||
|
||||
@ -426,7 +426,7 @@ export class _Tokenizer {
|
||||
if (!list.loose) {
|
||||
// Check if list should be loose
|
||||
const spacers = list.items[i].tokens.filter(t => t.type === 'space');
|
||||
const hasMultipleLineBreaks = spacers.length > 0 && spacers.some(t => /\n.*\n/.test(t.raw));
|
||||
const hasMultipleLineBreaks = spacers.length > 0 && spacers.some(t => this.rules.other.anyLine.test(t.raw));
|
||||
|
||||
list.loose = hasMultipleLineBreaks;
|
||||
}
|
||||
@ -460,8 +460,8 @@ export class _Tokenizer {
|
||||
def(src: string): Tokens.Def | undefined {
|
||||
const cap = this.rules.block.def.exec(src);
|
||||
if (cap) {
|
||||
const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
|
||||
const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline.anyPunctuation, '$1') : '';
|
||||
const tag = cap[1].toLowerCase().replace(this.rules.other.multipleSpaceGlobal, ' ');
|
||||
const href = cap[2] ? cap[2].replace(this.rules.other.hrefBrackets, '$1').replace(this.rules.inline.anyPunctuation, '$1') : '';
|
||||
const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline.anyPunctuation, '$1') : cap[3];
|
||||
return {
|
||||
type: 'def',
|
||||
@ -479,14 +479,14 @@ export class _Tokenizer {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!/[:|]/.test(cap[2])) {
|
||||
if (!this.rules.other.tableDelimiter.test(cap[2])) {
|
||||
// delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
|
||||
return;
|
||||
}
|
||||
|
||||
const headers = splitCells(cap[1]);
|
||||
const aligns = cap[2].replace(/^\||\| *$/g, '').split('|');
|
||||
const rows = cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : [];
|
||||
const aligns = cap[2].replace(this.rules.other.tableAlignChars, '').split('|');
|
||||
const rows = cap[3] && cap[3].trim() ? cap[3].replace(this.rules.other.tableRowBlankLine, '').split('\n') : [];
|
||||
|
||||
const item: Tokens.Table = {
|
||||
type: 'table',
|
||||
@ -502,11 +502,11 @@ export class _Tokenizer {
|
||||
}
|
||||
|
||||
for (const align of aligns) {
|
||||
if (/^ *-+: *$/.test(align)) {
|
||||
if (this.rules.other.tableAlignRight.test(align)) {
|
||||
item.align.push('right');
|
||||
} else if (/^ *:-+: *$/.test(align)) {
|
||||
} else if (this.rules.other.tableAlignCenter.test(align)) {
|
||||
item.align.push('center');
|
||||
} else if (/^ *:-+ *$/.test(align)) {
|
||||
} else if (this.rules.other.tableAlignLeft.test(align)) {
|
||||
item.align.push('left');
|
||||
} else {
|
||||
item.align.push(null);
|
||||
@ -590,14 +590,14 @@ export class _Tokenizer {
|
||||
tag(src: string): Tokens.Tag | undefined {
|
||||
const cap = this.rules.inline.tag.exec(src);
|
||||
if (cap) {
|
||||
if (!this.lexer.state.inLink && /^<a /i.test(cap[0])) {
|
||||
if (!this.lexer.state.inLink && this.rules.other.startATag.test(cap[0])) {
|
||||
this.lexer.state.inLink = true;
|
||||
} else if (this.lexer.state.inLink && /^<\/a>/i.test(cap[0])) {
|
||||
} else if (this.lexer.state.inLink && this.rules.other.endATag.test(cap[0])) {
|
||||
this.lexer.state.inLink = false;
|
||||
}
|
||||
if (!this.lexer.state.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
||||
if (!this.lexer.state.inRawBlock && this.rules.other.startPreScriptTag.test(cap[0])) {
|
||||
this.lexer.state.inRawBlock = true;
|
||||
} else if (this.lexer.state.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) {
|
||||
} else if (this.lexer.state.inRawBlock && this.rules.other.endPreScriptTag.test(cap[0])) {
|
||||
this.lexer.state.inRawBlock = false;
|
||||
}
|
||||
|
||||
@ -616,9 +616,9 @@ export class _Tokenizer {
|
||||
const cap = this.rules.inline.link.exec(src);
|
||||
if (cap) {
|
||||
const trimmedUrl = cap[2].trim();
|
||||
if (!this.options.pedantic && /^</.test(trimmedUrl)) {
|
||||
if (!this.options.pedantic && this.rules.other.startAngleBracket.test(trimmedUrl)) {
|
||||
// commonmark requires matching angle brackets
|
||||
if (!(/>$/.test(trimmedUrl))) {
|
||||
if (!(this.rules.other.endAngleBracket.test(trimmedUrl))) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -642,7 +642,7 @@ export class _Tokenizer {
|
||||
let title = '';
|
||||
if (this.options.pedantic) {
|
||||
// split pedantic href and title
|
||||
const link = /^([^'"]*[^\s])\s+(['"])(.*)\2/.exec(href);
|
||||
const link = this.rules.other.pedanticHrefTitle.exec(href);
|
||||
|
||||
if (link) {
|
||||
href = link[1];
|
||||
@ -653,8 +653,8 @@ export class _Tokenizer {
|
||||
}
|
||||
|
||||
href = href.trim();
|
||||
if (/^</.test(href)) {
|
||||
if (this.options.pedantic && !(/>$/.test(trimmedUrl))) {
|
||||
if (this.rules.other.startAngleBracket.test(href)) {
|
||||
if (this.options.pedantic && !(this.rules.other.endAngleBracket.test(trimmedUrl))) {
|
||||
// pedantic allows starting angle bracket without ending angle bracket
|
||||
href = href.slice(1);
|
||||
} else {
|
||||
@ -664,7 +664,7 @@ export class _Tokenizer {
|
||||
return outputLink(cap, {
|
||||
href: href ? href.replace(this.rules.inline.anyPunctuation, '$1') : href,
|
||||
title: title ? title.replace(this.rules.inline.anyPunctuation, '$1') : title,
|
||||
}, cap[0], this.lexer);
|
||||
}, cap[0], this.lexer, this.rules);
|
||||
}
|
||||
}
|
||||
|
||||
@ -672,7 +672,7 @@ export class _Tokenizer {
|
||||
let cap;
|
||||
if ((cap = this.rules.inline.reflink.exec(src))
|
||||
|| (cap = this.rules.inline.nolink.exec(src))) {
|
||||
const linkString = (cap[2] || cap[1]).replace(/\s+/g, ' ');
|
||||
const linkString = (cap[2] || cap[1]).replace(this.rules.other.multipleSpaceGlobal, ' ');
|
||||
const link = links[linkString.toLowerCase()];
|
||||
if (!link) {
|
||||
const text = cap[0].charAt(0);
|
||||
@ -682,7 +682,7 @@ export class _Tokenizer {
|
||||
text,
|
||||
};
|
||||
}
|
||||
return outputLink(cap, link, cap[0], this.lexer);
|
||||
return outputLink(cap, link, cap[0], this.lexer, this.rules);
|
||||
}
|
||||
}
|
||||
|
||||
@ -691,7 +691,7 @@ export class _Tokenizer {
|
||||
if (!match) return;
|
||||
|
||||
// _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
|
||||
if (match[3] && prevChar.match(/[\p{L}\p{N}]/u)) return;
|
||||
if (match[3] && prevChar.match(this.rules.other.unicodeAlphaNumeric)) return;
|
||||
|
||||
const nextChar = match[1] || match[2] || '';
|
||||
|
||||
@ -759,9 +759,9 @@ export class _Tokenizer {
|
||||
codespan(src: string): Tokens.Codespan | undefined {
|
||||
const cap = this.rules.inline.code.exec(src);
|
||||
if (cap) {
|
||||
let text = cap[2].replace(/\n/g, ' ');
|
||||
const hasNonSpaceChars = /[^ ]/.test(text);
|
||||
const hasSpaceCharsOnBothEnds = /^ /.test(text) && / $/.test(text);
|
||||
let text = cap[2].replace(this.rules.other.newLineCharGlobal, ' ');
|
||||
const hasNonSpaceChars = this.rules.other.nonSpaceChar.test(text);
|
||||
const hasSpaceCharsOnBothEnds = this.rules.other.startingSpaceChar.test(text) && this.rules.other.endingSpaceChar.test(text);
|
||||
if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) {
|
||||
text = text.substring(1, text.length - 1);
|
||||
}
|
||||
|
@ -1,10 +1,8 @@
|
||||
import { other } from './rules.ts';
|
||||
|
||||
/**
|
||||
* Helpers
|
||||
*/
|
||||
const escapeTest = /[&<>"']/;
|
||||
const escapeReplace = new RegExp(escapeTest.source, 'g');
|
||||
const escapeTestNoEncode = /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/;
|
||||
const escapeReplaceNoEncode = new RegExp(escapeTestNoEncode.source, 'g');
|
||||
const escapeReplacements: { [index: string]: string } = {
|
||||
'&': '&',
|
||||
'<': '<',
|
||||
@ -16,23 +14,21 @@ const getEscapeReplacement = (ch: string) => escapeReplacements[ch];
|
||||
|
||||
/**
 * Escape HTML-significant characters in `html`.
 *
 * @param html   Text to escape.
 * @param encode When truthy, every `&`, `<`, `>`, `"` and `'` is replaced.
 *               Otherwise an `&` that already starts a character reference
 *               (`&#123;`, `&#x1F;`, `&name;`) is left untouched so existing
 *               entities are not escaped a second time.
 * @returns The escaped text, or `html` itself when it contains nothing to escape.
 */
export function escape(html: string, encode?: boolean) {
  if (encode) {
    // Probe with the non-global pattern first so clean input is returned as-is.
    if (other.escapeTest.test(html)) {
      return html.replace(other.escapeReplace, getEscapeReplacement);
    }
  } else {
    if (other.escapeTestNoEncode.test(html)) {
      return html.replace(other.escapeReplaceNoEncode, getEscapeReplacement);
    }
  }

  return html;
}
|
||||
|
||||
const unescapeTest = /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/ig;
|
||||
|
||||
export function unescape(html: string) {
|
||||
// explicitly match decimal, hex, and named HTML entities
|
||||
return html.replace(unescapeTest, (_, n) => {
|
||||
return html.replace(other.unescapeTest, (_, n) => {
|
||||
n = n.toLowerCase();
|
||||
if (n === 'colon') return ':';
|
||||
if (n.charAt(0) === '#') {
|
||||
@ -44,40 +40,19 @@ export function unescape(html: string) {
|
||||
});
|
||||
}
|
||||
|
||||
const caret = /(^|[^\[])\^/g;
|
||||
|
||||
/**
 * Start a small builder that assembles a regular expression from named pieces.
 *
 * @param regex Template pattern, given as source text or as a RegExp (only its
 *              `source` is used).
 * @param opt   Flags for the final RegExp; defaults to none.
 * @returns An object with `replace(name, val)`, which substitutes the first
 *          match of `name` in the template with the source of `val` and returns
 *          the same builder for chaining, and `getRegex()`, which compiles the
 *          current template.
 */
export function edit(regex: string | RegExp, opt?: string) {
  let source = typeof regex === 'string' ? regex : regex.source;
  const flags = opt || '';
  const obj = {
    replace: (name: string | RegExp, val: string | RegExp) => {
      const rawVal = typeof val === 'string' ? val : val.source;
      // Drop `^` anchors from the fragment (a `^` right after `[` is kept, so
      // negated character classes survive).
      const valSource = rawVal.replace(caret, '$1');
      // Use a function replacer: a plain replacement string would expand
      // `$&`, `$1`, `$'` ... that happen to occur inside the fragment.
      source = source.replace(name, () => valSource);
      return obj;
    },
    getRegex: () => {
      return new RegExp(source, flags);
    },
  };
  return obj;
}
|
||||
|
||||
/**
 * Percent-encode `href` for use as a URL.
 *
 * `encodeURI` escapes a literal `%` as `%25`; that is undone afterwards so
 * sequences that were already encoded in the input (for example `%20`) are
 * not double-encoded.
 *
 * @param href URL text to encode.
 * @returns The encoded URL, or `null` when `encodeURI` throws (for example on
 *          a lone surrogate).
 */
export function cleanUrl(href: string) {
  try {
    return encodeURI(href).replace(other.percentDecode, '%');
  } catch {
    return null;
  }
}
|
||||
|
||||
export const noopTest = { exec: () => null } as unknown as RegExp;
|
||||
|
||||
export function splitCells(tableRow: string, count?: number) {
|
||||
// ensure that every cell-delimiting pipe has a space
|
||||
// before it to distinguish it from an escaped pipe
|
||||
const row = tableRow.replace(/\|/g, (match, offset, str) => {
|
||||
const row = tableRow.replace(other.findPipe, (match, offset, str) => {
|
||||
let escaped = false;
|
||||
let curr = offset;
|
||||
while (--curr >= 0 && str[curr] === '\\') escaped = !escaped;
|
||||
@ -90,7 +65,7 @@ export function splitCells(tableRow: string, count?: number) {
|
||||
return ' |';
|
||||
}
|
||||
}),
|
||||
cells = row.split(/ \|/);
|
||||
cells = row.split(other.splitPipe);
|
||||
let i = 0;
|
||||
|
||||
// First/last cell in a row cannot be empty if it has no leading/trailing pipe
|
||||
@ -111,7 +86,7 @@ export function splitCells(tableRow: string, count?: number) {
|
||||
|
||||
for (; i < cells.length; i++) {
|
||||
// leading or trailing whitespace is ignored per the gfm spec
|
||||
cells[i] = cells[i].trim().replace(/\\\|/g, '|');
|
||||
cells[i] = cells[i].trim().replace(other.slashPipe, '|');
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
|
80
src/rules.ts
80
src/rules.ts
@ -1,6 +1,79 @@
|
||||
import {
|
||||
edit, noopTest,
|
||||
} from './helpers.ts';
|
||||
// Stand-in typed as a RegExp whose `exec` always returns null, i.e. it never matches.
// NOTE(review): presumably used in place of rules that are switched off — the call sites are outside this view.
const noopTest = { exec: () => null } as unknown as RegExp;
|
||||
|
||||
/**
 * Start a small builder that assembles a regular expression from named pieces.
 *
 * @param regex Template pattern, given as source text or as a RegExp (only its
 *              `source` is used).
 * @param opt   Flags for the final RegExp; defaults to none.
 * @returns An object with `replace(name, val)`, which substitutes the first
 *          match of `name` in the template with the source of `val` and returns
 *          the same builder for chaining, and `getRegex()`, which compiles the
 *          current template.
 */
function edit(regex: string | RegExp, opt = '') {
  let source = typeof regex === 'string' ? regex : regex.source;
  const obj = {
    replace: (name: string | RegExp, val: string | RegExp) => {
      const rawVal = typeof val === 'string' ? val : val.source;
      // Drop `^` anchors from the fragment (a `^` right after `[` is kept, so
      // negated character classes survive).
      const valSource = rawVal.replace(other.caret, '$1');
      // Use a function replacer: a plain replacement string would expand
      // `$&`, `$1`, `$'` ... that happen to occur inside the fragment.
      source = source.replace(name, () => valSource);
      return obj;
    },
    getRegex: () => {
      return new RegExp(source, opt);
    },
  };
  return obj;
}
|
||||
|
||||
/**
 * Upper bound for the "up to N leading spaces" prefix used by the
 * indent-dependent patterns below: `indent - 1`, capped at 3 and never
 * negative (a negative bound would produce `{0,-1}`, which is not a
 * quantifier and would be matched as literal text).
 */
const leadingSpaceLimit = (indent: number) => Math.max(0, Math.min(3, indent - 1));

/**
 * Patterns that are not part of the block or inline grammars, collected in one
 * table so they are created once at module load.
 *
 * The `g`-flagged entries are shared objects and therefore carry `lastIndex`
 * state; the call sites visible here only pass them to
 * `String.prototype.replace`, which is safe. Avoid calling `.test()` or
 * `.exec()` on them.
 *
 * The entries at the end are factories: they build a fresh RegExp from a
 * bullet pattern or from the current list indent.
 */
export const other = {
  // Code blocks and links
  codeRemoveIndent: /^(?: {1,4}| {0,3}\t)/gm,
  outputLinkReplace: /\\([\[\]])/g,
  indentCodeCompensation: /^(\s+)(?:```)/,
  beginningSpace: /^\s+/,

  // Single-character and whitespace probes
  endingHash: /#$/,
  startingSpaceChar: /^ /,
  endingSpaceChar: / $/,
  nonSpaceChar: /[^ ]/,
  newLineCharGlobal: /\n/g,
  tabCharGlobal: /\t/g,
  multipleSpaceGlobal: /\s+/g,
  blankLine: /^[ \t]*$/,
  doubleBlankLine: /\n[ \t]*\n[ \t]*$/,

  // Blockquotes
  blockquoteStart: /^ {0,3}>/,
  blockquoteSetextReplace: /\n {0,3}((?:=+|-+) *)(?=\n|$)/g,
  blockquoteSetextReplace2: /^ {0,3}>[ \t]?/gm,

  // Lists
  listReplaceTabs: /^\t+/,
  listReplaceNesting: /^ {1,4}(?=( {4})*[^ ])/g,
  listIsTask: /^\[[ xX]\] /,
  listReplaceTask: /^\[[ xX]\] +/,
  anyLine: /\n.*\n/,

  // Definitions and tables
  hrefBrackets: /^<(.*)>$/,
  tableDelimiter: /[:|]/,
  tableAlignChars: /^\||\| *$/g,
  tableRowBlankLine: /\n[ \t]*$/,
  tableAlignRight: /^ *-+: *$/,
  tableAlignCenter: /^ *:-+: *$/,
  tableAlignLeft: /^ *:-+ *$/,

  // Inline HTML tags and link destinations
  startATag: /^<a /i,
  endATag: /^<\/a>/i,
  startPreScriptTag: /^<(pre|code|kbd|script)(\s|>)/i,
  endPreScriptTag: /^<\/(pre|code|kbd|script)(\s|>)/i,
  startAngleBracket: /^</,
  endAngleBracket: />$/,
  pedanticHrefTitle: /^([^'"]*[^\s])\s+(['"])(.*)\2/,
  unicodeAlphaNumeric: /[\p{L}\p{N}]/u,

  // Escaping helpers (each test pattern has a `g` twin used for replacing)
  escapeTest: /[&<>"']/,
  escapeReplace: /[&<>"']/g,
  escapeTestNoEncode: /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/,
  escapeReplaceNoEncode: /[<>"']|&(?!(#\d{1,7}|#[Xx][a-fA-F0-9]{1,6}|\w+);)/g,
  unescapeTest: /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/ig,

  // Miscellaneous helpers
  caret: /(^|[^\[])\^/g,
  percentDecode: /%25/g,
  findPipe: /\|/g,
  splitPipe: / \|/,
  slashPipe: /\\\|/g,
  carriageReturn: /\r\n|\r/g,
  spaceLine: /^ +$/gm,
  notSpaceStart: /^\S*/,
  endingNewline: /\n$/,

  // Factories
  listItemRegex: (bull: string) => new RegExp(`^( {0,3}${bull})((?:[\t ][^\\n]*)?(?:\\n|$))`),
  nextBulletRegex: (indent: number) => new RegExp(`^ {0,${leadingSpaceLimit(indent)}}(?:[*+-]|\\d{1,9}[.)])((?:[ \t][^\\n]*)?(?:\\n|$))`),
  hrRegex: (indent: number) => new RegExp(`^ {0,${leadingSpaceLimit(indent)}}((?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$)`),
  fencesBeginRegex: (indent: number) => new RegExp(`^ {0,${leadingSpaceLimit(indent)}}(?:\`\`\`|~~~)`),
  headingBeginRegex: (indent: number) => new RegExp(`^ {0,${leadingSpaceLimit(indent)}}#`),
  htmlBeginRegex: (indent: number) => new RegExp(`^ {0,${leadingSpaceLimit(indent)}}<(?:[a-z].*>|!--)`, 'i'),
};
|
||||
|
||||
/**
|
||||
* Block-Level Grammar
|
||||
@ -336,6 +409,7 @@ export const inline = {
|
||||
};
|
||||
|
||||
/**
 * The rule set handed to the tokenizer: the shared helper patterns plus one
 * RegExp per block-level and per inline-level rule name.
 */
export interface Rules {
  other: typeof other
  block: Record<BlockKeys, RegExp>
  inline: Record<InlineKeys, RegExp>
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user