fix: Simplify unicode punctuation (#2841)
This commit is contained in:
parent
5c9447139e
commit
f19fe76db9
@ -348,8 +348,8 @@ export class Lexer {
|
||||
}
|
||||
|
||||
// Mask out escaped characters
|
||||
while ((match = this.tokenizer.rules.inline.escapedPunct.exec(maskedSrc)) != null) {
|
||||
maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.escapedPunct.lastIndex);
|
||||
while ((match = this.tokenizer.rules.inline.anyPunctuation.exec(maskedSrc)) != null) {
|
||||
maskedSrc = maskedSrc.slice(0, match.index) + '++' + maskedSrc.slice(this.tokenizer.rules.inline.anyPunctuation.lastIndex);
|
||||
}
|
||||
|
||||
while (src) {
|
||||
|
@ -613,7 +613,7 @@ export class Tokenizer {
|
||||
|
||||
const nextChar = match[1] || match[2] || '';
|
||||
|
||||
if (!nextChar || (nextChar && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
|
||||
if (!nextChar || !prevChar || this.rules.inline.punctuation.exec(prevChar)) {
|
||||
const lLength = match[0].length - 1;
|
||||
let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
|
||||
|
||||
|
33
src/rules.js
33
src/rules.js
@ -172,48 +172,49 @@ export const inline = {
|
||||
nolink: /^!?\[(ref)\](?:\[\])?/,
|
||||
reflinkSearch: 'reflink|nolink(?!\\()',
|
||||
emStrong: {
|
||||
lDelim: /^(?:\*+(?:([punct_])|[^\s*]))|^_+(?:([punct*])|([^\s_]))/,
|
||||
lDelim: /^(?:\*+(?:((?!\*)[punct])|[^\s*]))|^_+(?:((?!_)[punct])|([^\s_]))/,
|
||||
// (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right.
|
||||
// () Skip orphan inside strong () Consume to delim (1) #*** (2) a***#, a*** (3) #***a, ***a (4) ***# (5) #***# (6) a***a
|
||||
rDelimAst: /^[^_*]*?\_\_[^_*]*?\*[^_*]*?(?=\_\_)|[^*]+(?=[^*])|[punct_](\*+)(?=[\s]|$)|[^punct*_\s](\*+)(?=[punct_\s]|$)|[punct_\s](\*+)(?=[^punct*_\s])|[\s](\*+)(?=[punct_])|[punct_](\*+)(?=[punct_])|[^punct*_\s](\*+)(?=[^punct*_\s])/,
|
||||
rDelimUnd: /^[^_*]*?\*\*[^_*]*?\_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|[punct*](\_+)(?=[\s]|$)|[^punct*_\s](\_+)(?=[punct*\s]|$)|[punct*\s](\_+)(?=[^punct*_\s])|[\s](\_+)(?=[punct*])|[punct*](\_+)(?=[punct*])/ // ^- Not allowed for _
|
||||
// | Skip orphan inside strong | Consume to delim | (1) #*** | (2) a***#, a*** | (3) #***a, ***a | (4) ***# | (5) #***# | (6) a***a
|
||||
rDelimAst: /^[^_*]*?__[^_*]*?\*[^_*]*?(?=__)|[^*]+(?=[^*])|(?!\*)[punct](\*+)(?=[\s]|$)|[^punct\s](\*+)(?!\*)(?=[punct\s]|$)|(?!\*)[punct\s](\*+)(?=[^punct\s])|[\s](\*+)(?!\*)(?=[punct])|(?!\*)[punct](\*+)(?!\*)(?=[punct])|[^punct\s](\*+)(?=[^punct\s])/,
|
||||
rDelimUnd: /^[^_*]*?\*\*[^_*]*?_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|(?!_)[punct](_+)(?=[\s]|$)|[^punct\s](_+)(?!_)(?=[punct\s]|$)|(?!_)[punct\s](_+)(?=[^punct\s])|[\s](_+)(?!_)(?=[punct])|(?!_)[punct](_+)(?!_)(?=[punct])/ // ^- Not allowed for _
|
||||
},
|
||||
code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
|
||||
br: /^( {2,}|\\)\n(?!\s*$)/,
|
||||
del: noopTest,
|
||||
text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
|
||||
punctuation: /^([\spunctuation])/
|
||||
punctuation: /^((?![*_])[\spunctuation])/
|
||||
};
|
||||
|
||||
// list of punctuation marks from CommonMark spec
|
||||
// without * and _ to handle the different emphasis markers * and _
|
||||
inline._uc_punctuation = '\\u00A1\\u00A7\\u00AB\\u00B6\\u00B7\\u00BB\\u00BF\\u037E\\u0387\\u055A-\\u055F\\u0589\\u058A\\u05BE\\u05C0\\u05C3\\u05C6\\u05F3\\u05F4\\u0609\\u060A\\u060C\\u060D\\u061B\\u061E\\u061F\\u066A-\\u066D\\u06D4\\u0700-\\u070D\\u07F7-\\u07F9\\u0830-\\u083E\\u085E\\u0964\\u0965\\u0970\\u0AF0\\u0DF4\\u0E4F\\u0E5A\\u0E5B\\u0F04-\\u0F12\\u0F14\\u0F3A-\\u0F3D\\u0F85\\u0FD0-\\u0FD4\\u0FD9\\u0FDA\\u104A-\\u104F\\u10FB\\u1360-\\u1368\\u1400\\u166D\\u166E\\u169B\\u169C\\u16EB-\\u16ED\\u1735\\u1736\\u17D4-\\u17D6\\u17D8-\\u17DA\\u1800-\\u180A\\u1944\\u1945\\u1A1E\\u1A1F\\u1AA0-\\u1AA6\\u1AA8-\\u1AAD\\u1B5A-\\u1B60\\u1BFC-\\u1BFF\\u1C3B-\\u1C3F\\u1C7E\\u1C7F\\u1CC0-\\u1CC7\\u1CD3\\u2010-\\u2027\\u2030-\\u2043\\u2045-\\u2051\\u2053-\\u205E\\u207D\\u207E\\u208D\\u208E\\u2308-\\u230B\\u2329\\u232A\\u2768-\\u2775\\u27C5\\u27C6\\u27E6-\\u27EF\\u2983-\\u2998\\u29D8-\\u29DB\\u29FC\\u29FD\\u2CF9-\\u2CFC\\u2CFE\\u2CFF\\u2D70\\u2E00-\\u2E2E\\u2E30-\\u2E42\\u3001-\\u3003\\u3008-\\u3011\\u3014-\\u301F\\u3030\\u303D\\u30A0\\u30FB\\uA4FE\\uA4FF\\uA60D-\\uA60F\\uA673\\uA67E\\uA6F2-\\uA6F7\\uA874-\\uA877\\uA8CE\\uA8CF\\uA8F8-\\uA8FA\\uA8FC\\uA92E\\uA92F\\uA95F\\uA9C1-\\uA9CD\\uA9DE\\uA9DF\\uAA5C-\\uAA5F\\uAADE\\uAADF\\uAAF0\\uAAF1\\uABEB\\uFD3E\\uFD3F\\uFE10-\\uFE19\\uFE30-\\uFE52\\uFE54-\\uFE61\\uFE63\\uFE68\\uFE6A\\uFE6B\\uFF01-\\uFF03\\uFF05-\\uFF0A\\uFF0C-\\uFF0F\\uFF1A\\uFF1B\\uFF1F\\uFF20\\uFF3B-\\uFF3D\\uFF3F\\uFF5B\\uFF5D\\uFF5F-\\uFF65';
|
||||
inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~\\\\' + inline._uc_punctuation;
|
||||
inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();
|
||||
// Unicode punctuation (\p{P}), plus the ASCII symbols $ + < = > ` ^ | ~ that the CommonMark spec also counts as punctuation but \p{P} does not match
|
||||
inline._punctuation = '\\p{P}$+<=>`^|~';
|
||||
inline.punctuation = edit(inline.punctuation, 'u').replace(/punctuation/g, inline._punctuation).getRegex();
|
||||
|
||||
// sequences that emphasis (em/strong) matching should skip over: [title](link), `code`, <html>
|
||||
inline.blockSkip = /\[[^[\]]*?\]\([^\(\)]*?\)|`[^`]*?`|<[^<>]*?>/g;
|
||||
inline.escapedPunct = /\\[punct_*]/g;
|
||||
inline.anyPunctuation = /\\[punct]/g;
|
||||
inline._escapes = /\\([punct])/g;
|
||||
|
||||
inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex();
|
||||
|
||||
inline.emStrong.lDelim = edit(inline.emStrong.lDelim)
|
||||
inline.emStrong.lDelim = edit(inline.emStrong.lDelim, 'u')
|
||||
.replace(/punct/g, inline._punctuation)
|
||||
.getRegex();
|
||||
|
||||
inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'g')
|
||||
inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst, 'gu')
|
||||
.replace(/punct/g, inline._punctuation)
|
||||
.getRegex();
|
||||
|
||||
inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'g')
|
||||
inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd, 'gu')
|
||||
.replace(/punct/g, inline._punctuation)
|
||||
.getRegex();
|
||||
|
||||
inline.escapedPunct = edit(inline.escapedPunct, 'g')
|
||||
inline.anyPunctuation = edit(inline.anyPunctuation, 'gu')
|
||||
.replace(/punct/g, inline._punctuation)
|
||||
.getRegex();
|
||||
|
||||
inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;
|
||||
inline._escapes = edit(inline._escapes, 'gu')
|
||||
.replace(/punct/g, inline._punctuation)
|
||||
.getRegex();
|
||||
|
||||
inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
|
||||
inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;
|
||||
|
Loading…
x
Reference in New Issue
Block a user