feat: add async option (#2474)

* fix: return values from walkTokens

* docs: add async docs

* test: add async test

* docs: add nav to async

* Update docs/USING_PRO.md

Co-authored-by: Steven <steven@ceriously.com>

* test: expect promise

* Update docs/USING_ADVANCED.md

Co-authored-by: Steven <steven@ceriously.com>

Co-authored-by: Steven <steven@ceriously.com>
Tony Brix 2022-08-30 09:36:16 -05:00 committed by GitHub
parent 33724a3201
commit 994b2e6127
8 changed files with 215 additions and 171 deletions


@@ -44,6 +44,7 @@ console.log(marked.parse(markdownString));
|Member |Type |Default |Since |Notes |
|:-----------|:---------|:--------|:--------|:-------------|
|async |`boolean` |`false` |4.1.0 |If true, `walkTokens` functions can be async and `marked.parse` will return a promise that resolves when all `walkTokens` functions resolve.|
|baseUrl |`string` |`null` |0.3.9 |A prefix url for any relative link. |
|breaks |`boolean` |`false` |v0.2.7 |If true, add `<br>` on a single line break (copies GitHub behavior on comments, but not on rendered markdown files). Requires `gfm` be `true`.|
|gfm |`boolean` |`true` |v0.2.1 |If true, use approved [GitHub Flavored Markdown (GFM) specification](https://github.github.com/gfm/).|
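A minimal sketch of how the new `async` option is meant to be used; the markdown input and the `walkTokens` body below are illustrative only:

```js
import { marked } from 'marked';

marked.use({
  async: true, // marked.parse now returns a promise instead of a string
  async walkTokens(token) {
    // illustrative async work per token; a real hook might fetch or validate data
    if (token.type === 'link') {
      token.title = token.title || 'checked';
    }
  }
});

// Resolves once every walkTokens call has settled.
const html = await marked.parse('[example](https://example.com)');
console.log(html);
```

With `async` left at its default of `false`, `marked.parse` keeps returning a plain HTML string.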


@@ -438,6 +438,78 @@ console.log(marked.parse('A Description List:\n'
***
<h2 id="async">Async Marked : <code>async</code></h2>
Marked will return a promise if the `async` option is true. The `async` option will tell marked to await any `walkTokens` functions before parsing the tokens and returning an HTML string.
Simple Example:
```js
const walkTokens = async (token) => {
  if (token.type === 'link') {
    try {
      await fetch(token.href);
    } catch (ex) {
      token.title = 'invalid';
    }
  }
};

marked.use({ walkTokens, async: true });

const markdown = `
[valid link](https://example.com)
[invalid link](https://invalidurl.com)
`;

const html = await marked.parse(markdown);
```
Custom Extension Example:
```js
const importUrl = {
  extensions: [{
    name: 'importUrl',
    level: 'block',
    start(src) { return src.indexOf('\n:'); },
    tokenizer(src) {
      const rule = /^:(https?:\/\/.+?):/;
      const match = rule.exec(src);
      if (match) {
        return {
          type: 'importUrl',
          raw: match[0],
          url: match[1],
          html: '' // will be replaced in walkTokens
        };
      }
    },
    renderer(token) {
      return token.html;
    }
  }],
  async: true, // needed to tell marked to return a promise
  async walkTokens(token) {
    if (token.type === 'importUrl') {
      const res = await fetch(token.url);
      token.html = await res.text();
    }
  }
};

marked.use(importUrl);

const markdown = `
# example.com
:https://example.com:
`;

const html = await marked.parse(markdown);
```
<h2 id="lexer">The Lexer</h2>
The lexer takes a markdown string and calls the tokenizer functions.
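For reference, a small sketch of calling the lexer directly, assuming the standard `marked.lexer` and `marked.parser` exports; the input string is illustrative:

```js
import { marked } from 'marked';

// Tokenize only; block tokens carry their inline child tokens in `token.tokens`.
const tokens = marked.lexer('# Heading\n\nSome *emphasis* here.');
console.log(tokens.map((token) => token.type)); // e.g. ['heading', 'paragraph']

// The token list can then be rendered to HTML by the parser.
console.log(marked.parser(tokens));
```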


@@ -51,6 +51,7 @@
<li><a href="/using_pro#tokenizer">Tokenizer</a></li>
<li><a href="/using_pro#walk-tokens">Walk Tokens</a></li>
<li><a href="/using_pro#extensions">Custom Extensions</a></li>
<li><a href="/using_pro#async">Async Marked</a></li>
<li><a href="/using_pro#lexer">Lexer</a></li>
<li><a href="/using_pro#parser">Parser</a></li>
</ul>


@@ -19,7 +19,7 @@ function outputLink(cap, link, raw, lexer) {
href,
title,
text,
tokens: lexer.inlineTokens(text, [])
tokens: lexer.inlineTokens(text)
};
lexer.state.inLink = false;
return token;
@@ -125,15 +125,13 @@ export class Tokenizer {
}
}
const token = {
return {
type: 'heading',
raw: cap[0],
depth: cap[1].length,
text,
tokens: []
tokens: this.lexer.inline(text)
};
this.lexer.inline(token.text, token.tokens);
return token;
}
}
@@ -355,10 +353,10 @@ export class Tokenizer {
text: cap[0]
};
if (this.options.sanitize) {
const text = this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0]);
token.type = 'paragraph';
token.text = this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0]);
token.tokens = [];
this.lexer.inline(token.text, token.tokens);
token.text = text;
token.tokens = this.lexer.inline(text);
}
return token;
}
@@ -416,8 +414,7 @@ export class Tokenizer {
// header child tokens
l = item.header.length;
for (j = 0; j < l; j++) {
item.header[j].tokens = [];
this.lexer.inline(item.header[j].text, item.header[j].tokens);
item.header[j].tokens = this.lexer.inline(item.header[j].text);
}
// cell child tokens
@@ -425,8 +422,7 @@ export class Tokenizer {
for (j = 0; j < l; j++) {
row = item.rows[j];
for (k = 0; k < row.length; k++) {
row[k].tokens = [];
this.lexer.inline(row[k].text, row[k].tokens);
row[k].tokens = this.lexer.inline(row[k].text);
}
}
@@ -438,45 +434,40 @@ export class Tokenizer {
lheading(src) {
const cap = this.rules.block.lheading.exec(src);
if (cap) {
const token = {
return {
type: 'heading',
raw: cap[0],
depth: cap[2].charAt(0) === '=' ? 1 : 2,
text: cap[1],
tokens: []
tokens: this.lexer.inline(cap[1])
};
this.lexer.inline(token.text, token.tokens);
return token;
}
}
paragraph(src) {
const cap = this.rules.block.paragraph.exec(src);
if (cap) {
const token = {
const text = cap[1].charAt(cap[1].length - 1) === '\n'
? cap[1].slice(0, -1)
: cap[1];
return {
type: 'paragraph',
raw: cap[0],
text: cap[1].charAt(cap[1].length - 1) === '\n'
? cap[1].slice(0, -1)
: cap[1],
tokens: []
text,
tokens: this.lexer.inline(text)
};
this.lexer.inline(token.text, token.tokens);
return token;
}
}
text(src) {
const cap = this.rules.block.text.exec(src);
if (cap) {
const token = {
return {
type: 'text',
raw: cap[0],
text: cap[0],
tokens: []
tokens: this.lexer.inline(cap[0])
};
this.lexer.inline(token.text, token.tokens);
return token;
}
}
@@ -645,7 +636,7 @@ export class Tokenizer {
type: 'em',
raw: src.slice(0, lLength + match.index + rLength + 1),
text,
tokens: this.lexer.inlineTokens(text, [])
tokens: this.lexer.inlineTokens(text)
};
}
@@ -655,7 +646,7 @@ export class Tokenizer {
type: 'strong',
raw: src.slice(0, lLength + match.index + rLength + 1),
text,
tokens: this.lexer.inlineTokens(text, [])
tokens: this.lexer.inlineTokens(text)
};
}
}
@@ -696,7 +687,7 @@ export class Tokenizer {
type: 'del',
raw: cap[0],
text: cap[2],
tokens: this.lexer.inlineTokens(cap[2], [])
tokens: this.lexer.inlineTokens(cap[2])
};
}
}


@@ -1,5 +1,6 @@
export function getDefaults() {
return {
async: false,
baseUrl: null,
breaks: false,
extensions: null,


@@ -105,13 +105,7 @@ export function marked(src, opt, callback) {
return;
}
try {
const tokens = Lexer.lex(src, opt);
if (opt.walkTokens) {
marked.walkTokens(tokens, opt.walkTokens);
}
return Parser.parse(tokens, opt);
} catch (e) {
function onError(e) {
e.message += '\nPlease report this to https://github.com/markedjs/marked.';
if (opt.silent) {
return '<p>An error occurred:</p><pre>'
@@ -120,6 +114,23 @@ export function marked(src, opt, callback) {
}
throw e;
}
try {
const tokens = Lexer.lex(src, opt);
if (opt.walkTokens) {
if (opt.async) {
return Promise.all(marked.walkTokens(tokens, opt.walkTokens))
.then(() => {
return Parser.parse(tokens, opt);
})
.catch(onError);
}
marked.walkTokens(tokens, opt.walkTokens);
}
return Parser.parse(tokens, opt);
} catch (e) {
onError(e);
}
}
/**
@@ -236,10 +247,12 @@ marked.use = function(...args) {
if (pack.walkTokens) {
const walkTokens = marked.defaults.walkTokens;
opts.walkTokens = function(token) {
pack.walkTokens.call(this, token);
let values = [];
values.push(pack.walkTokens.call(this, token));
if (walkTokens) {
walkTokens.call(this, token);
values = values.concat(walkTokens.call(this, token));
}
return values;
};
}
@@ -256,35 +269,37 @@ marked.use = function(...args) {
*/
marked.walkTokens = function(tokens, callback) {
let values = [];
for (const token of tokens) {
callback.call(marked, token);
values = values.concat(callback.call(marked, token));
switch (token.type) {
case 'table': {
for (const cell of token.header) {
marked.walkTokens(cell.tokens, callback);
values = values.concat(marked.walkTokens(cell.tokens, callback));
}
for (const row of token.rows) {
for (const cell of row) {
marked.walkTokens(cell.tokens, callback);
values = values.concat(marked.walkTokens(cell.tokens, callback));
}
}
break;
}
case 'list': {
marked.walkTokens(token.items, callback);
values = values.concat(marked.walkTokens(token.items, callback));
break;
}
default: {
if (marked.defaults.extensions && marked.defaults.extensions.childTokens && marked.defaults.extensions.childTokens[token.type]) { // Walk any extensions
marked.defaults.extensions.childTokens[token.type].forEach(function(childTokens) {
marked.walkTokens(token[childTokens], callback);
values = values.concat(marked.walkTokens(token[childTokens], callback));
});
} else if (token.tokens) {
marked.walkTokens(token.tokens, callback);
values = values.concat(marked.walkTokens(token.tokens, callback));
}
}
}
}
return values;
};
/**

test/bench.js

@@ -3,6 +3,7 @@ import { fileURLToPath } from 'url';
import { isEqual } from './helpers/html-differ.js';
import { loadFiles } from './helpers/load.js';
import { marked as cjsMarked } from '../lib/marked.cjs';
import { marked as esmMarked } from '../lib/marked.esm.js';
const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -30,9 +31,10 @@ export function load() {
export async function runBench(options) {
options = options || {};
const specs = load();
const tests = {};
// Non-GFM, Non-pedantic
marked.setOptions({
cjsMarked.setOptions({
gfm: false,
breaks: false,
pedantic: false,
@@ -40,9 +42,9 @@ export async function runBench(options) {
smartLists: false
});
if (options.marked) {
marked.setOptions(options.marked);
cjsMarked.setOptions(options.marked);
}
await bench('cjs marked', specs, marked.parse);
tests['cjs marked'] = cjsMarked.parse;
esmMarked.setOptions({
gfm: false,
@@ -54,113 +56,76 @@ export async function runBench(options) {
if (options.marked) {
esmMarked.setOptions(options.marked);
}
await bench('esm marked', specs, esmMarked.parse);
tests['esm marked'] = esmMarked.parse;
// GFM
marked.setOptions({
gfm: true,
breaks: false,
pedantic: false,
sanitize: false,
smartLists: false
});
if (options.marked) {
marked.setOptions(options.marked);
}
await bench('cjs marked (gfm)', specs, marked.parse);
esmMarked.setOptions({
gfm: true,
breaks: false,
pedantic: false,
sanitize: false,
smartLists: false
});
if (options.marked) {
esmMarked.setOptions(options.marked);
}
await bench('esm marked (gfm)', specs, esmMarked.parse);
// Pedantic
marked.setOptions({
gfm: false,
breaks: false,
pedantic: true,
sanitize: false,
smartLists: false
});
if (options.marked) {
marked.setOptions(options.marked);
}
await bench('cjs marked (pedantic)', specs, marked.parse);
esmMarked.setOptions({
gfm: false,
breaks: false,
pedantic: true,
sanitize: false,
smartLists: false
});
if (options.marked) {
esmMarked.setOptions(options.marked);
}
await bench('esm marked (pedantic)', specs, esmMarked.parse);
// esmMarked.setOptions({
// gfm: true,
// breaks: false,
// pedantic: false,
// sanitize: false,
// smartLists: false
// });
// if (options.marked) {
// esmMarked.setOptions(options.marked);
// }
// tests['esm marked (gfm)'] = esmMarked.parse;
try {
await bench('commonmark', specs, (await (async() => {
tests.commonmark = (await (async() => {
const { Parser, HtmlRenderer } = await import('commonmark');
const parser = new Parser();
const writer = new HtmlRenderer();
return function(text) {
return writer.render(parser.parse(text));
};
})()));
})());
} catch (e) {
console.error('Could not bench commonmark. (Error: %s)', e.message);
}
try {
await bench('markdown-it', specs, (await (async() => {
tests['markdown-it'] = (await (async() => {
const MarkdownIt = (await import('markdown-it')).default;
const md = new MarkdownIt();
return md.render.bind(md);
})()));
})());
} catch (e) {
console.error('Could not bench markdown-it. (Error: %s)', e.message);
}
await bench(tests, specs);
}
export async function bench(name, specs, engine) {
const before = process.hrtime();
for (let i = 0; i < 1e3; i++) {
for (const spec of specs) {
await engine(spec.markdown);
export async function bench(tests, specs) {
const stats = {};
for (const name in tests) {
stats[name] = {
elapsed: 0n,
correct: 0
};
}
console.log();
for (let i = 0; i < specs.length; i++) {
const spec = specs[i];
process.stdout.write(`${(i * 100 / specs.length).toFixed(1).padStart(5)}% ${i.toString().padStart(specs.length.toString().length)} of ${specs.length}\r`);
for (const name in tests) {
const test = tests[name];
const before = process.hrtime.bigint();
for (let n = 0; n < 1e3; n++) {
await test(spec.markdown);
}
const after = process.hrtime.bigint();
stats[name].elapsed += after - before;
stats[name].correct += (await isEqual(spec.html, await test(spec.markdown)) ? 1 : 0);
}
}
const elapsed = process.hrtime(before);
const ms = prettyElapsedTime(elapsed).toFixed();
let correct = 0;
for (const spec of specs) {
if (await isEqual(spec.html, await engine(spec.markdown))) {
correct++;
}
for (const name in tests) {
const ms = prettyElapsedTime(stats[name].elapsed);
const percent = (stats[name].correct / specs.length * 100).toFixed(2);
console.log(`${name} completed in ${ms}ms and passed ${percent}%`);
}
const percent = (correct / specs.length * 100).toFixed(2);
console.log('%s completed in %sms and passed %s%', name, ms, percent);
}
/**
* A simple one-time benchmark
*/
export async function time(options) {
options = options || {};
const specs = load();
if (options.marked) {
marked.setOptions(options.marked);
}
await bench('marked', specs, marked);
}
/**
@@ -204,35 +169,23 @@ function parseArg(argv) {
while (argv.length) {
const arg = getarg();
switch (arg) {
case '-t':
case '--time':
options.time = true;
break;
case '-m':
case '--minified':
options.minified = true;
break;
default:
if (arg.indexOf('--') === 0) {
const opt = camelize(arg.replace(/^--(no-)?/, ''));
if (!defaults.hasOwnProperty(opt)) {
continue;
}
options.marked = options.marked || {};
if (arg.indexOf('--no-') === 0) {
options.marked[opt] = typeof defaults[opt] !== 'boolean'
? null
: false;
} else {
options.marked[opt] = typeof defaults[opt] !== 'boolean'
? argv.shift()
: true;
}
} else {
orphans.push(arg);
}
break;
if (arg.indexOf('--') === 0) {
const opt = camelize(arg.replace(/^--(no-)?/, ''));
if (!defaults.hasOwnProperty(opt)) {
continue;
}
options.marked = options.marked || {};
if (arg.indexOf('--no-') === 0) {
options.marked[opt] = typeof defaults[opt] !== 'boolean'
? null
: false;
} else {
options.marked[opt] = typeof defaults[opt] !== 'boolean'
? argv.shift()
: true;
}
} else {
orphans.push(arg);
}
}
@@ -257,28 +210,19 @@ function camelize(text) {
* Main
*/
export default async function main(argv) {
marked = (await import('../lib/marked.cjs')).marked;
marked = cjsMarked;
const opt = parseArg(argv);
if (opt.minified) {
marked = (await import('../marked.min.js')).marked;
}
if (opt.time) {
await time(opt);
} else {
await runBench(opt);
}
await runBench(opt);
}
/**
* returns time to millisecond granularity
* @param hrtimeElapsed {bigint}
*/
function prettyElapsedTime(hrtimeElapsed) {
const seconds = hrtimeElapsed[0];
const frac = Math.round(hrtimeElapsed[1] / 1e3) / 1e3;
return seconds * 1e3 + frac;
return Number(hrtimeElapsed / 1_000_000n);
}
process.title = 'marked bench';


@@ -1058,4 +1058,23 @@ br
});
expect(marked('*text*').trim()).toBe('<p><em>text walked</em></p>');
});
it('should wait for async `walkTokens` function', async() => {
marked.use({
async: true,
async walkTokens(token) {
if (token.type === 'em') {
await new Promise((resolve) => {
setTimeout(resolve, 100);
});
token.text += ' walked';
token.tokens = this.Lexer.lexInline(token.text);
}
}
});
const promise = marked('*text*');
expect(promise).toBeInstanceOf(Promise);
const html = await promise;
expect(html.trim()).toBe('<p><em>text walked</em></p>');
});
});