feat: add async option (#2474)

* fix: return values from walkTokens

* docs: add async docs

* test: add async test

* docs: add nav to async

* Update docs/USING_PRO.md

Co-authored-by: Steven <steven@ceriously.com>

* test: expect promise

* Update docs/USING_ADVANCED.md

Co-authored-by: Steven <steven@ceriously.com>

Co-authored-by: Steven <steven@ceriously.com>
Tony Brix 2022-08-30 09:36:16 -05:00 committed by GitHub
parent 33724a3201
commit 994b2e6127
8 changed files with 215 additions and 171 deletions


@@ -44,6 +44,7 @@ console.log(marked.parse(markdownString));
|Member |Type |Default |Since |Notes |
|:-----------|:---------|:--------|:--------|:-------------|
|async |`boolean` |`false` |4.1.0 |If true, `walkTokens` functions can be async and `marked.parse` will return a promise that resolves when all `walkTokens` functions resolve.|
|baseUrl |`string` |`null` |0.3.9 |A prefix url for any relative link. |
|breaks |`boolean` |`false` |v0.2.7 |If true, add `<br>` on a single line break (copies GitHub behavior on comments, but not on rendered markdown files). Requires `gfm` be `true`.|
|gfm |`boolean` |`true` |v0.2.1 |If true, use approved [GitHub Flavored Markdown (GFM) specification](https://github.github.com/gfm/).|
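A minimal sketch of how the new `async` option is meant to be used; the markdown input and the `walkTokens` body below are illustrative only:

```js
import { marked } from 'marked';

marked.use({
  async: true, // marked.parse now returns a promise instead of a string
  async walkTokens(token) {
    // illustrative async work per token; a real hook might fetch or validate data
    if (token.type === 'link') {
      token.title = token.title || 'checked';
    }
  }
});

// Resolves once every walkTokens call has settled.
const html = await marked.parse('[example](https://example.com)');
console.log(html);
```

With `async` left at its default of `false`, `marked.parse` keeps returning a plain HTML string.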


@@ -438,6 +438,78 @@ console.log(marked.parse('A Description List:\n'
***
<h2 id="async">Async Marked : <code>async</code></h2>
Marked will return a promise if the `async` option is true. The `async` option will tell marked to await any `walkTokens` functions before parsing the tokens and returning an HTML string.
Simple Example:
```js
const walkTokens = async (token) => {
  if (token.type === 'link') {
    try {
      await fetch(token.href);
    } catch (ex) {
      token.title = 'invalid';
    }
  }
};

marked.use({ walkTokens, async: true });

const markdown = `
[valid link](https://example.com)
[invalid link](https://invalidurl.com)
`;

const html = await marked.parse(markdown);
```
Custom Extension Example:
```js
const importUrl = {
  extensions: [{
    name: 'importUrl',
    level: 'block',
    start(src) { return src.indexOf('\n:'); },
    tokenizer(src) {
      const rule = /^:(https?:\/\/.+?):/;
      const match = rule.exec(src);
      if (match) {
        return {
          type: 'importUrl',
          raw: match[0],
          url: match[1],
          html: '' // will be replaced in walkTokens
        };
      }
    },
    renderer(token) {
      return token.html;
    }
  }],
  async: true, // needed to tell marked to return a promise
  async walkTokens(token) {
    if (token.type === 'importUrl') {
      const res = await fetch(token.url);
      token.html = await res.text();
    }
  }
};

marked.use(importUrl);

const markdown = `
# example.com
:https://example.com:
`;

const html = await marked.parse(markdown);
```
<h2 id="lexer">The Lexer</h2>
The lexer takes a markdown string and calls the tokenizer functions.
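For reference, a small sketch of calling the lexer directly, assuming the standard `marked.lexer` and `marked.parser` exports; the input string is illustrative:

```js
import { marked } from 'marked';

// Tokenize only; block tokens carry their inline child tokens in `token.tokens`.
const tokens = marked.lexer('# Heading\n\nSome *emphasis* here.');
console.log(tokens.map((token) => token.type)); // e.g. ['heading', 'paragraph']

// The token list can then be rendered to HTML by the parser.
console.log(marked.parser(tokens));
```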


@@ -51,6 +51,7 @@
<li><a href="/using_pro#tokenizer">Tokenizer</a></li>
<li><a href="/using_pro#walk-tokens">Walk Tokens</a></li>
<li><a href="/using_pro#extensions">Custom Extensions</a></li>
<li><a href="/using_pro#async">Async Marked</a></li>
<li><a href="/using_pro#lexer">Lexer</a></li>
<li><a href="/using_pro#parser">Parser</a></li>
</ul>


@@ -19,7 +19,7 @@ function outputLink(cap, link, raw, lexer) {
href,
title,
text,
tokens: lexer.inlineTokens(text, [])
tokens: lexer.inlineTokens(text)
};
lexer.state.inLink = false;
return token;
@@ -125,15 +125,13 @@ export class Tokenizer {
}
}
const token = {
return {
type: 'heading',
raw: cap[0],
depth: cap[1].length,
text,
tokens: []
tokens: this.lexer.inline(text)
};
this.lexer.inline(token.text, token.tokens);
return token;
}
}
@@ -355,10 +353,10 @@ export class Tokenizer {
text: cap[0]
};
if (this.options.sanitize) {
const text = this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0]);
token.type = 'paragraph';
token.text = this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0]);
token.tokens = [];
this.lexer.inline(token.text, token.tokens);
token.text = text;
token.tokens = this.lexer.inline(text);
}
return token;
}
@@ -416,8 +414,7 @@ export class Tokenizer {
// header child tokens
l = item.header.length;
for (j = 0; j < l; j++) {
item.header[j].tokens = [];
this.lexer.inline(item.header[j].text, item.header[j].tokens);
item.header[j].tokens = this.lexer.inline(item.header[j].text);
}
// cell child tokens
@@ -425,8 +422,7 @@ export class Tokenizer {
for (j = 0; j < l; j++) {
row = item.rows[j];
for (k = 0; k < row.length; k++) {
row[k].tokens = [];
this.lexer.inline(row[k].text, row[k].tokens);
row[k].tokens = this.lexer.inline(row[k].text);
}
}
@@ -438,45 +434,40 @@ export class Tokenizer {
lheading(src) {
const cap = this.rules.block.lheading.exec(src);
if (cap) {
const token = {
return {
type: 'heading',
raw: cap[0],
depth: cap[2].charAt(0) === '=' ? 1 : 2,
text: cap[1],
tokens: []
tokens: this.lexer.inline(cap[1])
};
this.lexer.inline(token.text, token.tokens);
return token;
}
}
paragraph(src) {
const cap = this.rules.block.paragraph.exec(src);
if (cap) {
const token = {
const text = cap[1].charAt(cap[1].length - 1) === '\n'
? cap[1].slice(0, -1)
: cap[1];
return {
type: 'paragraph',
raw: cap[0],
text: cap[1].charAt(cap[1].length - 1) === '\n'
? cap[1].slice(0, -1)
: cap[1],
tokens: []
text,
tokens: this.lexer.inline(text)
};
this.lexer.inline(token.text, token.tokens);
return token;
}
}
text(src) {
const cap = this.rules.block.text.exec(src);
if (cap) {
const token = {
return {
type: 'text',
raw: cap[0],
text: cap[0],
tokens: []
tokens: this.lexer.inline(cap[0])
};
this.lexer.inline(token.text, token.tokens);
return token;
}
}
@@ -645,7 +636,7 @@ export class Tokenizer {
type: 'em',
raw: src.slice(0, lLength + match.index + rLength + 1),
text,
tokens: this.lexer.inlineTokens(text, [])
tokens: this.lexer.inlineTokens(text)
};
}
@@ -655,7 +646,7 @@ export class Tokenizer {
type: 'strong',
raw: src.slice(0, lLength + match.index + rLength + 1),
text,
tokens: this.lexer.inlineTokens(text, [])
tokens: this.lexer.inlineTokens(text)
};
}
}
@@ -696,7 +687,7 @@ export class Tokenizer {
type: 'del',
raw: cap[0],
text: cap[2],
tokens: this.lexer.inlineTokens(cap[2], [])
tokens: this.lexer.inlineTokens(cap[2])
};
}
}


@@ -1,5 +1,6 @@
export function getDefaults() {
return {
async: false,
baseUrl: null,
breaks: false,
extensions: null,


@@ -105,13 +105,7 @@ export function marked(src, opt, callback) {
return;
}
try {
const tokens = Lexer.lex(src, opt);
if (opt.walkTokens) {
marked.walkTokens(tokens, opt.walkTokens);
}
return Parser.parse(tokens, opt);
} catch (e) {
function onError(e) {
e.message += '\nPlease report this to https://github.com/markedjs/marked.';
if (opt.silent) {
return '<p>An error occurred:</p><pre>'
@@ -120,6 +114,23 @@ export function marked(src, opt, callback) {
}
throw e;
}
try {
const tokens = Lexer.lex(src, opt);
if (opt.walkTokens) {
if (opt.async) {
return Promise.all(marked.walkTokens(tokens, opt.walkTokens))
.then(() => {
return Parser.parse(tokens, opt);
})
.catch(onError);
}
marked.walkTokens(tokens, opt.walkTokens);
}
return Parser.parse(tokens, opt);
} catch (e) {
onError(e);
}
}
/**
@@ -236,10 +247,12 @@ marked.use = function(...args) {
if (pack.walkTokens) {
const walkTokens = marked.defaults.walkTokens;
opts.walkTokens = function(token) {
pack.walkTokens.call(this, token);
let values = [];
values.push(pack.walkTokens.call(this, token));
if (walkTokens) {
walkTokens.call(this, token);
values = values.concat(walkTokens.call(this, token));
}
return values;
};
}
@@ -256,35 +269,37 @@ marked.use = function(...args) {
*/
marked.walkTokens = function(tokens, callback) {
let values = [];
for (const token of tokens) {
callback.call(marked, token);
values = values.concat(callback.call(marked, token));
switch (token.type) {
case 'table': {
for (const cell of token.header) {
marked.walkTokens(cell.tokens, callback);
values = values.concat(marked.walkTokens(cell.tokens, callback));
}
for (const row of token.rows) {
for (const cell of row) {
marked.walkTokens(cell.tokens, callback);
values = values.concat(marked.walkTokens(cell.tokens, callback));
}
}
break;
}
case 'list': {
marked.walkTokens(token.items, callback);
values = values.concat(marked.walkTokens(token.items, callback));
break;
}
default: {
if (marked.defaults.extensions && marked.defaults.extensions.childTokens && marked.defaults.extensions.childTokens[token.type]) { // Walk any extensions
marked.defaults.extensions.childTokens[token.type].forEach(function(childTokens) {
marked.walkTokens(token[childTokens], callback);
values = values.concat(marked.walkTokens(token[childTokens], callback));
});
} else if (token.tokens) {
marked.walkTokens(token.tokens, callback);
values = values.concat(marked.walkTokens(token.tokens, callback));
}
}
}
}
return values;
};
/**

test/bench.js

@@ -3,6 +3,7 @@ import { fileURLToPath } from 'url';
import { isEqual } from './helpers/html-differ.js';
import { loadFiles } from './helpers/load.js';
import { marked as cjsMarked } from '../lib/marked.cjs';
import { marked as esmMarked } from '../lib/marked.esm.js';
const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -30,9 +31,10 @@ export function load() {
export async function runBench(options) {
options = options || {};
const specs = load();
const tests = {};
// Non-GFM, Non-pedantic
marked.setOptions({
cjsMarked.setOptions({
gfm: false,
breaks: false,
pedantic: false,
@@ -40,9 +42,9 @@ export async function runBench(options) {
smartLists: false
});
if (options.marked) {
marked.setOptions(options.marked);
cjsMarked.setOptions(options.marked);
}
await bench('cjs marked', specs, marked.parse);
tests['cjs marked'] = cjsMarked.parse;
esmMarked.setOptions({
gfm: false,
@@ -54,113 +56,76 @@ export async function runBench(options) {
if (options.marked) {
esmMarked.setOptions(options.marked);
}
await bench('esm marked', specs, esmMarked.parse);
tests['esm marked'] = esmMarked.parse;
// GFM
marked.setOptions({
gfm: true,
breaks: false,
pedantic: false,
sanitize: false,
smartLists: false
});
if (options.marked) {
marked.setOptions(options.marked);
}
await bench('cjs marked (gfm)', specs, marked.parse);
esmMarked.setOptions({
gfm: true,
breaks: false,
pedantic: false,
sanitize: false,
smartLists: false
});
if (options.marked) {
esmMarked.setOptions(options.marked);
}
await bench('esm marked (gfm)', specs, esmMarked.parse);
// Pedantic
marked.setOptions({
gfm: false,
breaks: false,
pedantic: true,
sanitize: false,
smartLists: false
});
if (options.marked) {
marked.setOptions(options.marked);
}
await bench('cjs marked (pedantic)', specs, marked.parse);
esmMarked.setOptions({
gfm: false,
breaks: false,
pedantic: true,
sanitize: false,
smartLists: false
});
if (options.marked) {
esmMarked.setOptions(options.marked);
}
await bench('esm marked (pedantic)', specs, esmMarked.parse);
// esmMarked.setOptions({
// gfm: true,
// breaks: false,
// pedantic: false,
// sanitize: false,
// smartLists: false
// });
// if (options.marked) {
// esmMarked.setOptions(options.marked);
// }
// tests['esm marked (gfm)'] = esmMarked.parse;
try {
await bench('commonmark', specs, (await (async() => {
tests.commonmark = (await (async() => {
const { Parser, HtmlRenderer } = await import('commonmark');
const parser = new Parser();
const writer = new HtmlRenderer();
return function(text) {
return writer.render(parser.parse(text));
};
})()));
})());
} catch (e) {
console.error('Could not bench commonmark. (Error: %s)', e.message);
}
try {
await bench('markdown-it', specs, (await (async() => {
tests['markdown-it'] = (await (async() => {
const MarkdownIt = (await import('markdown-it')).default;
const md = new MarkdownIt();
return md.render.bind(md);
})()));
})());
} catch (e) {
console.error('Could not bench markdown-it. (Error: %s)', e.message);
}
await bench(tests, specs);
}
export async function bench(name, specs, engine) {
const before = process.hrtime();
for (let i = 0; i < 1e3; i++) {
for (const spec of specs) {
await engine(spec.markdown);
export async function bench(tests, specs) {
const stats = {};
for (const name in tests) {
stats[name] = {
elapsed: 0n,
correct: 0
};
}
console.log();
for (let i = 0; i < specs.length; i++) {
const spec = specs[i];
process.stdout.write(`${(i * 100 / specs.length).toFixed(1).padStart(5)}% ${i.toString().padStart(specs.length.toString().length)} of ${specs.length}\r`);
for (const name in tests) {
const test = tests[name];
const before = process.hrtime.bigint();
for (let n = 0; n < 1e3; n++) {
await test(spec.markdown);
}
const after = process.hrtime.bigint();
stats[name].elapsed += after - before;
stats[name].correct += (await isEqual(spec.html, await test(spec.markdown)) ? 1 : 0);
}
}
const elapsed = process.hrtime(before);
const ms = prettyElapsedTime(elapsed).toFixed();
let correct = 0;
for (const spec of specs) {
if (await isEqual(spec.html, await engine(spec.markdown))) {
correct++;
}
for (const name in tests) {
const ms = prettyElapsedTime(stats[name].elapsed);
const percent = (stats[name].correct / specs.length * 100).toFixed(2);
console.log(`${name} completed in ${ms}ms and passed ${percent}%`);
}
const percent = (correct / specs.length * 100).toFixed(2);
console.log('%s completed in %sms and passed %s%', name, ms, percent);
}
/**
* A simple one-time benchmark
*/
export async function time(options) {
options = options || {};
const specs = load();
if (options.marked) {
marked.setOptions(options.marked);
}
await bench('marked', specs, marked);
}
/**
@@ -204,35 +169,23 @@ function parseArg(argv) {
while (argv.length) {
const arg = getarg();
switch (arg) {
case '-t':
case '--time':
options.time = true;
break;
case '-m':
case '--minified':
options.minified = true;
break;
default:
if (arg.indexOf('--') === 0) {
const opt = camelize(arg.replace(/^--(no-)?/, ''));
if (!defaults.hasOwnProperty(opt)) {
continue;
}
options.marked = options.marked || {};
if (arg.indexOf('--no-') === 0) {
options.marked[opt] = typeof defaults[opt] !== 'boolean'
? null
: false;
} else {
options.marked[opt] = typeof defaults[opt] !== 'boolean'
? argv.shift()
: true;
}
} else {
orphans.push(arg);
}
break;
if (arg.indexOf('--') === 0) {
const opt = camelize(arg.replace(/^--(no-)?/, ''));
if (!defaults.hasOwnProperty(opt)) {
continue;
}
options.marked = options.marked || {};
if (arg.indexOf('--no-') === 0) {
options.marked[opt] = typeof defaults[opt] !== 'boolean'
? null
: false;
} else {
options.marked[opt] = typeof defaults[opt] !== 'boolean'
? argv.shift()
: true;
}
} else {
orphans.push(arg);
}
}
@@ -257,28 +210,19 @@ function camelize(text) {
* Main
*/
export default async function main(argv) {
marked = (await import('../lib/marked.cjs')).marked;
marked = cjsMarked;
const opt = parseArg(argv);
if (opt.minified) {
marked = (await import('../marked.min.js')).marked;
}
if (opt.time) {
await time(opt);
} else {
await runBench(opt);
}
await runBench(opt);
}
/**
* returns time to millisecond granularity
* @param hrtimeElapsed {bigint}
*/
function prettyElapsedTime(hrtimeElapsed) {
const seconds = hrtimeElapsed[0];
const frac = Math.round(hrtimeElapsed[1] / 1e3) / 1e3;
return seconds * 1e3 + frac;
return Number(hrtimeElapsed / 1_000_000n);
}
process.title = 'marked bench';


@@ -1058,4 +1058,23 @@ br
});
expect(marked('*text*').trim()).toBe('<p><em>text walked</em></p>');
});
it('should wait for async `walkTokens` function', async() => {
marked.use({
async: true,
async walkTokens(token) {
if (token.type === 'em') {
await new Promise((resolve) => {
setTimeout(resolve, 100);
});
token.text += ' walked';
token.tokens = this.Lexer.lexInline(token.text);
}
}
});
const promise = marked('*text*');
expect(promise).toBeInstanceOf(Promise);
const html = await promise;
expect(html.trim()).toBe('<p><em>text walked</em></p>');
});
});