marked/lib/marked.js

/**
 * marked - A markdown parser
 * Copyright (c) 2011, Christopher Jeffrey. (MIT Licensed)
 */

/**
 * Block-Level Grammar
 */

// Block-level rules. Every pattern is anchored at the start of the
// remaining source; the lexer tries them in the order they are
// declared here and the first one that matches wins.
//
// None of these may carry the `g` flag: they are applied with
// `exec()`, and a global regex remembers `lastIndex` between calls,
// which makes an anchored pattern fail on the next string it is
// given.
var rules = {
  // a single line break
  newline: /^\n/,
  // one or more lines indented by at least four spaces
  block: /^[ ]{4,}[^\n]*(?:\n[ ]{4,}[^\n]*)*/,
  // atx heading: 1-6 `#`, the text, optional closing `#`s
  heading: /^ *(#{1,6}) *([^\n#]*) *#*/,
  // setext heading: a line followed by three or more `=` or `-`
  lheading: /^([^\n]+)\n *(=|-){3,}/,
  // three or more of `-`, `*` or `_`, each optionally preceded by a space
  hr: /^( ?[\-*_]){3,}/,
  // consecutive lines that start with `>`
  blockquote: /^ *>[^\n]*(?:\n *>[^\n]*)*/,
  // two or more bulleted/numbered items, with their indented continuation lines
  list: /^(?:( *)(\*|\+|-|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*(?:\n+|$)){2,}/,
  // an opening tag up to the matching closing tag
  html: /^<([^\/\s>]+)[^\n>]*>[^\n]*(?:\n[^\n]+)*\n?<\/\1>/,
  // anything else up to the end of the line
  text: /^[^\n]+/
};

// rule names in declaration order, and how many there are;
// computed once so the lexer loop does not redo it per token
var keys = Object.keys(rules);
var len = keys.length;

/**
 * Lexer
 */

// Turn markdown source into a flat token array.
//
// Line endings are normalized to `\n`, tabs are expanded to four
// spaces, and link definitions (`[id]: href "title"`) are stripped
// from the source and collected on `tokens.links` (keyed by id)
// before the remaining text is handed to `lex.token`.
//
// str     - markdown source
// returns - the token array, with a `links` property attached
var lex = function(str) {
  var tokens = []
    , links = {};

  // normalize whitespace
  str = str.replace(/\r\n/g, '\n')
           .replace(/\r/g, '\n');

  str = str.replace(/\t/g, '    ');

  // unfortunately, this is the most
  // performant method of getting link
  // definitions out of the way.
  // the href is `\S+` so that it stops at the end of
  // the line instead of running on into the next one.
  str = str.replace(
    /^ {0,3}\[([^\]]+)\]: *(\S+)(?: +"([^"]+)")?/gm,
    function(_, id, href, title) {
      links[id] = {
        href: href,
        title: title
      };
      return '';
    }
  );

  tokens.links = links;

  return lex.token(str, tokens, 0);
};

// Tokenize block-level markdown.
//
// Repeatedly tries every rule (in `keys` order) against the start of
// `str`, consumes the first match and pushes the matching token(s)
// onto `tokens`. List items and blockquotes are tokenized
// recursively between their start/end tokens.
//
// Note that the function expression is itself named `lex`, so the
// `lex(...)` calls in the body recurse into this tokenizer, not into
// the outer `lex` entry point.
//
// str     - remaining source to tokenize
// tokens  - array the tokens are appended to
// line    - line counter stamped on each token; only advanced by
//           `newline` matches, and recursive calls get their own copy
// returns - `tokens`
lex.token = function lex(str, tokens, line) {
  // declared locally: an undeclared `cap` would leak into the global
  // scope, be shared by every recursive call, and throw in strict mode
  var cap
    , key
    , i;

  while (str.length) {
    for (i = 0; i < len; i++) {
      key = keys[i];
      cap = rules[key].exec(str);
      if (!cap) continue;

      // consume what was matched
      str = str.substring(cap[0].length);

      switch (key) {
        case 'newline':
          line++;
          break;
        case 'hr':
          tokens.push({
            type: 'hr',
            line: line
          });
          break;
        case 'lheading':
          tokens.push({
            type: 'heading',
            depth: cap[2] === '=' ? 1 : 2,
            text: cap[1],
            line: line
          });
          break;
        case 'heading':
          tokens.push({
            type: 'heading',
            depth: cap[1].length,
            text: cap[2],
            line: line
          });
          break;
        case 'block':
          tokens.push({
            type: 'block',
            // drop the four-space indent of every line
            text: cap[0].replace(/^ {4}/gm, ''),
            line: line
          });
          break;
        case 'list':
          tokens.push({
            type: 'list_start',
            // a numeric bullet such as "1." means an ordered list
            ordered: isFinite(cap[2]),
            line: line
          });
          // get each top-level
          // item in the list
          cap[0].match(
            /^( *)(\*|\+|-|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*/gm
          ).forEach(function(item) {
            var indent;

            // remove the list item's sigil
            // so it's seen as the next token
            item = item.replace(/^ *(\*|\+|-|\d+\.) */, '');

            // outdent whatever the list item contains by the
            // indent of its first continuation line, hacky
            indent = /\n( +)/.exec(item);
            if (indent) {
              item = item.replace(
                new RegExp('^ {' + indent[1].length + '}', 'gm'),
                ''
              );
            }

            tokens.push({
              type: 'list_item_start',
              line: line
            });

            // recurse
            lex(item, tokens, line);

            tokens.push({
              type: 'list_item_end',
              line: line
            });
          });
          tokens.push({
            type: 'list_end',
            line: line
          });
          break;
        case 'html':
        case 'text':
          tokens.push({
            type: key,
            text: cap[0],
            line: line
          });
          break;
        case 'blockquote':
          tokens.push({
            type: 'blockquote_start',
            line: line
          });

          // strip the `>` markers and recurse
          lex(cap[0].replace(/^ *>/gm, ''), tokens, line);

          tokens.push({
            type: 'blockquote_end',
            line: line
          });
          break;
      }

      // a rule matched: start over with the first rule
      break;
    }
  }

  return tokens;
};

/**
 * Inline Processing
 */

// this is really bad. i should define 
// some lexemes for all of the inline stuff, 
// but this was just easier for the time being.

// Render the inline markdown in `str` to HTML.
//
// Finished HTML fragments (code spans, raw tags, e-mail links) are
// swapped for `#n#` placeholders so the later passes cannot touch
// them; literal `#` characters are protected as `#0#` first so they
// cannot be mistaken for a placeholder. Everything is put back in a
// single pass at the end.
//
// Reads reference-link definitions from the module-level
// `tokens.links`.
//
// str     - inline markdown source
// returns - the HTML string
var inline = function(str) {
  var hash = ['#'];

  // store a finished fragment, return its placeholder
  var stash = function(html) {
    return '#' + (hash.push(html) - 1) + '#';
  };

  // undo the `#` protection on text that is about to be stashed:
  // the final pass does not look inside stashed fragments
  var unprotect = function(text) {
    return text.replace(/#0#/g, '#');
  };

  str = str.replace(/#/g, '#0#');

  str = str.replace(/`([^`]+)`/g, function(__, text) {
    return stash('<code>' + escape(unprotect(text)) + '</code>');
  });

  // for <http://hello.world/> links
  str = str.replace(
    /<([^<>:\/ ]+:(?:\/\/)?[^>\n]+?|[^<>\n]+?(@)[^<>\n]+?)>/g,
    function(__, href, at) {
      if (at) {
        // according to the markdown "spec"
        // we need to mangle email addresses.
        // the whole anchor is stashed so that the
        // entities are not escaped a second time.
        var addr = mangle(unprotect(href))
          , mail = mangle('mailto:') + addr;
        return stash('<a href="' + mail + '">' + addr + '</a>');
      }
      // the tags are stashed by the next pass,
      // the link text is escaped with the rest
      return '<a href="' + href + '">' + href + '</a>';
    }
  );

  // raw html tags pass through untouched
  str = str.replace(/<[^\n>]+>/g, function(tag) {
    return stash(unprotect(tag));
  });

  str = escape(str);

  // img
  // must run before the link pass, which would
  // otherwise consume the `[alt](src)` part
  str = str.replace(
    /!\[([^\]]+)\]\(([^\s\)]+)\s*([^\)]*)\)/g,
    function(_, alt, src, title) {
      // the quotes around the title were escaped above
      title = title.replace(/^&quot;([\s\S]*)&quot;$/, '$1');
      return '<img src="'
        + src + '" alt="'
        + alt + '"'
        + (title
          ? ' title="' + title + '"'
          : '')
        + '>';
    }
  );

  // links
  str = str.replace(
    /\[([^\]]+)\]\(([^\)]+)\)/g,
    '<a href="$2">$1</a>'
  );

  // This is [an example][id]
  // reference-style link.
  str = str.replace(
    /\[([^\]]+)\]\[([^\]]+)\]/g,
    function(match, text, id) {
      var links = tokens && tokens.links
        , link;

      id = unprotect(id);

      // leave the text alone when the id was never defined
      if (!links || !Object.prototype.hasOwnProperty.call(links, id)) {
        return match;
      }

      link = links[id];
      return '<a href="'
        + link.href + '"'
        + (link.title
          ? ' title="'
            + link.title + '"'
          : '')
        + '>' + text + '</a>';
    }
  );

  // strong
  str = str.replace(/__([^_]+)__/g, '<strong>$1</strong>');
  str = str.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');

  // em
  str = str.replace(/_([^_]+)_/g, '<em>$1</em>');
  str = str.replace(/\*([^*]+)\*/g, '<em>$1</em>');

  // br
  str = str.replace(/  $/gm, '<br>');

  // put the stashed fragments (and literal `#`) back
  str = str.replace(/#(\d+)#/g, function(__, i) {
    return hash[i];
  });

  return str;
};

/**
 * Parsing
 */

// parser state: the remaining token stream (kept reversed, so the
// next token sits at the end) and the token currently being rendered
var tokens;
var token;

// take the next token off the stream, make it the
// current one and return it (undefined once exhausted)
var next = function() {
  token = tokens.pop();
  return token;
};

// Render the current `token` (and, for container tokens, everything
// up to the matching end token) to HTML. Returns undefined for a
// token type it does not know.
var tok = function() {
  var type = token.type
    , parts = []
    , tag
    , last;

  // render tokens into `parts` until the given end token turns up
  var collect = function(end) {
    while (next().type !== end) {
      parts.push(tok());
    }
  };

  if (type === 'hr') {
    return '<hr>';
  }

  if (type === 'heading') {
    return '<h' + token.depth + '>'
      + inline(token.text)
      + '</h' + token.depth + '>';
  }

  if (type === 'block') {
    return '<pre><code>'
      + escape(token.text)
      + '</code></pre>';
  }

  if (type === 'blockquote_start') {
    collect('blockquote_end');
    return '<blockquote>' + parts.join('') + '</blockquote>';
  }

  if (type === 'list_start') {
    tag = token.ordered ? 'ol' : 'ul';
    collect('list_end');
    return '<' + tag + '>' + parts.join('') + '</' + tag + '>';
  }

  if (type === 'list_item_start') {
    while (next().type !== 'list_item_end') {
      // TODO incorporate paragraph
      // list items here
      // text directly inside an item is not wrapped in <p>
      parts.push(
        token.type === 'text'
          ? inline(token.text)
          : tok()
      );
    }
    return '<li>' + parts.join(' ') + '</li>';
  }

  if (type === 'html') {
    return inline(token.text);
  }

  if (type === 'text') {
    // merge text tokens on consecutive
    // lines into a single paragraph
    last = token.line;
    while (token && token.type === 'text' && !(token.line > last)) {
      last = token.line + 1;
      parts.push(token.text);
      next();
    }
    // hand the lookahead token back to the stream
    if (token) tokens.push(token);
    return '<p>' + inline(parts.join(' ')) + '</p>';
  }
};

// Render a token array (as produced by the lexer) to HTML.
//
// The tokens are consumed from a reversed copy, so the caller's
// array is left untouched and can be parsed again. The `links`
// property is carried over to the copy because the inline renderer
// reads it from the module-level `tokens`.
//
// src     - token array, optionally carrying a `links` property
// returns - the HTML for all top-level tokens, joined by a space
var parse = function(src) {
  var out = [];

  tokens = src.slice().reverse();
  tokens.links = src.links;

  try {
    while (next()) {
      out.push(tok());
    }
  } finally {
    // never leave parser state behind, even if rendering throws
    tokens = null;
    token = null;
  }

  return out.join(' ');
};

/**
 * Helpers
 */

// Escape the characters that are special in HTML text and
// attribute values. `&` is replaced first so the entities
// produced by the later replacements are not escaped again.
//
// Note: this shadows the legacy global `escape()` within the module.
//
// html    - string to escape
// returns - the escaped string
var escape = function(html) {
  return html
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    // numeric reference: `&apos;` is not an HTML 4 entity
    .replace(/'/g, '&#39;');
};

// Obfuscate a string by writing every character as an HTML
// character reference, choosing at random (per character)
// between the decimal and the hexadecimal form.
var mangle = function(str) {
  var out = ''
    , i = 0
    , code;

  while (i < str.length) {
    code = str[i].charCodeAt(0);
    out += '&#'
      + (Math.random() > .5
        ? 'x' + code.toString(16)
        : code)
      + ';';
    i++;
  }

  return out;
};

/**
 * Expose
 */

// the module itself is the one-shot converter:
// markdown source in, html out
exports = function(str) {
  var stream = lex(str);
  return parse(stream);
};

// the two stages are also exposed on their own
exports.parser = parse;
exports.lexer = lex;

module.exports = exports;
marked down 2011-07-24 08:15:35 -05:00			`/**`
			`* marked - A markdown parser`
			`* Copyright (c) 2011, Christopher Jeffrey. (MIT Licensed)`
			`*/`

			`/**`
			`* Block-Level Grammar`
			`*/`

			`var rules = {`
			`newline: /^\n/,`
			`block: /^[ ]{4,}[^\n](?:\n[ ]{4,}[^\n])*/,`
			`heading: /^ (#{1,6}) ([^\n#]) #*/,`
			`lheading: /^([^\n]+)\n *(=\|-){3,}/,`
			`hr: /^( ?[\-*_]){3,}/,`
			`blockquote: /^ >[^\n](?:\n >[^\n])*/,`
			`list: /^(?:( )(\\|\+\|-\|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*(?:\n+\|$)){2,}/g,`
			`html: /^<([^\/\s>]+)[^\n>]>[^\n](?:\n[^\n]+)*\n?<\/\1>/,`
			`text: /^[^\n]+/`
			`};`

			`var keys = Object.keys(rules)`
			`, len = keys.length;`

			`/**`
better 2011-08-13 17:06:08 -05:00			`* Lexer`
marked down 2011-07-24 08:15:35 -05:00			`*/`

better 2011-08-13 17:06:08 -05:00			`var lex = function(str) {`
			`var tokens = []`
			`, links = {};`
marked down 2011-07-24 08:15:35 -05:00
better 2011-08-13 17:06:08 -05:00			`// normalize whitespace`
			`str = str.replace(/\r\n/g, '\n')`
			`.replace(/\r/g, '\n');`

			`str = str.replace(/\t/g, ' ');`
			`//str = str.replace(/(^\|\n) +(\n\|$)/g, '$1$2');`

			`// unfortunately, this is the most`
			`// performant method of getting link`
			`// definitions out of the way.`
			`str = str.replace(`
			`/^ {0,3}\[([^\]]+)\]: *([^ ]+)(?: +"([^"]+)")?/gm,`
			`function(_, id, href, title) {`
			`links[id] = {`
			`href: href,`
			`title: title`
			`};`
			`return '';`
			`});`

			`tokens.links = links;`
marked down 2011-07-24 08:15:35 -05:00
better 2011-08-13 17:06:08 -05:00			`return lex.token(str, tokens, 0);`
			`};`
better recursion? 2011-08-13 16:53:15 -05:00
better 2011-08-13 17:06:08 -05:00			`lex.token = function lex(str, tokens, line) {`
			`while (str.length)`
			`for (var i = 0; i < len; i++) {`
			`var key = keys[i]`
			`, rule = rules[key];`
better recursion? 2011-08-13 16:53:15 -05:00
			`cap = rule.exec(str);`
			`if (!cap) continue;`
			`str = str.substring(cap[0].length);`

			`switch (key) {`
			`case 'newline':`
			`line++;`
			`break;`
			`case 'hr':`
			`tokens.push({`
			`type: 'hr',`
			`line: line`
			`});`
			`break;`
			`case 'lheading':`
			`tokens.push({`
			`type: 'heading',`
			`depth: cap[2] === '=' ? 1 : 2,`
			`text: cap[1],`
			`line: line`
			`});`
			`break;`
			`case 'heading':`
			`tokens.push({`
			`type: 'heading',`
			`depth: cap[1].length,`
			`text: cap[2],`
			`line: line`
			`});`
			`break;`
			`case 'block':`
			`cap = cap[0].replace(/^ {4}/gm, '');`
			`tokens.push({`
			`type: 'block',`
			`text: cap,`
			`line: line`
			`});`
			`break;`
			`case 'list':`
			`tokens.push({`
			`type: 'list_start',`
			`ordered: isFinite(cap[2]),`
			`line: line`
			`});`
			`// get each top-level`
			`// item in the list`
			`cap = cap[0].match(`
			`/^( )(\\|\+\|-\|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*/gm`
			`);`
			`cap.forEach(function(item) {`
			`// remove the list items sigil`
			`// so its seen as the next token`
			`item = item.replace(/^ (\\|\+\|-\|\d+\.) */, '');`
			`// outdent whatever the`
			`// list item contains, hacky`
			`var len = /\n( +)/.exec(item);`
			`if (len) {`
			`len = len[1].length;`
			`item = item.replace(`
			`new RegExp('^ {' + len + '}', 'gm'),`
			`''`
			`);`
			`}`
			`tokens.push({`
			`type: 'list_item_start',`
			`line: line`
			`});`
better 2011-08-13 17:06:08 -05:00
			`// recurse`
better recursion? 2011-08-13 16:53:15 -05:00			`lex(item, tokens, line);`
better 2011-08-13 17:06:08 -05:00
better recursion? 2011-08-13 16:53:15 -05:00			`tokens.push({`
			`type: 'list_item_end',`
			`line: line`
			`});`
			`});`
			`tokens.push({`
			`type: 'list_end',`
			`line: line`
			`});`
			`break;`
			`case 'html':`
			`case 'text':`
			`tokens.push({`
			`type: key,`
			`text: cap[0],`
			`line: line`
			`});`
			`break;`
			`case 'blockquote':`
			`tokens.push({`
			`type: 'blockquote_start',`
			`line: line`
			`});`
			`cap = cap[0].replace(/^ *>/gm, '');`
better 2011-08-13 17:06:08 -05:00
			`// recurse`
better recursion? 2011-08-13 16:53:15 -05:00			`lex(cap, tokens, line);`
better 2011-08-13 17:06:08 -05:00
better recursion? 2011-08-13 16:53:15 -05:00			`tokens.push({`
			`type: 'blockquote_end',`
			`line: line`
			`});`
			`break;`
			`}`
			`break;`
			`}`
marked down 2011-07-24 08:15:35 -05:00
			`return tokens;`
			`};`

			`/**`
			`* Inline Processing`
			`*/`

			`// this is really bad. i should define`
			`// some lexemes for all of the inline stuff,`
			`// but this was just easier for the time being.`

			`var inline = function(str) {`
more compliant inline 2011-08-13 18:21:30 -05:00			`var hash = ['#'];`

			`str = str.replace(/#/g, '#0#');`

			str = str.replace(/`([^`]+)`/g, function(__, text) {
			`text = '<code>' + escape(text) + '</code>';`
			`return '#' + (hash.push(text) - 1) + '#';`
			`});`

			`// for <http://hello.world/> links`
marked down 2011-07-24 08:15:35 -05:00			`str = str.replace(`
more compliant inline 2011-08-13 18:21:30 -05:00			`/<([^<>:\/ ]+:(?:\/\/)?[^>\n]+?\|[^<>\n]+?(@)[^<>\n]+?)>/g,`
			`function(__, href, at) {`
			`if (at) {`
			`// according to the markdown "spec"`
			`// we need to mangle email addresses`
			`var href = mangle(href)`
			`, mail = mangle('mailto:') + href;`
			`return '<a href="' + mail + '">' + href + '</a>';`
			`}`
			`return '<a href="' + href + '">' + href + '</a>';`
			`}`
			`);`

			`str = str.replace(/<[^\n>]+>/g, function(tag) {`
			`return '#' + (hash.push(tag) - 1) + '#';`
marked down 2011-07-24 08:15:35 -05:00			`});`

more compliant inline 2011-08-13 18:21:30 -05:00			`str = escape(str);`

marked down 2011-07-24 08:15:35 -05:00			`// links`
			`str = str.replace(`
			`/\[([^\]]+)\]\(([^\)]+)\)/g,`
			`'<a href="$2">$1</a>'`
			`);`

			`// This is [an example][id]`
			`// reference-style link.`
			`str = str.replace(`
			`/\[([^\]]+)\]\[([^\]]+)\]/g,`
better 2011-08-13 17:06:08 -05:00			`function(__, text, id) {`
			`var link = tokens.links[id];`
			`return '<a href="'`
			`+ link.href + '"'`
			`+ (link.title`
			`? ' title="'`
			`+ link.title + '"'`
			`: '')`
			`+ '>' + text + '</a>';`
			`}`
			`);`
marked down 2011-07-24 08:15:35 -05:00
more compliant inline 2011-08-13 18:21:30 -05:00			`// img`
marked down 2011-07-24 08:15:35 -05:00			`str = str.replace(`
more compliant inline 2011-08-13 18:21:30 -05:00			`/!\[([^\]]+)\]\(([^\s\)]+)\s([^\)])\)/g,`
			`function(_, alt, src, title) {`
			`return '<img src="'`
			`+ src + '" alt="'`
			`+ alt + '"'`
			`+ (title`
			`? ' title="' + title + '"'`
			`: '')`
			`+ '>';`
			`});`
marked down 2011-07-24 08:15:35 -05:00
			`// strong`
			`str = str.replace(/__([^_]+)__/g, '<strong>$1</strong>');`
			`str = str.replace(/\\([^]+)\\*/g, '<strong>$1</strong>');`

			`// em`
			`str = str.replace(/_([^_]+)_/g, '<em>$1</em>');`
			`str = str.replace(/\([^]+)\*/g, '<em>$1</em>');`

			`// br`
			`str = str.replace(/ $/gm, '<br>');`

more compliant inline 2011-08-13 18:21:30 -05:00			`str = str.replace(/#(\d+)#/g, function(__, i) {`
			`return hash[i];`
			`});`

marked down 2011-07-24 08:15:35 -05:00			`return str;`
			`};`

			`/**`
			`* Parsing`
			`*/`

better 2011-08-13 17:06:08 -05:00			`var tokens`
			`, token;`
recursion 2011-08-13 16:38:46 -05:00
marked down 2011-07-24 08:15:35 -05:00			`var next = function() {`
			`return token = tokens.pop();`
			`};`

			`var tok = function() {`
			`switch (token.type) {`
			`case 'hr':`
			`return '<hr>';`
			`case 'heading':`
			`return '<h' + token.depth + '>'`
			`+ inline(token.text)`
			`+ '</h' + token.depth + '>';`
			`case 'block':`
			`return '<pre><code>'`
			`+ escape(token.text)`
			`+ '</code></pre>';`
			`case 'blockquote_start':`
			`var body = [];`
			`while (next().type !== 'blockquote_end') {`
			`body.push(tok());`
			`}`
			`return '<blockquote>'`
			`+ body.join('')`
			`+ '</blockquote>';`
			`case 'list_start':`
			`var body = []`
			`, type = token.ordered ? 'ol' : 'ul';`
			`while (next().type !== 'list_end') {`
			`body.push(tok());`
			`}`
			`return '<' + type + '>'`
			`+ body.join('')`
			`+ '</' + type + '>';`
			`case 'list_item_start':`
			`var body = [];`
			`while (next().type !== 'list_item_end') {`
more compliant inline 2011-08-13 18:21:30 -05:00			`// TODO incorporate paragraph`
			`// list items here`
marked down 2011-07-24 08:15:35 -05:00			`if (token.type === 'text') {`
			`body.push(inline(token.text));`
			`} else {`
			`body.push(tok());`
			`}`
			`}`
			`return '<li>'`
			`+ body.join(' ')`
			`+ '</li>';`
			`case 'html':`
			`return inline(token.text);`
			`case 'text':`
			`var body = []`
			`, last = token.line;`
			`while (token && token.type === 'text') {`
			`if (token.line > last) break;`
			`last = token.line + 1;`
			`body.push(token.text);`
			`next();`
			`}`
			`if (token) tokens.push(token);`
			`return '<p>'`
			`+ inline(body.join(' '))`
			`+ '</p>';`
			`}`
			`};`

			`var parse = function(src) {`
			`tokens = src.reverse();`

			`var out = [];`
			`while (next()) {`
			`out.push(tok());`
			`}`

			`tokens = null;`
			`token = null;`

fix code blocks 2011-07-25 15:37:42 -05:00			`return out.join(' ');`
marked down 2011-07-24 08:15:35 -05:00			`};`

			`/**`
			`* Helpers`
			`*/`

fix code blocks 2011-07-25 15:37:42 -05:00			`var escape = function(html) {`
marked down 2011-07-24 08:15:35 -05:00			`return html`
fix code blocks 2011-07-25 15:37:42 -05:00			`.replace(/&/g, '&')`
marked down 2011-07-24 08:15:35 -05:00			`.replace(/</g, '<')`
			`.replace(/>/g, '>')`
			`.replace(/"/g, '"')`
			`.replace(/'/g, ''');`
			`};`

			`var mangle = function(str) {`
			`var ch`
			`, i = 0`
			`, l = str.length`
			`, out = '';`

			`for (; i < l; i++) {`
			`ch = str[i].charCodeAt(0);`
			`if (Math.random() > .5) {`
			`ch = 'x' + ch.toString(16);`
			`}`
			`out += '&#' + ch + ';';`
			`}`

			`return out;`
			`};`

			`/**`
			`* Expose`
			`*/`

			`exports = function(str) {`
			`return parse(lex(str));`
			`};`

			`exports.parser = parse;`
			`exports.lexer = lex;`

recursion 2011-08-13 16:38:46 -05:00			`module.exports = exports;`