/**
 * marked - A markdown parser
 * Copyright (c) 2011, Christopher Jeffrey. (MIT Licensed)
 */

/**
 * Block-Level Grammar
 *
 * Rules are tried in insertion order against the start of the remaining
 * input; the first one that matches yields the next token. None of these
 * regexes may carry the `g` flag: `exec` on a global regex resumes from
 * `lastIndex`, so an anchored rule would spuriously fail on the next string.
 */

var rules = {
  newline: /^\n/,
  block: /^[ ]{4,}[^\n]*(?:\n[ ]{4,}[^\n]*)*/,
  heading: /^ *(#{1,6}) *([^\n#]*) *#*/,
  lheading: /^([^\n]+)\n *(=|-){3,}/,
  hr: /^( ?[\-*_]){3,}/,
  blockquote: /^ *>[^\n]*(?:\n *>[^\n]*)*/,
  list: /^(?:( *)(\*|\+|-|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*(?:\n+|$)){2,}/,
  html: /^<([^\/\s>]+)[^\n>]*>[^\n]*(?:\n[^\n]+)*\n?<\/\1>/,
  text: /^[^\n]+/
};

var keys = Object.keys(rules)
  , len = keys.length;

/**
 * Lexer
 *
 * Normalizes line endings and tabs, strips link definitions
 * (`[id]: href "title"`) into a lookup table, then tokenizes what is left.
 *
 * @param {string} str - Markdown source.
 * @returns {Array} Block tokens; the link table is attached as `.links`.
 */

var lex = function(str) {
  var tokens = []
    , links = {};

  // normalize whitespace
  str = str.replace(/\r\n/g, '\n')
           .replace(/\r/g, '\n');

  // a tab counts as four spaces so that tab-indented
  // code blocks satisfy the `block` rule
  str = str.replace(/\t/g, '    ');

  // unfortunately, this is the most
  // performant method of getting link
  // definitions out of the way.
  // the href is `\S+` so it cannot run across a newline
  // and swallow the lines after the definition.
  str = str.replace(
    /^ {0,3}\[([^\]]+)\]: *(\S+)(?: +"([^"]+)")?/gm,
    function(_, id, href, title) {
      links[id] = {
        href: href,
        title: title
      };
      return '';
    }
  );

  tokens.links = links;

  return lex.token(str, tokens, 0);
};

/**
 * Tokenizes `str`, appending to `tokens`. Calls itself for the contents of
 * list items and blockquotes.
 *
 * @param {string} str - Remaining source text.
 * @param {Array} tokens - Token list to append to.
 * @param {number} line - Line counter stamped on each token; it is bumped
 *   only by the `newline` rule and is not propagated back out of recursion.
 * @returns {Array} The same `tokens` array.
 */

lex.token = function lexBlock(str, tokens, line) {
  var cap
    , key
    , i;

  while (str.length) {
    for (i = 0; i < len; i++) {
      key = keys[i];
      cap = rules[key].exec(str);
      if (!cap) continue;

      str = str.substring(cap[0].length);

      switch (key) {
        case 'newline':
          line++;
          break;
        case 'hr':
          tokens.push({
            type: 'hr',
            line: line
          });
          break;
        case 'lheading':
          tokens.push({
            type: 'heading',
            depth: cap[2] === '=' ? 1 : 2,
            text: cap[1],
            line: line
          });
          break;
        case 'heading':
          tokens.push({
            type: 'heading',
            depth: cap[1].length,
            text: cap[2],
            line: line
          });
          break;
        case 'block':
          tokens.push({
            type: 'block',
            text: cap[0].replace(/^ {4}/gm, ''),
            line: line
          });
          break;
        case 'list':
          tokens.push({
            type: 'list_start',
            // cap[2] is the marker of the last item matched
            ordered: /^\d+\.$/.test(cap[2]),
            line: line
          });

          // get each top-level item in the list; indented
          // lines are kept as part of the item they follow
          cap[0].match(
            /^( *)(\*|\+|-|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*/gm
          ).forEach(function(item) {
            // remove the list item's sigil
            // so it is seen as the next token
            item = item.replace(/^ *(\*|\+|-|\d+\.) */, '');

            // outdent whatever the list item contains by the
            // indentation of its first continuation line, hacky
            var indent = /\n( +)/.exec(item);
            if (indent) {
              item = item.replace(
                new RegExp('^ {' + indent[1].length + '}', 'gm'),
                ''
              );
            }

            tokens.push({
              type: 'list_item_start',
              line: line
            });

            // recurse
            lexBlock(item, tokens, line);

            tokens.push({
              type: 'list_item_end',
              line: line
            });
          });

          tokens.push({
            type: 'list_end',
            line: line
          });
          break;
        case 'html':
        case 'text':
          tokens.push({
            type: key,
            text: cap[0],
            line: line
          });
          break;
        case 'blockquote':
          tokens.push({
            type: 'blockquote_start',
            line: line
          });

          // drop the `>` marker and the single space that may follow it,
          // then recurse on the quoted text
          lexBlock(cap[0].replace(/^ *> ?/gm, ''), tokens, line);

          tokens.push({
            type: 'blockquote_end',
            line: line
          });
          break;
      }
      break;
    }
  }

  return tokens;
};

/**
 * Inline Processing
 *
 * `inline(str)` converts span-level markdown in `str` to HTML. Reference
 * links are resolved against the `links` table of the token list that
 * `parse` is currently working on.
 */

var inline = (function() {
  var grammar = {
    // `[^ >]` keeps the match from running past the first `>`
    autolink: /^<([^ >]*(:|@)[^ >]*)>/,
    tag: /^<[^\n>]+>/,
    img: /^!\[([^\]]+)\]\(([^\s\)]+)\s*([^\)]*)\)/,
    link: /^\[([^\]]+)\]\(([^\)]+)\)/,
    reflink: /^\[([^\]]+)\]\[([^\]]+)\]/,
    strong: /^__([\s\S]+?)__|^\*\*([\s\S]+?)\*\*/,
    em: /^_([^_]+)_|^\*([^*]+)\*/,
    escape: /^`([^`]+)`|^``([\s\S]+?)``/
  };

  // plain text: the shortest run of characters that is followed by
  // something another rule would match (or by the end of the input).
  // the `^` anchors are stripped from the other rules' sources so they
  // can be used as a lookahead; `[^...]` classes are left alone.
  grammar.text = new RegExp(
    '^([\\s\\S]+?)(?='
    + Object.keys(grammar).map(function(name) {
      return grammar[name].source.replace(/(^|[^\[])\^/g, '$1');
    }).join('|')
    + '|$)'
  );

  var order = Object.keys(grammar)
    , count = order.length;

  /**
   * Inline Lexer
   *
   * @param {string} str - Span-level markdown.
   * @returns {string} HTML.
   */

  var lexer = function(str) {
    var out = ''
      // `tokens` is only set while `parse` is running
      , links = (tokens && tokens.links) || {}
      , cap
      , key
      , link
      , title
      , addr
      , i;

    while (str.length) {
      for (i = 0; i < count; i++) {
        key = order[i];
        cap = grammar[key].exec(str);
        if (!cap) continue;

        str = str.substring(cap[0].length);

        switch (key) {
          case 'tag':
            // raw html passes through untouched
            out += cap[0];
            break;
          case 'img':
            // cap[3] is whatever follows the url, usually `"title"`
            title = cap[3].replace(/^"([\s\S]*)"$/, '$1');
            out += '<img src="'
              + escape(cap[2])
              + '" alt="'
              + escape(cap[1])
              + '"'
              + (title ? ' title="' + escape(title) + '"' : '')
              + '>';
            break;
          case 'link':
          case 'reflink':
            // for an inline link cap[2] is the url itself, so the
            // lookup normally misses and cap[2] is used as the href
            link = links[cap[2]] || '';
            out += '<a href="'
              + escape(link.href || cap[2])
              + '"'
              + (link.title ? ' title="' + escape(link.title) + '"' : '')
              + '>'
              + lexer(cap[1])
              + '</a>';
            break;
          case 'autolink':
            if (cap[2] === '@') {
              // e-mail address: obfuscate both href and label
              addr = mangle(cap[1]);
              out += '<a href="'
                + mangle('mailto:') + addr
                + '">'
                + addr
                + '</a>';
            } else {
              out += '<a href="'
                + escape(cap[1])
                + '">'
                + escape(cap[1])
                + '</a>';
            }
            break;
          case 'strong':
            out += '<strong>'
              + lexer(cap[2] || cap[1])
              + '</strong>';
            break;
          case 'em':
            out += '<em>'
              + lexer(cap[2] || cap[1])
              + '</em>';
            break;
          case 'escape':
            // code span
            out += '<code>'
              + escape(cap[2] || cap[1])
              + '</code>';
            break;
          case 'text':
            out += escape(cap[1]);
            break;
          default:
            break;
        }
        break;
      }
    }

    return out;
  };

  return lexer;
})();

/**
 * Parsing
 *
 * `tokens` is the reversed token list being consumed and `token` the
 * current token. Both are module-level state shared by `next`, `tok`,
 * `parse` and the inline lexer.
 */

var tokens
  , token;

/**
 * Advances to the next token.
 * @returns {Object|undefined} The new current token, or undefined at the end.
 */

var next = function() {
  return token = tokens.pop();
};

/**
 * Renders the current token, consuming the tokens that belong to it.
 * @returns {string|undefined} HTML; undefined for an unknown token type.
 */

var tok = function() {
  var body
    , type
    , last;

  switch (token.type) {
    case 'hr':
      return '<hr>';
    case 'heading':
      return '<h' + token.depth + '>'
        + inline(token.text)
        + '</h' + token.depth + '>';
    case 'block':
      return '<pre><code>'
        + escape(token.text)
        + '</code></pre>';
    case 'blockquote_start':
      body = [];
      while (next().type !== 'blockquote_end') {
        body.push(tok());
      }
      return '<blockquote>'
        + body.join('')
        + '</blockquote>';
    case 'list_start':
      body = [];
      type = token.ordered ? 'ol' : 'ul';
      while (next().type !== 'list_end') {
        body.push(tok());
      }
      return '<' + type + '>'
        + body.join('')
        + '</' + type + '>';
    case 'list_item_start':
      body = [];
      while (next().type !== 'list_item_end') {
        // TODO incorporate paragraph
        // list items here
        if (token.type === 'text') {
          // text directly inside an item is not wrapped in <p>
          body.push(inline(token.text));
        } else {
          body.push(tok());
        }
      }
      return '<li>'
        + body.join(' ')
        + '</li>';
    case 'html':
      return inline(token.text);
    case 'text':
      // merge text tokens on consecutive lines into one
      // paragraph; a blank line in between ends it
      body = [];
      last = token.line;
      while (token && token.type === 'text') {
        if (token.line > last) break;
        last = token.line + 1;
        body.push(token.text);
        next();
      }
      // hand the lookahead token back for the caller's loop
      if (token) tokens.push(token);
      return '<p>'
        + inline(body.join(' '))
        + '</p>';
  }
};

/**
 * Renders a token list produced by `lex` to HTML.
 *
 * @param {Array} src - Token list; it is copied, not modified.
 * @returns {string} HTML, top-level blocks joined by a single space.
 */

var parse = function(src) {
  var out = [];

  // work on a reversed copy so the caller's array survives;
  // slice() does not carry the `links` property over
  tokens = src.slice().reverse();
  tokens.links = src.links;

  try {
    while (next()) {
      out.push(tok());
    }
  } finally {
    tokens = null;
    token = null;
  }

  return out.join(' ');
};

/**
 * Helpers
 */

/**
 * Escapes the characters that are special in HTML text and attributes.
 * @param {string} html
 * @returns {string}
 */

var escape = function(html) {
  return html
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
};

/**
 * Encodes every character of `str` as a numeric character reference,
 * choosing decimal or hexadecimal form at random.
 * @param {string} str
 * @returns {string}
 */

var mangle = function(str) {
  var out = ''
    , l = str.length
    , i = 0
    , ch;

  for (; i < l; i++) {
    ch = str.charCodeAt(i);
    if (Math.random() > .5) {
      ch = 'x' + ch.toString(16);
    }
    out += '&#' + ch + ';';
  }

  return out;
};

/**
 * Expose
 */

/**
 * Converts markdown source to HTML.
 * @param {string} str
 * @returns {string}
 */

var marked = function(str) {
  return parse(lex(str));
};

marked.parser = parse;
marked.lexer = lex;

// guarded so that loading the file outside CommonJS does not throw
if (typeof module !== 'undefined' && module.exports) {
  module.exports = marked;
}