/**
 * marked - A markdown parser
 * Copyright (c) 2011, Christopher Jeffrey. (MIT Licensed)
 */

/**
 * Block-Level Grammar
 *
 * Every rule is anchored (`^`) and tried against the head of the
 * remaining input, in declaration order.
 */

var rules = {
  newline: /^\n/,
  block: /^[ ]{4,}[^\n]*(?:\n[ ]{4,}[^\n]*)*/,
  heading: /^ *(#{1,6}) *([^\n#]*) *#*/,
  lheading: /^([^\n]+)\n *(=|-){3,}/,
  hr: /^( ?[\-*_]){3,}/,
  blockquote: /^ *>[^\n]*(?:\n *>[^\n]*)*/,
  // NOTE: no /g flag here. An anchored rule driven by .exec() must not be
  // global, otherwise lastIndex persists between calls and the rule
  // silently stops matching on later invocations of the lexer.
  list: /^(?:( *)(\*|\+|-|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*(?:\n+|$)){2,}/,
  html: /^<([^\/\s>]+)[^\n>]*>[^\n]*(?:\n[^\n]+)*\n?<\/\1>/,
  text: /^[^\n]+/
};

var keys = Object.keys(rules)
  , len = keys.length;

/**
 * Lexer
 *
 * Normalizes the input, pulls link definitions out of the text, then
 * tokenizes. Returns an array of block-level tokens carrying a `links`
 * property that maps reference-link ids to { href, title }.
 */

var lex = function(str) {
  var tokens = []
    , links = {};

  // normalize whitespace: unify line endings and expand tabs to the
  // four spaces the `block` rule expects
  str = str.replace(/\r\n/g, '\n')
           .replace(/\r/g, '\n');
  str = str.replace(/\t/g, '    ');

  // unfortunately, this is the most
  // performant method of getting link
  // definitions out of the way.
  str = str.replace(
    /^ {0,3}\[([^\]]+)\]: *([^ ]+)(?: +"([^"]+)")?/gm,
    function(_, id, href, title) {
      links[id] = {
        href: href,
        title: title
      };
      return '';
    }
  );

  tokens.links = links;

  return lex.token(str, tokens, 0);
};

/**
 * Tokenizer. Consumes `str` rule by rule, pushing tokens onto `tokens`.
 * `line` tracks an approximate source line so the parser can merge
 * adjacent text tokens into paragraphs. Recurses into list items and
 * blockquote bodies via the named function expression.
 */

lex.token = function lex(str, tokens, line) {
  var i
    , key
    , rule
    , cap; // declared — the original leaked `cap` as an implicit global

  while (str.length)
  for (i = 0; i < len; i++) {
    key = keys[i];
    rule = rules[key];

    cap = rule.exec(str);
    if (!cap) continue;

    str = str.substring(cap[0].length);

    switch (key) {
      case 'newline':
        line++;
        break;
      case 'hr':
        tokens.push({
          type: 'hr',
          line: line
        });
        break;
      case 'lheading':
        tokens.push({
          type: 'heading',
          depth: cap[2] === '=' ? 1 : 2,
          text: cap[1],
          line: line
        });
        break;
      case 'heading':
        tokens.push({
          type: 'heading',
          depth: cap[1].length,
          text: cap[2],
          line: line
        });
        break;
      case 'block':
        cap = cap[0].replace(/^ {4}/gm, '');
        tokens.push({
          type: 'block',
          text: cap,
          line: line
        });
        break;
      case 'list':
        tokens.push({
          type: 'list_start',
          // a numeric sigil ("1.") marks an ordered list
          ordered: isFinite(cap[2]),
          line: line
        });
        // get each top-level
        // item in the list
        cap = cap[0].match(
          /^( *)(\*|\+|-|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*/gm
        );
        cap.forEach(function(item) {
          // remove the list item's sigil
          // so it's seen as the next token
          item = item.replace(/^ *(\*|\+|-|\d+\.) */, '');
          // outdent whatever the
          // list item contains, hacky
          var space = /\n( +)/.exec(item);
          if (space) {
            space = space[1].length;
            item = item.replace(
              new RegExp('^ {' + space + '}', 'gm'),
              ''
            );
          }
          tokens.push({
            type: 'list_item_start',
            line: line
          });
          // recurse
          lex(item, tokens, line);
          tokens.push({
            type: 'list_item_end',
            line: line
          });
        });
        tokens.push({
          type: 'list_end',
          line: line
        });
        break;
      case 'html':
      case 'text':
        tokens.push({
          type: key,
          text: cap[0],
          line: line
        });
        break;
      case 'blockquote':
        tokens.push({
          type: 'blockquote_start',
          line: line
        });
        cap = cap[0].replace(/^ *>/gm, '');
        // recurse
        lex(cap, tokens, line);
        tokens.push({
          type: 'blockquote_end',
          line: line
        });
        break;
    }
    break;
  }

  return tokens;
};

/**
 * Inline Processing
 */
// this is really bad. i should define
// some lexemes for all of the inline stuff,
// but this was just easier for the time being.

var inline = function(str) {
  // `hash` stores spans that must survive the escaping pass verbatim
  // (code spans and raw html tags): they are swapped out for `#N#`
  // placeholders and restored at the end. hash[0] holds a literal "#"
  // so genuine hash characters round-trip too.
  var hash = ['#'];

  str = str.replace(/#/g, '#0#');

  // code spans: escape the contents now, then hide the result
  str = str.replace(/`([^`]+)`/g, function(__, text) {
    text = '<code>' + escape(text) + '</code>';
    return '#' + (hash.push(text) - 1) + '#';
  });

  // for links
  str = str.replace(
    /<([^<>:\/ ]+:(?:\/\/)?[^>\n]+?|[^<>\n]+?(@)[^<>\n]+?)>/g,
    function(__, href, at) {
      if (at) {
        // according to the markdown "spec"
        // we need to mangle email addresses
        var href = mangle(href)
          , mail = mangle('mailto:') + href;
        return '<a href="' + mail + '">' + href + '</a>';
      }
      return '<a href="' + href + '">' + href + '</a>';
    }
  );

  // hide raw html tags (including the anchors generated just above)
  // from the escaping pass
  str = str.replace(/<[^\n>]+>/g, function(tag) {
    return '#' + (hash.push(tag) - 1) + '#';
  });

  str = escape(str);

  // img — must run BEFORE the inline-link rule, otherwise the link
  // regex consumes the "[alt](src)" part of "![alt](src)" first and
  // image syntax never renders
  str = str.replace(
    /!\[([^\]]+)\]\(([^\s\)]+)\s*([^\)]*)\)/g,
    function(_, alt, src, title) {
      return '<img src="' + src
        + '" alt="' + alt + '"'
        + (title ? ' title="' + title + '"' : '')
        + '>';
    }
  );

  // links
  str = str.replace(
    /\[([^\]]+)\]\(([^\)]+)\)/g,
    '<a href="$2">$1</a>'
  );

  // This is [an example][id]
  // reference-style link.
  str = str.replace(
    /\[([^\]]+)\]\[([^\]]+)\]/g,
    function(__, text, id) {
      var link = tokens.links[id];
      return '<a href="' + link.href + '"'
        + (link.title ? ' title="' + link.title + '"' : '')
        + '>' + text + '</a>';
    }
  );

  // strong
  str = str.replace(/__([^_]+)__/g, '<strong>$1</strong>');
  str = str.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');

  // em
  str = str.replace(/_([^_]+)_/g, '<em>$1</em>');
  str = str.replace(/\*([^*]+)\*/g, '<em>$1</em>');

  // br: two trailing spaces force a line break
  str = str.replace(/  $/gm, '<br>');

  // restore the hidden spans
  str = str.replace(/#(\d+)#/g, function(__, i) {
    return hash[i];
  });

  return str;
};

/**
 * Parsing
 */

var tokens
  , token;

// pop the next token off the (reversed) token stream
var next = function() {
  return token = tokens.pop();
};

/**
 * Render the current token (and any nested tokens it owns) as html.
 */

var tok = function() {
  switch (token.type) {
    case 'hr':
      return '<hr>';
    case 'heading':
      return '<h' + token.depth + '>'
        + inline(token.text)
        + '</h' + token.depth + '>';
    case 'block':
      return '<pre><code>'
        + escape(token.text)
        + '</code></pre>';
    case 'blockquote_start':
      var body = [];
      while (next().type !== 'blockquote_end') {
        body.push(tok());
      }
      return '<blockquote>'
        + body.join('')
        + '</blockquote>';
    case 'list_start':
      var body = []
        , type = token.ordered ? 'ol' : 'ul';
      while (next().type !== 'list_end') {
        body.push(tok());
      }
      return '<' + type + '>'
        + body.join('')
        + '</' + type + '>';
    case 'list_item_start':
      var body = [];
      while (next().type !== 'list_item_end') {
        // TODO incorporate paragraph
        // list items here
        if (token.type === 'text') {
          body.push(inline(token.text));
        } else {
          body.push(tok());
        }
      }
      return '<li>'
        + body.join(' ')
        + '</li>';
    case 'html':
      return inline(token.text);
    case 'text':
      // merge text tokens from consecutive source
      // lines into a single paragraph
      var body = []
        , last = token.line;
      while (token && token.type === 'text') {
        if (token.line > last) break;
        last = token.line + 1;
        body.push(token.text);
        next();
      }
      // put back the token that terminated the paragraph
      if (token) tokens.push(token);
      return '<p>'
        + inline(body.join(' '))
        + '</p>';
  }
};

/**
 * Parse a token stream (as produced by `lex`) into an html string.
 */

var parse = function(src) {
  tokens = src.reverse();

  var out = [];
  while (next()) {
    out.push(tok());
  }

  tokens = null;
  token = null;

  return out.join(' ');
};

/**
 * Helpers
 */

// escape html special characters
var escape = function(html) {
  return html
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
};

// obfuscate a string as html entities (hex form for roughly half the
// characters, picked at random) -- per the markdown "spec" for hiding
// email addresses from scrapers. intentionally nondeterministic.
var mangle = function(str) {
  var ch
    , i = 0
    , l = str.length
    , out = '';

  for (; i < l; i++) {
    ch = str[i].charCodeAt(0);
    if (Math.random() > .5) {
      ch = 'x' + ch.toString(16);
    }
    out += '&#' + ch + ';';
  }

  return out;
};

/**
 * Expose
 */

// declared with `var` (and the CommonJS assignment guarded) so the file
// neither throws under strict mode nor in non-CommonJS environments;
// behavior under node's module wrapper is unchanged
var exports = function(str) {
  return parse(lex(str));
};

exports.parser = parse;
exports.lexer = lex;

if (typeof module !== 'undefined') {
  module.exports = exports;
}