/**
* marked - A markdown parser
* Copyright (c) 2011, Christopher Jeffrey. (MIT Licensed)
*/
/**
* Block-Level Grammar
*/
/**
 * Block-level grammar. Every rule is anchored with `^` and is meant to be
 * run against the start of the remaining input.
 *
 * None of the rules may carry the `g` flag: a global regex remembers
 * `lastIndex` between `exec()` calls, so after one successful match the
 * next `exec()` on a different string starts past position 0 and an
 * anchored pattern fails even though the input matches.
 */
var rules = {
  // a single line break
  newline: /^\n/,
  // one or more consecutive lines indented by at least four spaces
  block: /^[ ]{4,}[^\n]*(?:\n[ ]{4,}[^\n]*)*/,
  // `#`-style heading: 1-6 hashes, the text, optional trailing hashes
  heading: /^ *(#{1,6}) *([^\n#]*) *#*/,
  // underlined heading: a line of text followed by three or more `=` or `-`
  lheading: /^([^\n]+)\n *(=|-){3,}/,
  // three or more of `-`, `*` or `_`, each optionally preceded by a space
  hr: /^( ?[\-*_]){3,}/,
  // consecutive lines that start with optional spaces and `>`
  blockquote: /^ *>[^\n]*(?:\n *>[^\n]*)*/,
  // two or more items introduced by `*`, `+`, `-` or `N.`; lines indented
  // relative to the item's sigil belong to that item
  list: /^(?:( *)(\*|\+|-|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*(?:\n+|$)){2,}/,
  // an opening tag through its matching closing tag
  html: /^<([^\/\s>]+)[^\n>]*>[^\n]*(?:\n[^\n]+)*\n?<\/\1>/,
  // fallback: the rest of the current line
  text: /^[^\n]+/
};
// Rule names in the order they are declared on `rules`, plus their count;
// the lexer walks the rules by index using these two values.
var keys = Object.keys(rules);
var len = keys.length;
/**
* Lexer
*/
/**
 * Normalizes a markdown string, pulls link definitions out of it and hands
 * the remainder to `lex.token`.
 *
 * @param {string} str - raw markdown source
 * @returns {Array} whatever `lex.token` returns for the token array; the
 *   array passed in carries a `links` property mapping each definition id
 *   to `{ href, title }` (`title` is undefined when none was given)
 */
var lex = function(str) {
  var tokens = [];
  var links = {};

  // normalize line endings to \n
  str = str.replace(/\r\n/g, '\n')
    .replace(/\r/g, '\n');

  // a tab counts as four spaces, so tab-indented code reaches the
  // four-space threshold of the `block` rule
  str = str.replace(/\t/g, '    ');

  // unfortunately, this is the most
  // performant method of getting link
  // definitions out of the way.
  // the href is `\S+` rather than `[^ ]+` so it stops at the end of the
  // line instead of swallowing the start of the next one.
  str = str.replace(
    /^ {0,3}\[([^\]]+)\]: *(\S+)(?: +"([^"]+)")?/gm,
    function(_, id, href, title) {
      links[id] = {
        href: href,
        title: title
      };
      return '';
    });

  tokens.links = links;

  return lex.token(str, tokens, 0);
};
/**
 * Tokenizes `str` by repeatedly trying each grammar rule, in declaration
 * order, against the start of the remaining input. The first rule that
 * matches consumes its match and emits tokens.
 *
 * Inside this function the name `lex` is the function expression's own
 * name, so `lex(...)` recurses into the tokenizer itself (used for list
 * items and blockquote bodies).
 *
 * @param {string} str - normalized markdown
 * @param {Array} tokens - array the tokens are appended to
 * @param {number} line - counter stamped on every token; it is incremented
 *   once per `newline` match
 * @returns {Array} the `tokens` array that was passed in
 */
lex.token = function lex(str, tokens, line) {
  // declared locally: an undeclared `cap` is an implicit global and a
  // ReferenceError in strict mode
  var cap;
  var key;
  var rule;
  var i;

  while (str.length) {
    for (i = 0; i < len; i++) {
      key = keys[i];
      rule = rules[key];

      // a rule with the `g` flag would otherwise resume from where its
      // previous match ended and miss an anchored match
      rule.lastIndex = 0;
      cap = rule.exec(str);
      if (!cap) continue;

      str = str.substring(cap[0].length);

      switch (key) {
        case 'newline':
          line++;
          break;
        case 'hr':
          tokens.push({
            type: 'hr',
            line: line
          });
          break;
        case 'lheading':
          tokens.push({
            type: 'heading',
            depth: cap[2] === '=' ? 1 : 2,
            text: cap[1],
            line: line
          });
          break;
        case 'heading':
          tokens.push({
            type: 'heading',
            depth: cap[1].length,
            text: cap[2],
            line: line
          });
          break;
        case 'block':
          // strip the four-space code indent from every line
          tokens.push({
            type: 'block',
            text: cap[0].replace(/^ {4}/gm, ''),
            line: line
          });
          break;
        case 'list':
          tokens.push({
            type: 'list_start',
            // numeric sigils such as "1." coerce to a finite number
            ordered: isFinite(cap[2]),
            line: line
          });

          // get each top-level
          // item in the list
          cap = cap[0].match(
            /^( *)(\*|\+|-|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*/gm
          );

          cap.forEach(function(item) {
            // remove the list item's sigil
            // so it is seen as the next token
            item = item.replace(/^ *(\*|\+|-|\d+\.) */, '');

            // outdent whatever the list item contains by the
            // indentation of its first continuation line
            var indent = /\n( +)/.exec(item);
            if (indent) {
              item = item.replace(
                new RegExp('^ {' + indent[1].length + '}', 'gm'),
                ''
              );
            }

            tokens.push({
              type: 'list_item_start',
              line: line
            });

            // recurse
            lex(item, tokens, line);

            tokens.push({
              type: 'list_item_end',
              line: line
            });
          });

          tokens.push({
            type: 'list_end',
            line: line
          });
          break;
        case 'html':
        case 'text':
          tokens.push({
            type: key,
            text: cap[0],
            line: line
          });
          break;
        case 'blockquote':
          tokens.push({
            type: 'blockquote_start',
            line: line
          });

          // recurse on the body with the leading `>` markers removed
          lex(cap[0].replace(/^ *>/gm, ''), tokens, line);

          tokens.push({
            type: 'blockquote_end',
            line: line
          });
          break;
      }

      // a rule matched: restart from the first rule on the remaining input
      break;
    }
  }

  return tokens;
};
/**
* Inline Processing
*/
// this is really bad. i should define
// some lexemes for all of the inline stuff,
// but this was just easier for the time being.
/**
 * Converts inline markdown in `str` to HTML with a fixed sequence of
 * regex substitutions: images, inline links, reference links, autolinks,
 * strong, emphasis, code spans and hard line breaks.
 *
 * The substitutions run one after another over the whole string, so
 * markup produced by an earlier step is visible to later steps. Link
 * targets, titles and alt text are inserted without escaping.
 *
 * @param {string} str - text of a single block
 * @returns {string} the text with inline constructs replaced by HTML
 */
var inline = function(str) {
  // img: ![alt](src "title")
  str = str.replace(
    /!\[([^\]]+)\]\(([^\s\)]+)\s*([^\)]*)\)/g,
    function(_, alt, src, title) {
      // the third group captures the title together with its quotes
      title = title.replace(/^["']|["']$/g, '');
      return '<img src="' + src + '" alt="' + alt + '"'
        + (title ? ' title="' + title + '"' : '')
        + '>';
    });

  // links: [text](href)
  str = str.replace(
    /\[([^\]]+)\]\(([^\)]+)\)/g,
    '<a href="$2">$1</a>'
  );

  // This is [an example][id]
  // reference-style link.
  str = str.replace(
    /\[([^\]]+)\]\[([^\]]+)\]/g,
    function(match, text, id) {
      var links = tokens && tokens.links;
      // own-property check so ids such as "constructor" do not resolve
      // to members inherited from Object.prototype
      if (!links || !Object.prototype.hasOwnProperty.call(links, id)) {
        // no such definition: leave the text untouched
        return match;
      }
      var link = links[id];
      return '<a href="' + link.href + '"'
        + (link.title ? ' title="' + link.title + '"' : '')
        + '>' + text + '</a>';
    }
  );

  // autolinks: a URL with a scheme wrapped in angle brackets. The scheme
  // must directly follow the bracket so tags generated above, which
  // contain a space before their attributes, are not picked up.
  str = str.replace(
    /<([a-zA-Z][a-zA-Z0-9+.\-]*:\/\/[^\s<>]+)>/g,
    '<a href="$1">$1</a>'
  );

  // autolinks: an e-mail address wrapped in angle brackets, obfuscated
  // through `mangle`
  str = str.replace(
    /<([^\s<>@]+@[^\s<>@]+)>/g,
    function(_, address) {
      return '<a href="' + mangle('mailto:' + address) + '">'
        + mangle(address) + '</a>';
    }
  );

  // strong
  str = str.replace(/__([\s\S]+?)__/g, '<strong>$1</strong>');
  str = str.replace(/\*\*([\s\S]+?)\*\*/g, '<strong>$1</strong>');

  // em
  str = str.replace(/_([^_]+)_/g, '<em>$1</em>');
  str = str.replace(/\*([^*]+)\*/g, '<em>$1</em>');

  // code: the span's content is HTML-escaped
  str = str.replace(/`([^`]+)`/g, function(_, s) {
    return '<code>' + escape(s) + '</code>';
  });

  // br: two or more spaces at the end of a line
  str = str.replace(/ {2,}$/gm, '<br>');

  return str;
};
/**
* Parsing
*/
// Parser state: `tokens` is the stack of tokens still to be consumed and
// `token` is the one most recently taken off it.
var tokens;
var token;

/**
 * Pops the next token off `tokens`, stores it in `token` and returns it
 * (undefined once the stack is empty).
 */
var next = function() {
  token = tokens.pop();
  return token;
};
/**
 * Renders the current `token` to HTML, consuming further tokens through
 * `next()` for constructs that span several tokens (blockquotes, lists,
 * list items and runs of text).
 *
 * @returns {string|undefined} the HTML for the construct, or undefined
 *   for a token type that has no case below
 */
var tok = function() {
  var body;
  var type;
  var last;

  switch (token.type) {
    case 'hr':
      return '<hr>';
    case 'heading':
      return '<h' + token.depth + '>'
        + inline(token.text)
        + '</h' + token.depth + '>';
    case 'block':
      return '<pre><code>'
        + escape(token.text)
        + '</code></pre>';
    case 'blockquote_start':
      body = [];
      while (next().type !== 'blockquote_end') {
        body.push(tok());
      }
      return '<blockquote>' + body.join('') + '</blockquote>';
    case 'list_start':
      body = [];
      // read before the loop: `token` changes as items are consumed
      type = token.ordered ? 'ol' : 'ul';
      while (next().type !== 'list_end') {
        body.push(tok());
      }
      return '<' + type + '>' + body.join('') + '</' + type + '>';
    case 'list_item_start':
      body = [];
      while (next().type !== 'list_item_end') {
        if (token.type === 'text') {
          // text directly inside an item is not wrapped in a paragraph
          body.push(inline(token.text));
        } else {
          body.push(tok());
        }
      }
      return '<li>' + body.join(' ') + '</li>';
    case 'html':
      // raw HTML passes through untouched
      return token.text;
    case 'text':
      // merge consecutive text tokens into a single paragraph
      body = [token.text];
      while ((last = next()) && last.type === 'text') {
        body.push(last.text);
      }
      // hand back the token that ended the run so the caller sees it
      if (last) tokens.push(last);
      return '<p>' + inline(body.join(' ')) + '</p>';
  }
};

/**
 * Renders a token array to HTML.
 *
 * @param {Array} src - tokens in document order; the array is reversed in
 *   place so tokens can be popped off its end, and its `links` property
 *   stays reachable through `tokens` while rendering
 * @returns {string} the rendered top-level constructs joined by a space
 */
var parse = function(src) {
  tokens = src.reverse();

  var out = [];
  while (next()) {
    out.push(tok());
  }

  tokens = null;
  token = null;

  return out.join(' ');
};

/**
 * Helpers
 */

/**
 * Escapes the characters that are significant in HTML text and attribute
 * values. `&` is replaced first so the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param {string} html - text to escape
 * @returns {string} the escaped text
 */
var escape = function(html) {
  return html
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
};

/**
 * Rewrites every UTF-16 code unit of `str` as a numeric character
 * reference, choosing decimal or hexadecimal form at random per unit.
 *
 * @param {string} str - text to obfuscate
 * @returns {string} the text as a sequence of `&#N;` / `&#xH;` references
 */
var mangle = function(str) {
  var out = '';
  var l = str.length;
  var i = 0;
  var ch;

  for (; i < l; i++) {
    ch = str.charCodeAt(i);
    if (Math.random() > .5) {
      ch = 'x' + ch.toString(16);
    }
    out += '&#' + ch + ';';
  }

  return out;
};

/**
 * Expose
 */

/**
 * Converts a markdown string to HTML.
 *
 * @param {string} str - markdown source
 * @returns {string} rendered HTML
 */
var marked = function(str) {
  return parse(lex(str));
};

marked.parser = parse;
marked.lexer = lex;

// only touch `module` where CommonJS provides it, so loading the file in
// another environment does not throw
if (typeof module !== 'undefined' && module.exports) {
  module.exports = marked;
}