/**
 * marked - A markdown parser
 * Copyright (c) 2011, Christopher Jeffrey. (MIT Licensed)
 */
;(function() {
/**
 * Block-Level Grammar
 *
 * Every rule is anchored with `^`, so it only ever matches at the very
 * start of whatever input is left to tokenize.
 */

var block = {
  newline: /^\n+/,
  // a run of lines that are each indented by four or more spaces
  block: /^ {4,}[^\n]*(?:\n {4,}[^\n]*)*/,
  // "# text #": group 1 is the hashes, group 2 the heading text
  heading: /^ *(#{1,6}) *([^\n#]*) *#*/,
  // a line of text underlined with three or more "=" or "-"
  lheading: /^([^\n]+)\n *(=|-){3,}/,
  hr: /^( ?[\-*_]){3,}/,
  blockquote: /^ *>[^\n]*(?:\n *>[^\n]*)*/,
  // Group 1 is the indent, group 2 the marker of the first item. The match
  // runs on through following non-empty lines, and across a blank line only
  // when the next line repeats that same indent and marker (loose lists).
  list: /^( *)(\*|\+|-|\d+\.)[^\n]*(?:\n[^\n]+|\n{2}\1\2[^\n]*)*/,
  // an opening tag through to the closing tag with the same name
  html: /^<([^\/\s>]+)[^\n>]*>[^\n]*(?:\n[^\n]+)*\n?<\/\1>/,
  text: /^[^\n]+/
};

// The order in which the rules above are tried; the first match wins.
block.keys = [
  'newline',
  'block',
  'heading',
  'lheading',
  'hr',
  'blockquote',
  'list',
  'html',
  'text'
];
/**
 * Lexer
 */

/**
 * Turn a markdown source string into an array of block tokens.
 *
 * Line endings are normalized to "\n" and tabs are expanded. Link
 * definitions ("[id]: href "title"") are cut out of the text and collected
 * on the `links` property of the returned array, keyed by id; the rest of
 * the text is handed to `block.token`.
 *
 * @param {string} str markdown source
 * @returns {Array} the token array, with a `links` object attached
 */
block.lexer = function(str) {
  var tokens = []
    , links = {};

  // Normalize whitespace. A tab becomes four spaces so that tab-indented
  // lines satisfy the ` {4,}` code block rule.
  str = str.replace(/\r\n/g, '\n')
           .replace(/\r/g, '\n')
           .replace(/\t/g, '    ');

  // grab link definitions
  str = str.replace(
    /^ {0,3}\[([^\]]+)\]: *([^ ]+)(?: +"([^"]+)")? *(?:\n|$)/gm,
    function(_, id, href, title) {
      links[id] = {
        href: href,
        title: title
      };
      // drop the definition line from the text that gets tokenized
      return '';
    }
  );

  tokens.links = links;

  return block.token(str, tokens);
};
/**
 * Tokenize `str` with the block grammar, appending tokens to `tokens`.
 *
 * Rules are tried in `block.keys` order against the start of the remaining
 * input; the matched text is consumed and the loop repeats until nothing is
 * left. List items and blockquote bodies are tokenized recursively into the
 * same array, bracketed by start/end tokens.
 *
 * @param {string} str text to tokenize
 * @param {Array} tokens array the tokens are pushed onto
 * @returns {Array} the same `tokens` array
 */
block.token = function(str, tokens) {
  var rules = block
    , keys = block.keys
    , len = keys.length
    , key
    , cap
    , loose;

  // Find the first rule matching the head of `str`, leave its name and
  // match in `key` / `cap`, and consume the matched text. Returns true on a
  // match and undefined once the input is exhausted.
  var scan = function() {
    if (!str) return;
    for (var i = 0; i < len; i++) {
      key = keys[i];
      if (cap = rules[key].exec(str)) {
        str = str.substring(cap[0].length);
        return true;
      }
    }
  };

  while (scan()) {
    switch (key) {
      case 'newline':
        // a single newline only separates lines; two or more leave a gap
        if (cap[0].length > 1) {
          tokens.push({
            type: 'space'
          });
        }
        break;
      case 'hr':
        tokens.push({
          type: 'hr'
        });
        break;
      case 'lheading':
        tokens.push({
          type: 'heading',
          depth: cap[2] === '=' ? 1 : 2,
          text: cap[1]
        });
        break;
      case 'heading':
        tokens.push({
          type: 'heading',
          depth: cap[1].length,
          text: cap[2]
        });
        break;
      case 'block':
        // strip the four-space indent from every line
        cap = cap[0].replace(/^ {4}/gm, '');
        tokens.push({
          type: 'block',
          text: cap
        });
        break;
      case 'list':
        tokens.push({
          type: 'list_start',
          // "1." coerces to a finite number, a bullet character does not
          ordered: isFinite(cap[2])
        });

        // loose-list detection is switched off, so every item is
        // emitted as a tight 'list_item_start'
        loose = false;

        // get each top-level item in the list: an item runs until
        // the next line that starts with the same indent and marker
        cap = cap[0].match(
          /^( *)(\*|\+|-|\d+\.)[^\n]*(?:\n(?!\1\2)[^\n]*)*/gm
        );

        each(cap, function(item) {
          // remove the list item's sigil
          // so its body is seen as the next token
          item = item.replace(/^ *(\*|\+|-|\d+\.) */, '');

          // outdent whatever the list item contains, using the
          // indent of its first continuation line (hacky)
          var space = /\n( +)/.exec(item);
          if (space) {
            space = new RegExp('^' + space[1], 'gm');
            item = item.replace(space, '');
          }

          tokens.push({
            type: loose
              ? 'loose_item_start'
              : 'list_item_start'
          });

          block.token(item, tokens);

          tokens.push({
            type: 'list_item_end'
          });
        });

        tokens.push({
          type: 'list_end'
        });
        break;
      case 'html':
      case 'text':
        tokens.push({
          type: key,
          text: cap[0]
        });
        break;
      case 'blockquote':
        tokens.push({
          type: 'blockquote_start'
        });
        // drop the leading ">" of every line, then tokenize the body
        cap = cap[0].replace(/^ *>/gm, '');
        block.token(cap, tokens);
        tokens.push({
          type: 'blockquote_end'
        });
        break;
    }
  }

  return tokens;
};
/**
 * Inline Processing
 *
 * Like the block rules, every inline rule is anchored with `^` and is
 * matched against the start of the remaining text.
 */

var inline = {
  escape: /^\\([\\`*{}\[\]()#+\-.!])/,
  // group 2 is "@" for an email address, ":/" for a URL
  autolink: /^<([^ >]+(@|:\/)[^ >]+)>/,
  tag: /^<[^\n>]+>/,
  // an optional leading "!" marks an image
  link: /^!?\[([^\]]+)\]\(([^\)]+)\)/,
  reflink: /^!?\[([^\]]+)\]\[([^\]]+)\]/,
  strong: /^__([\s\S]+?)__|^\*\*([\s\S]+?)\*\*/,
  em: /^_([^_]+)_|^\*([^*]+)\*/,
  code: /^`([^`]+)`|^``([\s\S]+?)``/
};

// The order in which the rules above are tried; 'text' is appended below.
inline.keys = [
  'escape',
  'autolink',
  'tag',
  'link',
  'reflink',
  'strong',
  'em',
  'code'
];

// hacky, but performant
// Build the plain-text rule: the shortest non-empty run of characters that
// is followed by something one of the other rules would match, or by the
// end of the input.
inline.text = (function(rules) {
  var keys = rules.keys
    , i = 0
    , l = keys.length
    , body = [];

  for (; i < l; i++) {
    // strip the `^` anchors, but keep the `^` of negated classes (`[^`)
    body.push(rules[keys[i]].source
      .replace(/(^|[^\[])\^/g, '$1'));
  }

  // pushed after the loop, so 'text' is tried last and is not part of
  // its own lookahead
  keys.push('text');

  return new RegExp(
    '^([\\s\\S]+?)(?='
    + body.join('|')
    + '|$)'
  );
})(inline);
/**
 * Inline Lexer
 */

/**
 * Render a run of inline markdown to an HTML string.
 *
 * Rules are tried in `inline.keys` order against the start of the remaining
 * text. Link text, strong and em contents are rendered recursively; code
 * and plain text are HTML-escaped.
 *
 * Reference links are looked up in `tokens.links`, i.e. on the token array
 * the parser is currently working through, so this is meant to be called
 * while a parse is in progress.
 *
 * @param {string} str inline markdown
 * @returns {string} HTML
 * @throws {Error} when a reference link names an undefined reference
 */
inline.lexer = function(str) {
  var out = ''
    , links = tokens.links
    , link
    , text
    , href;

  var rules = inline
    , keys = inline.keys
    , len = keys.length
    , key
    , cap;

  // Find the first rule matching the head of `str`, leave its name and
  // match in `key` / `cap`, and consume the matched text.
  var scan = function() {
    if (!str) return;
    for (var i = 0; i < len; i++) {
      key = keys[i];
      if (cap = rules[key].exec(str)) {
        str = str.substring(cap[0].length);
        return true;
      }
    }
  };

  // Split the inside of "(...)" into an href and an optional title. The
  // href is the first run of non-whitespace; whatever follows is the title,
  // with one pair of surrounding quotes removed. The pattern matches any
  // string, so the result of exec() is never null.
  var target = function(raw) {
    var parts = /^\s*(\S*)\s*([\s\S]*?)\s*$/.exec(raw)
      , title = parts[2].replace(/^(["'])([\s\S]*)\1$/, '$2');
    return {
      href: parts[1],
      title: title || undefined
    };
  };

  while (scan()) {
    switch (key) {
      case 'escape':
        out += cap[1];
        break;
      case 'tag':
        // raw HTML tags pass through untouched
        out += cap[0];
        break;
      case 'link':
      case 'reflink':
        if (key === 'reflink') {
          link = links[cap[2]];
          if (!link) {
            throw new Error('Undefined Reference: ' + cap[2]);
          }
        } else {
          link = target(cap[2]);
        }
        if (cap[0].charAt(0) !== '!') {
          out += '<a href="'
            + escape(link.href)
            + '"'
            + (link.title
            ? ' title="'
            + escape(link.title)
            + '"'
            : '')
            + '>'
            + inline.lexer(cap[1])
            + '</a>';
        } else {
          // a leading "!" makes it an image; the bracket text is the alt
          out += '<img src="'
            + escape(link.href)
            + '" alt="'
            + escape(cap[1])
            + '"'
            + (link.title
            ? ' title="'
            + escape(link.title)
            + '"'
            : '')
            + '>';
        }
        break;
      case 'autolink':
        if (cap[2] === '@') {
          // email address: entity-encode both the text and the mailto: href
          text = mangle(cap[1]);
          href = mangle('mailto:') + text;
        } else {
          text = escape(cap[1]);
          href = text;
        }
        out += '<a href="' + href + '">'
          + text
          + '</a>';
        break;
      case 'strong':
        out += '<strong>'
          + inline.lexer(cap[2] || cap[1])
          + '</strong>';
        break;
      case 'em':
        out += '<em>'
          + inline.lexer(cap[2] || cap[1])
          + '</em>';
        break;
      case 'code':
        out += '<code>'
          + escape(cap[2] || cap[1])
          + '</code>';
        break;
      case 'text':
        out += escape(cap[1]);
        break;
      default:
        break;
    }
  }

  return out;
};
/**
 * Parsing
 */

// Parser state: `tokens` is the array still to be consumed (taken from its
// end), `token` is the token most recently taken from it.
var tokens
  , token;

/**
 * Pop the next token off `tokens` and make it the current `token`.
 *
 * @returns {Object|undefined} the new current token, or undefined once
 *   `tokens` is empty
 */
var next = function() {
  return token = tokens.pop();
};
/**
 * Render the current `token` to HTML, consuming any further tokens that
 * belong to it (the contents of a blockquote, list or list item, or the
 * text lines that continue a paragraph).
 *
 * @returns {string|undefined} HTML for the token; undefined for a token
 *   type that has no case below
 */
var tok = function() {
  var type
    , body
    , top;

  // Render tokens one after another until the `end` token has been
  // consumed. With `tight` set, text tokens are rendered inline, without
  // the <p> wrapper that tok() would give them.
  var collect = function(end, tight) {
    var parts = [];
    while (next().type !== end) {
      parts.push(tight && token.type === 'text'
        ? inline.lexer(token.text)
        : tok());
    }
    return parts;
  };

  switch (token.type) {
    case 'space':
      return '';
    case 'hr':
      return '<hr>';
    case 'heading':
      return '<h' + token.depth + '>'
        + inline.lexer(token.text)
        + '</h' + token.depth + '>';
    case 'block':
      return '<pre><code>'
        + escape(token.text)
        + '</code></pre>';
    case 'blockquote_start':
      return '<blockquote>'
        + collect('blockquote_end').join('')
        + '</blockquote>';
    case 'list_start':
      // read before collecting: `token` moves on as items are consumed
      type = token.ordered ? 'ol' : 'ul';
      return '<' + type + '>'
        + collect('list_end').join('')
        + '</' + type + '>';
    case 'list_item_start':
      return '<li>'
        + collect('list_item_end', true).join(' ')
        + '</li>';
    case 'loose_item_start':
      return '<li>'
        + collect('list_item_end').join(' ')
        + '</li>';
    case 'html':
      return inline.lexer(token.text);
    case 'text':
      // directly following text tokens are lines of the same paragraph
      body = [ token.text ];
      while ((top = tokens[tokens.length - 1])
             && top.type === 'text') {
        body.push(next().text);
      }
      return '<p>'
        + inline.lexer(body.join(' '))
        + '</p>';
  }
};
/**
 * Render an array of block tokens (as produced by `block.lexer`) to HTML.
 *
 * @param {Array} src block tokens, with the link definitions on `src.links`
 * @returns {string} HTML, top-level blocks joined by a single space
 */
var parse = function(src) {
  var out = [];

  // next() takes tokens from the end, so work on a reversed copy. The
  // caller's array is left untouched and can be parsed again.
  tokens = src.slice().reverse();
  // slice() copies only the elements; inline.lexer reads tokens.links
  tokens.links = src.links;

  try {
    while (next()) {
      out.push(tok());
    }
  } finally {
    // clear the parser state even when rendering throws
    tokens = null;
    token = null;
  }

  return out.join(' ');
};
/**
 * Helpers
 */

/**
 * Escape the characters that are special in HTML text and attribute values.
 *
 * @param {string} html text to escape
 * @returns {string} the text with & < > " ' replaced by entities
 */
var escape = function(html) {
  return html
    // "&" first, so the entities produced below are not escaped again
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    // numeric reference: "&apos;" is not an HTML 4 entity
    .replace(/'/g, '&#39;');
};
/**
 * Encode every character of `str` as an HTML numeric character reference,
 * picking decimal or hexadecimal form at random for each character. The
 * output therefore differs from call to call but always decodes back to
 * `str`.
 *
 * @param {string} str text to encode
 * @returns {string} a sequence of "&#NN;" / "&#xHH;" references
 */
var mangle = function(str) {
  var out = ''
    , ch
    , i = 0
    , l = str.length;

  for (; i < l; i++) {
    ch = str.charCodeAt(i);
    if (Math.random() > 0.5) {
      // hexadecimal form: &#x..;
      ch = 'x' + ch.toString(16);
    }
    out += '&#' + ch + ';';
  }

  return out;
};
/**
 * Call `func` once for every element of an array-like `obj`, in index
 * order, passing the element as the only argument. The length is read
 * once, before the first call.
 */
var each = function(obj, func) {
  var count = obj.length
    , index;

  for (index = 0; index < count; index += 1) {
    func(obj[index]);
  }
};
/**
 * Expose
 */

/**
 * Convert a markdown string to HTML: lex it into block tokens, then render
 * those tokens.
 *
 * @param {string} str markdown source
 * @returns {string} HTML
 */
var marked = function(str) {
  return parse(block.lexer(str));
};

// the two stages are also available separately
marked.parser = parse;
marked.lexer = block.lexer;

// Export through `module.exports` where a `module` object exists;
// otherwise attach to `this`, which the enclosing wrapper passes in.
if (typeof module !== 'undefined') {
  module.exports = marked;
} else {
  this.marked = marked;
}
}).call(this);