// File: marked/lib/marked.js (516 lines, 11 KiB, JavaScript)
/**
* marked - A markdown parser
* Copyright (c) 2011, Christopher Jeffrey. (MIT Licensed)
*/
/**
* Block-Level Grammar
*/
// Block-level rules. Every pattern is anchored to the start of the
// remaining input, and the lexer tries them in the order declared here.
var rules = {
  newline: /^\n/,
  block: /^[ ]{4,}[^\n]*(?:\n[ ]{4,}[^\n]*)*/,
  heading: /^ *(#{1,6}) *([^\n#]*) *#*/,
  lheading: /^([^\n]+)\n *(=|-){3,}/,
  hr: /^( ?[\-*_]){3,}/,
  blockquote: /^ *>[^\n]*(?:\n *>[^\n]*)*/,
  // Deliberately NOT global: with the `g` flag, exec() resumes from the
  // previous match's lastIndex, so this `^`-anchored rule would fail on
  // the first attempt after any list had been matched (which includes
  // the recursive pass over the list's own items).
  list: /^(?:( *)(\*|\+|-|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*(?:\n+|$)){2,}/,
  html: /^<([^\/\s>]+)[^\n>]*>[^\n]*(?:\n[^\n]+)*\n?<\/\1>/,
  text: /^[^\n]+/
};

// Rule names in priority order, and their count, cached for the lexer loop.
var keys = Object.keys(rules)
  , len = keys.length;
/**
 * Lexer
 *
 * Normalizes the raw source, pulls link definitions out of the text and
 * hands the remainder to `lex.token`.
 *
 * @param {string} str  raw markdown source
 * @returns the result of `lex.token(str, tokens, 0)`, where `tokens` is a
 *   fresh array whose `links` property maps each definition id to
 *   `{ href, title }` (`title` is undefined when none was given)
 */
var lex = function(str) {
  var tokens = []
    , links = {};

  // normalize whitespace: unify line endings, then replace tabs
  str = str.replace(/\r\n/g, '\n')
           .replace(/\r/g, '\n');

  str = str.replace(/\t/g, ' ');

  // unfortunately, this is the most
  // performant method of getting link
  // definitions out of the way.
  // The href class excludes newlines as well as spaces: otherwise a
  // definition without a title would swallow the first word of the
  // following line into its href.
  str = str.replace(
    /^ {0,3}\[([^\]]+)\]: *([^ \n]+)(?: +"([^"]+)")?/gm,
    function(_, id, href, title) {
      links[id] = {
        href: href,
        title: title
      };
      return '';
    }
  );

  tokens.links = links;

  return lex.token(str, tokens, 0);
};
/**
 * Block tokenizer
 *
 * Repeatedly matches the block rules (in `keys` order) against the start
 * of `str`, consumes the matched text and appends the resulting tokens
 * to `tokens`. List items and blockquote bodies are tokenized by
 * recursing into this same function.
 *
 * @param {string} str     source text still to be tokenized
 * @param {Array}  tokens  output array, appended to in place
 * @param {number} line    counter stamped on every token; it is bumped
 *                         once for each match of the `newline` rule
 * @returns {Array} the same `tokens` array
 */
lex.token = function lexToken(str, tokens, line) {
  var i
    , key
    , cap
    , items;

  while (str.length) {
    for (i = 0; i < len; i++) {
      key = keys[i];
      cap = rules[key].exec(str);
      if (!cap) continue;

      // consume the matched source
      str = str.substring(cap[0].length);

      switch (key) {
        case 'newline':
          line++;
          break;
        case 'hr':
          tokens.push({
            type: 'hr',
            line: line
          });
          break;
        case 'lheading':
          tokens.push({
            type: 'heading',
            // "=" underline is depth 1, "-" underline is depth 2
            depth: cap[2] === '=' ? 1 : 2,
            text: cap[1],
            line: line
          });
          break;
        case 'heading':
          tokens.push({
            type: 'heading',
            depth: cap[1].length,
            text: cap[2],
            line: line
          });
          break;
        case 'block':
          tokens.push({
            type: 'block',
            // strip the four-space indent from every line
            text: cap[0].replace(/^ {4}/gm, ''),
            line: line
          });
          break;
        case 'list':
          tokens.push({
            type: 'list_start',
            // the global isFinite coerces its argument, so a numeric
            // sigil such as "1." counts as ordered
            ordered: isFinite(cap[2]),
            line: line
          });

          // get each top-level
          // item in the list
          items = cap[0].match(
            /^( *)(\*|\+|-|\d+\.)[^\n]+(?:\n(?:\1 )+[^\n]+)*/gm
          );

          items.forEach(function(item) {
            var indent;

            // remove the list items sigil
            // so its seen as the next token
            item = item.replace(/^ *(\*|\+|-|\d+\.) */, '');

            // outdent whatever the
            // list item contains, hacky
            indent = /\n( +)/.exec(item);
            if (indent) {
              item = item.replace(
                new RegExp('^ {' + indent[1].length + '}', 'gm'),
                ''
              );
            }

            tokens.push({
              type: 'list_item_start',
              line: line
            });

            // recurse
            lexToken(item, tokens, line);

            tokens.push({
              type: 'list_item_end',
              line: line
            });
          });

          tokens.push({
            type: 'list_end',
            line: line
          });
          break;
        case 'html':
        case 'text':
          tokens.push({
            type: key,
            text: cap[0],
            line: line
          });
          break;
        case 'blockquote':
          tokens.push({
            type: 'blockquote_start',
            line: line
          });

          // drop the leading ">" markers, then recurse
          lexToken(cap[0].replace(/^ *>/gm, ''), tokens, line);

          tokens.push({
            type: 'blockquote_end',
            line: line
          });
          break;
      }

      // a rule matched: restart from the first rule
      break;
    }
  }

  return tokens;
};
/**
 * Inline Processing
 */

// this is really bad. i should define
// some lexemes for all of the inline stuff,
// but this was just easier for the time being.

// Legacy regex-pipeline implementation. The `if (0)` guard means this
// assignment never runs; only the `var inline` declaration is hoisted.
// Already-rendered fragments are stashed in `hash` behind `#<index>#`
// placeholders so that later passes do not touch them, and are put back
// at the very end.
if (0) var inline = function(str) {
  var hash = ['#'];

  // protect literal "#" so it cannot collide with placeholders
  str = str.replace(/#/g, '#0#');

  str = str.replace(/`([^`]+)`/g, function(__, text) {
    text = '<code>' + escape(text) + '</code>';
    return '#' + (hash.push(text) - 1) + '#';
  });

  // for <http://hello.world/> links
  str = str.replace(
    /<([^<>:\/ ]+:(?:\/\/)?[^>\n]+?|[^<>\n]+?(@)[^<>\n]+?)>/g,
    function(__, href, at) {
      var address
        , mail;
      if (at) {
        // according to the markdown "spec"
        // we need to mangle email addresses
        address = mangle(href);
        mail = mangle('mailto:') + address;
        return '<a href="' + mail + '">' + address + '</a>';
      }
      return '<a href="' + href + '">' + href + '</a>';
    }
  );

  // stash every remaining tag before escaping
  str = str.replace(/<[^\n>]+>/g, function(tag) {
    return '#' + (hash.push(tag) - 1) + '#';
  });

  str = escape(str);

  // links
  str = str.replace(
    /\[([^\]]+)\]\(([^\)]+)\)/g,
    '<a href="$2">$1</a>'
  );

  // This is [an example][id]
  // reference-style link.
  str = str.replace(
    /\[([^\]]+)\]\[([^\]]+)\]/g,
    function(match, text, id) {
      var link = tokens.links[id];
      // unknown reference: leave the source text untouched
      // instead of throwing on `link.href`
      if (!link) return match;
      return '<a href="'
        + link.href + '"'
        + (link.title
          ? ' title="'
          + link.title + '"'
          : '')
        + '>' + text + '</a>';
    }
  );

  // img
  str = str.replace(
    /!\[([^\]]+)\]\(([^\s\)]+)\s*([^\)]*)\)/g,
    function(_, alt, src, title) {
      return '<img src="'
        + src + '" alt="'
        + alt + '"'
        + (title
          ? ' title="' + title + '"'
          : '')
        + '>';
    }
  );

  // strong
  str = str.replace(/__([^_]+)__/g, '<strong>$1</strong>');
  str = str.replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');

  // em
  str = str.replace(/_([^_]+)_/g, '<em>$1</em>');
  str = str.replace(/\*([^*]+)\*/g, '<em>$1</em>');

  // br
  str = str.replace(/ $/gm, '<br>');

  // restore the stashed fragments
  str = str.replace(/#(\d+)#/g, function(__, i) {
    return hash[i];
  });

  return str;
};
/**
 * Inline Lexer
 *
 * `inline(str)` renders the inline markdown in `str` to an HTML string.
 * The rules below are tried in declaration order against the start of
 * the remaining input; `text` is the catch-all and is built last.
 * Reference links are resolved through `tokens.links`.
 */
var inline = (function() {
  var inline = {
    // Must come before `tag`: every autolink also matches the tag
    // pattern, so the other order would make this rule unreachable.
    // The alternation is wrapped in one group so that both forms are
    // anchored at "<" and closed by ">".
    // NOTE(review): the e-mail alternative allows spaces, so a raw tag
    // containing "@" (e.g. an <a href="mailto:..."> tag) is treated as
    // an e-mail autolink - confirm whether it should be tightened.
    autolink: /^<(?:([^<>:\/ ]+:(?:\/\/)?[^>\n]+?)|([^<>\n]+?@[^<>\n]+?))>/,
    tag: /^<[^\n>]+>/,
    img: /^!\[([^\]]+)\]\(([^\s\)]+)\s*([^\)]*)\)/,
    link: /^\[([^\]]+)\]\(([^\)]+)\)/,
    reflink: /^\[([^\]]+)\]\[([^\]]+)\]/,
    strong: /^__([\s\S]+?)__|^\*\*([\s\S]+?)\*\*/,
    em: /^_([^_]+)_|^\*([^*]+)\*/,
    escape: /^`([^`]+)`|^``([\s\S]+?)``/
  };

  // Plain text: the shortest run of characters that is followed by the
  // start of any other rule (their leading anchors stripped) or by the
  // end of the input.
  inline.text = new RegExp(
    '^([\\s\\S]+?)(?='
    + Object.keys(inline).map(function(key) {
      return inline[key].source.replace(/(^|\|)\^/g, '$1');
    }).join('|')
    + '|$)'
  );

  inline.keys = Object.keys(inline);
  inline.len = inline.keys.length;

  /**
   * @param {string} str  inline markdown source
   * @returns {string} rendered HTML
   */
  inline.lexer = function(str) {
    var rules = inline
      , len = inline.len
      , keys = inline.keys
      , i
      , key
      , cap
      , link
      , href
      , label
      , out = []
      , links = (tokens && tokens.links) || {};

    while (str.length) {
      for (i = 0; i < len; i++) {
        key = keys[i];
        cap = rules[key].exec(str);
        if (!cap) continue;

        // consume the matched source
        str = str.substring(cap[0].length);

        switch (key) {
          case 'tag':
            // raw markup is passed through untouched
            out.push(cap[0]);
            break;
          case 'img':
            out.push('<img src="'
              + escape(cap[2])
              + '" alt="' + escape(cap[1])
              + '"'
              + (cap[3]
                ? ' title="'
                + escape(cap[3])
                + '"'
                : '')
              + '>');
            break;
          case 'link':
          case 'reflink':
            // a known definition id wins; otherwise the second
            // capture is used as the href itself
            link = links[cap[2]] || '';
            out.push('<a href="'
              + escape(link.href || cap[2])
              + '"'
              + (link.title
                ? ' title="'
                + escape(link.title)
                + '"'
                : '')
              + '>'
              + inline.lexer(cap[1])
              + '</a>');
            break;
          case 'autolink':
            // href/label are recomputed for every match so that an
            // earlier e-mail autolink cannot leak into a later URL
            if (cap[2]) {
              label = mangle(cap[2]);
              href = mangle('mailto:') + label;
            } else {
              label = escape(cap[1]);
              href = label;
            }
            out.push('<a href="'
              + href + '"'
              + '>'
              + label
              + '</a>');
            break;
          case 'strong':
            out.push('<strong>'
              + inline.lexer(cap[2] || cap[1])
              + '</strong>');
            break;
          case 'em':
            out.push('<em>'
              + inline.lexer(cap[2] || cap[1])
              + '</em>');
            break;
          case 'escape':
            // code span: contents are escaped, never re-lexed
            out.push('<code>'
              + escape(cap[2] || cap[1])
              + '</code>');
            break;
          case 'text':
            out.push(escape(cap[1]));
            break;
          default:
            break;
        }

        // a rule matched: restart from the first rule
        break;
      }
    }

    return out.join('');
  };

  return inline.lexer;
})();
/**
 * Parsing
 */

// Parser state: `tokens` is the pending token stack (consumed from its
// end) and `token` is the token most recently taken off it.
var tokens
  , token;

// Pops the next token off `tokens`, stores it in `token` and returns it;
// yields undefined once the stack is empty.
var next = function() {
  return token = tokens.pop();
};
/**
 * Renders the current `token` to HTML. Container tokens (blockquote,
 * list, list item) keep calling `next()` and rendering until their
 * matching end token has been consumed.
 *
 * @returns {string} HTML for the current token; an empty string for a
 *   token type this function does not know
 */
var tok = function() {
  var body
    , type
    , last;

  switch (token.type) {
    case 'hr':
      return '<hr>';
    case 'heading':
      return '<h' + token.depth + '>'
        + inline(token.text)
        + '</h' + token.depth + '>';
    case 'block':
      return '<pre><code>'
        + escape(token.text)
        + '</code></pre>';
    case 'blockquote_start':
      body = [];
      while (next().type !== 'blockquote_end') {
        body.push(tok());
      }
      return '<blockquote>'
        + body.join('')
        + '</blockquote>';
    case 'list_start':
      body = [];
      // read `ordered` now: `token` changes as the items are consumed
      type = token.ordered ? 'ol' : 'ul';
      while (next().type !== 'list_end') {
        body.push(tok());
      }
      return '<' + type + '>'
        + body.join('')
        + '</' + type + '>';
    case 'list_item_start':
      body = [];
      while (next().type !== 'list_item_end') {
        // TODO incorporate paragraph
        // list items here
        if (token.type === 'text') {
          // text directly inside an item is not wrapped in <p>
          body.push(inline(token.text));
        } else {
          body.push(tok());
        }
      }
      return '<li>'
        + body.join(' ')
        + '</li>';
    case 'html':
      return inline(token.text);
    case 'text':
      body = [];
      last = token.line;
      // merge following text tokens into one paragraph until the line
      // counter jumps past the expected value
      while (token && token.type === 'text') {
        if (token.line > last) break;
        last = token.line + 1;
        body.push(token.text);
        next();
      }
      // hand the look-ahead token back so the caller sees it next
      if (token) tokens.push(token);
      return '<p>'
        + inline(body.join(' '))
        + '</p>';
    default:
      // unknown type: emit nothing rather than the text "undefined"
      return '';
  }
};
/**
 * Renders a token array to HTML.
 *
 * @param {Array} src  tokens in document order, carrying the `links`
 *                     table as a property; it is not modified
 * @returns {string} the rendered top-level blocks joined by a space
 */
var parse = function(src) {
  var out = [];

  // Work on a reversed copy so that next() can pop() in document order
  // without reversing and emptying the caller's array. The copy does not
  // inherit the `links` property, so carry it over.
  tokens = src.slice().reverse();
  tokens.links = src.links;

  try {
    while (next()) {
      out.push(tok());
    }
  } finally {
    // always drop the shared parser state, even if rendering threw
    tokens = null;
    token = null;
  }

  return out.join(' ');
};
/**
 * Helpers
 */

// Escapes the characters that are significant in HTML text and in
// quoted attribute values. "&" is replaced first so that the entities
// produced by the later replacements are not escaped a second time.
var escape = function(html) {
  return html
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    // numeric reference: "&apos;" is not an HTML 4 entity
    .replace(/'/g, '&#39;');
};
// Obfuscates a string by turning every character into an HTML numeric
// character reference, picking hexadecimal or decimal notation at
// random for each one.
var mangle = function(str) {
  var total = str.length
    , pieces = []
    , pos
    , code;

  for (pos = 0; pos < total; pos++) {
    code = str[pos].charCodeAt(0);
    pieces.push(
      '&#'
      + (Math.random() > .5 ? 'x' + code.toString(16) : code)
      + ';'
    );
  }

  return pieces.join('');
};
/**
 * Expose
 */

// Main entry point: lex the markdown source, then render the tokens.
exports = function(str) {
  return parse(lex(str));
};

// The two stages are also exposed individually on the entry point.
exports.parser = parse;
exports.lexer = lex;

// `exports` was rebound above, so it has to be published explicitly.
module.exports = exports;