jean-pierre charras d17cef95fd Add another markdown-to-HTML converter (sundown), which converts markdown better.
The previous converter (maddy) remains available for some time.
We therefore have time to choose between them.
sundown is better at converting md text, but is written in C.
maddy has a few issues converting md text, but is written in C++.
Neither has any dependencies.
2019-10-01 18:53:23 +02:00

510 lines
13 KiB
C

/*
* Copyright (c) 2011, Vicent Marti
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "buffer.h"
#include "html.h"
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#if defined(_WIN32)
#define snprintf _snprintf
#endif
/*
 * Quote-tracking state shared by the smartypants callbacks during a single
 * sdhtml_smartypants() pass. Each flag is toggled by smartypants_quotes()
 * every time it emits a curly-quote entity.
 */
struct smartypants_data
{
int in_squote; /* non-zero after a left single quote was emitted, until the matching right quote */
int in_dquote; /* non-zero after a left double quote was emitted, until the matching right quote */
};
/*
 * Forward declarations of the per-character callbacks referenced by the
 * smartypants_cb_ptrs dispatch table.
 *
 * All callbacks share one signature:
 *   ob            - output buffer the callback appends to
 *   smrt          - quote state for the current pass
 *   previous_char - input byte preceding text[0] (0 at the start of input)
 *   text          - input, starting at the trigger character
 *   size          - number of input bytes available from text
 * The return value is added to the caller's input index in addition to the
 * trigger character itself, i.e. it is the count of extra bytes consumed.
 */
static size_t smartypants_cb__ltag( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__dquote( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__amp( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__period( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__number( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__dash( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__parens( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__squote( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__backtick( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
static size_t smartypants_cb__escape( struct buf* ob,
struct smartypants_data* smrt,
uint8_t previous_char,
const uint8_t* text,
size_t size );
/*
 * Dispatch table: action code (as stored in smartypants_cb_chars) -> callback.
 * Slot 0 is never called; action 0 means "plain character, no callback".
 */
static size_t( *smartypants_cb_ptrs[] )
( struct buf*, struct smartypants_data*, uint8_t, const uint8_t*, size_t ) =
{
    [0]  = NULL,
    [1]  = smartypants_cb__dash,
    [2]  = smartypants_cb__parens,
    [3]  = smartypants_cb__squote,
    [4]  = smartypants_cb__dquote,
    [5]  = smartypants_cb__amp,
    [6]  = smartypants_cb__period,
    [7]  = smartypants_cb__number,
    [8]  = smartypants_cb__ltag,
    [9]  = smartypants_cb__backtick,
    [10] = smartypants_cb__escape,
};
/*
 * Maps every possible input byte to an action code, i.e. an index into
 * smartypants_cb_ptrs. Bytes not listed here are zero-initialised, which
 * means "no special handling". Indices are ASCII code points.
 */
static const uint8_t smartypants_cb_chars[256] =
{
    [0x22] = 4,     /* '"'  -> smartypants_cb__dquote   */
    [0x26] = 5,     /* '&'  -> smartypants_cb__amp      */
    [0x27] = 3,     /* '\'' -> smartypants_cb__squote   */
    [0x28] = 2,     /* '('  -> smartypants_cb__parens   */
    [0x2D] = 1,     /* '-'  -> smartypants_cb__dash     */
    [0x2E] = 6,     /* '.'  -> smartypants_cb__period   */
    [0x31] = 7,     /* '1'  -> smartypants_cb__number   */
    [0x33] = 7,     /* '3'  -> smartypants_cb__number   */
    [0x3C] = 8,     /* '<'  -> smartypants_cb__ltag     */
    [0x5C] = 10,    /* '\\' -> smartypants_cb__escape   */
    [0x60] = 9,     /* '`'  -> smartypants_cb__backtick */
};
/*
 * Tells whether c can delimit a word: the NUL byte (used by callers to mean
 * "no neighbouring character"), any whitespace, or any punctuation.
 * Returns 1 for a boundary, 0 otherwise.
 */
static inline int word_boundary( uint8_t c )
{
    if( c == 0 )
        return 1;

    if( isspace( c ) )
        return 1;

    return ispunct( c ) ? 1 : 0;
}
/*
 * Tries to emit a curly-quote entity ("&lsquo;", "&rsquo;", "&ldquo;" or
 * "&rdquo;") for a quote character.
 *
 *   ob            - output buffer
 *   previous_char - byte before the quote (0 if none)
 *   next_char     - byte after the quote (0 if none)
 *   quote         - 's' for single or 'd' for double; placed in the entity name
 *   is_open       - in/out flag: non-zero when a quote of this kind is open
 *
 * A closing quote is only accepted when followed by a word boundary, an
 * opening quote only when preceded by one. On success the entity is written,
 * *is_open is flipped and 1 is returned; otherwise nothing is written and 0
 * is returned.
 */
static int smartypants_quotes( struct buf* ob,
                               uint8_t previous_char,
                               uint8_t next_char,
                               uint8_t quote,
                               int* is_open )
{
    char      entity[8];    /* "&" + side + kind + "quo;" + NUL == 8 bytes */
    const int closing = *is_open;

    if( closing )
    {
        if( !word_boundary( next_char ) )
            return 0;
    }
    else
    {
        if( !word_boundary( previous_char ) )
            return 0;
    }

    snprintf( entity, sizeof(entity), "&%c%cquo;", closing ? 'r' : 'l', quote );
    *is_open = !closing;
    bufputs( ob, entity );

    return 1;
}
/*
 * Callback for the single quote character (').
 *
 * Handles, in order:
 *   - ''            : treated as a double quote (one extra byte consumed);
 *   - 's 't 'm 'd   : contraction/possessive apostrophe -> "&rsquo;";
 *   - 're 'll 've   : contraction apostrophe -> "&rsquo;";
 *   - anything else : opening/closing single quote via smartypants_quotes(),
 *                     or the raw character when that is rejected.
 * For contractions only the apostrophe is replaced; the letters that follow
 * are left for the caller to copy.
 *
 * text[0] is the quote and size counts the bytes available from text, so a
 * suffix of k letters ends the input when size == k + 1. Every look-ahead is
 * bounds-checked against size because the input is not guaranteed to be
 * NUL-terminated.
 *
 * Returns the number of extra input bytes consumed beyond text[0].
 */
static size_t smartypants_cb__squote( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    if( size >= 2 )
    {
        uint8_t t1 = tolower( text[1] );

        if( t1 == '\'' )
        {
            if( smartypants_quotes( ob, previous_char, size >= 3 ? text[2] : 0, 'd',
                                    &smrt->in_dquote ) )
                return 1;
        }

        /* one-letter suffix: it must end the input or be followed by a boundary */
        if( (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd')
            && ( size == 2 || word_boundary( text[2] ) ) )
        {
            BUFPUTSL( ob, "&rsquo;" );
            return 0;
        }

        if( size >= 3 )
        {
            uint8_t t2 = tolower( text[2] );

            /* two-letter suffix: same rule, one byte further */
            if( ( (t1 == 'r' && t2 == 'e')
                  || (t1 == 'l' && t2 == 'l')
                  || (t1 == 'v' && t2 == 'e') )
                && ( size == 3 || word_boundary( text[3] ) ) )
            {
                BUFPUTSL( ob, "&rsquo;" );
                return 0;
            }
        }
    }

    /* text[1] only exists when at least two bytes are available */
    if( smartypants_quotes( ob, previous_char, size > 1 ? text[1] : 0, 's', &smrt->in_squote ) )
        return 0;

    bufputc( ob, text[0] );
    return 0;
}
/*
 * Callback for '('. Recognises the case-insensitive sequences "(c)", "(r)"
 * and "(tm)" and writes "&copy;", "&reg;" or "&trade;" respectively.
 * Any other '(' is copied unchanged.
 *
 * Returns the number of extra input bytes consumed beyond the '('.
 */
static size_t smartypants_cb__parens( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    if( size >= 3 )
    {
        const uint8_t first  = tolower( text[1] );
        const uint8_t second = tolower( text[2] );

        if( second == ')' )
        {
            /* three-byte forms: "(c)" and "(r)" */
            if( first == 'c' )
            {
                BUFPUTSL( ob, "&copy;" );
                return 2;
            }

            if( first == 'r' )
            {
                BUFPUTSL( ob, "&reg;" );
                return 2;
            }
        }
        else if( first == 't' && second == 'm' && size >= 4 && text[3] == ')' )
        {
            /* four-byte form: "(tm)" */
            BUFPUTSL( ob, "&trade;" );
            return 3;
        }
    }

    bufputc( ob, text[0] );
    return 0;
}
/*
 * Callback for '-'. Three consecutive dashes become "&mdash;", two become
 * "&ndash;", and a single dash is copied unchanged.
 *
 * Returns the number of extra input bytes consumed beyond the first dash.
 */
static size_t smartypants_cb__dash( struct buf* ob,
                                    struct smartypants_data* smrt,
                                    uint8_t previous_char,
                                    const uint8_t* text,
                                    size_t size )
{
    /* length of the dash run starting at text[0], capped at 3 */
    size_t run = 1;

    while( run < 3 && run < size && text[run] == '-' )
        run++;

    switch( run )
    {
    case 3:
        BUFPUTSL( ob, "&mdash;" );
        return 2;

    case 2:
        BUFPUTSL( ob, "&ndash;" );
        return 1;

    default:
        bufputc( ob, text[0] );
        return 0;
    }
}
/*
 * Callback for '&'.
 *   - "&quot;" is offered to smartypants_quotes() as a double quote; when it
 *     is accepted the whole entity is consumed.
 *   - "&#0;" is consumed without writing anything.
 *   - Otherwise the '&' is copied unchanged.
 *
 * Returns the number of extra input bytes consumed beyond the '&'.
 */
static size_t smartypants_cb__amp( struct buf* ob,
                                   struct smartypants_data* smrt,
                                   uint8_t previous_char,
                                   const uint8_t* text,
                                   size_t size )
{
    const int is_quot = ( size >= 6 ) && ( memcmp( text, "&quot;", 6 ) == 0 );

    if( is_quot )
    {
        /* byte right after the entity, or 0 when the entity ends the input */
        const uint8_t following = ( size >= 7 ) ? text[6] : 0;

        if( smartypants_quotes( ob, previous_char, following, 'd', &smrt->in_dquote ) )
            return 5;
    }

    if( ( size >= 4 ) && ( memcmp( text, "&#0;", 4 ) == 0 ) )
        return 3;

    bufputc( ob, '&' );
    return 0;
}
/*
 * Callback for '.'. Both "..." and ". . ." become "&hellip;"; any other
 * period is copied unchanged.
 *
 * Returns the number of extra input bytes consumed beyond the first period.
 */
static size_t smartypants_cb__period( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    const int tight = size >= 3 && text[1] == '.' && text[2] == '.';
    const int spaced = !tight && size >= 5
                       && text[1] == ' ' && text[2] == '.'
                       && text[3] == ' ' && text[4] == '.';

    if( !tight && !spaced )
    {
        bufputc( ob, text[0] );
        return 0;
    }

    BUFPUTSL( ob, "&hellip;" );
    return tight ? 2 : 4;
}
/*
 * Callback for '`'. Two consecutive backticks are offered to
 * smartypants_quotes() as a double quote; when accepted, both are consumed.
 * In every other case the backtick is copied unchanged: the caller skips the
 * trigger byte after this returns, so not writing it here would silently
 * remove it from the output.
 *
 * Returns the number of extra input bytes consumed beyond the first backtick.
 */
static size_t smartypants_cb__backtick( struct buf* ob,
                                        struct smartypants_data* smrt,
                                        uint8_t previous_char,
                                        const uint8_t* text,
                                        size_t size )
{
    if( size >= 2 && text[1] == '`' )
    {
        if( smartypants_quotes( ob, previous_char, size >= 3 ? text[2] : 0, 'd',
                                &smrt->in_dquote ) )
            return 1;
    }

    /* not a quote: keep the literal backtick */
    bufputc( ob, text[0] );
    return 0;
}
/*
 * Callback for the digits '1' and '3'. When the digit starts a word, the
 * fractions "1/2", "1/4" and "3/4" become "&frac12;", "&frac14;" and
 * "&frac34;". The fraction must end the input or be followed by a word
 * boundary; "1/4" may also be followed by "th" and "3/4" by "ths"
 * (case-insensitive), in which case only the fraction itself is replaced.
 * Otherwise the digit is copied unchanged.
 *
 * Returns the number of extra input bytes consumed beyond the first digit.
 */
static size_t smartypants_cb__number( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    if( size >= 3 && word_boundary( previous_char ) && text[1] == '/' )
    {
        const uint8_t numerator   = text[0];
        const uint8_t denominator = text[2];

        /* true when nothing word-like follows the three fraction bytes */
        const int ends_here = ( size == 3 ) || word_boundary( text[3] );

        if( numerator == '1' && denominator == '2' && ends_here )
        {
            BUFPUTSL( ob, "&frac12;" );
            return 2;
        }

        if( numerator == '1' && denominator == '4' )
        {
            const int th_suffix = size >= 5
                                  && tolower( text[3] ) == 't'
                                  && tolower( text[4] ) == 'h';

            if( ends_here || th_suffix )
            {
                BUFPUTSL( ob, "&frac14;" );
                return 2;
            }
        }

        if( numerator == '3' && denominator == '4' )
        {
            const int ths_suffix = size >= 6
                                   && tolower( text[3] ) == 't'
                                   && tolower( text[4] ) == 'h'
                                   && tolower( text[5] ) == 's';

            if( ends_here || ths_suffix )
            {
                BUFPUTSL( ob, "&frac34;" );
                return 2;
            }
        }
    }

    bufputc( ob, text[0] );
    return 0;
}
/*
 * Callback for the double quote character ("). Emits a left or right curly
 * double quote through smartypants_quotes(); when that is rejected, writes
 * "&quot;" instead.
 *
 * The byte after the quote is only read when it exists (size > 1), because
 * the input is not guaranteed to be NUL-terminated.
 *
 * Always returns 0: no input beyond the quote itself is consumed.
 */
static size_t smartypants_cb__dquote( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    const uint8_t next_char = ( size > 1 ) ? text[1] : 0;

    if( !smartypants_quotes( ob, previous_char, next_char, 'd', &smrt->in_dquote ) )
        BUFPUTSL( ob, "&quot;" );

    return 0;
}
/*
 * Callback for '<'. Copies an HTML tag to the output verbatim so that its
 * contents are not subjected to smartypants substitutions.
 *
 * For an opening tag listed in skip_tags, everything up to and including the
 * matching closing tag is copied verbatim as well. If the input ends before
 * a '>' (or before the closing tag) is found, the remainder of the input is
 * copied and consumed; the copy never extends past text + size.
 *
 * Returns the number of extra input bytes consumed beyond the '<'.
 */
static size_t smartypants_cb__ltag( struct buf* ob,
                                    struct smartypants_data* smrt,
                                    uint8_t previous_char,
                                    const uint8_t* text,
                                    size_t size )
{
    static const char* skip_tags[] =
    {
        "pre", "code", "var", "samp", "kbd", "math", "script", "style"
    };
    static const size_t skip_tags_count = sizeof( skip_tags ) / sizeof( skip_tags[0] );

    size_t tag, i = 0;

    /* find the end of the tag that starts at text[0] */
    while( i < size && text[i] != '>' )
        i++;

    for( tag = 0; tag < skip_tags_count; ++tag )
    {
        if( sdhtml_is_tag( text, size, skip_tags[tag] ) == HTML_TAG_OPEN )
            break;
    }

    if( tag < skip_tags_count )
    {
        /* scan forward, '<' by '<', for the matching closing tag */
        for( ; ; )
        {
            while( i < size && text[i] != '<' )
                i++;

            if( i == size )
                break;

            if( sdhtml_is_tag( text + i, size - i, skip_tags[tag] ) == HTML_TAG_CLOSE )
                break;

            i++;
        }

        while( i < size && text[i] != '>' )
            i++;
    }

    if( i >= size )
    {
        /* unterminated tag: text[i] does not exist, so copy only what is there */
        bufput( ob, text, size );
        return size ? size - 1 : 0;
    }

    /* i indexes the terminating '>', which is copied too */
    bufput( ob, text, i + 1 );
    return i;
}
/*
 * Callback for '\'. A backslash followed by one of  \ " ' . - `  is dropped
 * and the following character is written literally, bypassing its own
 * smartypants callback. Any other backslash, including one that is the last
 * byte of the input, is copied unchanged.
 *
 * Returns the number of extra input bytes consumed beyond the backslash.
 */
static size_t smartypants_cb__escape( struct buf* ob,
                                      struct smartypants_data* smrt,
                                      uint8_t previous_char,
                                      const uint8_t* text,
                                      size_t size )
{
    if( size < 2 )
    {
        /* trailing backslash: nothing to escape, keep it rather than lose it */
        bufputc( ob, '\\' );
        return 0;
    }

    switch( text[1] )
    {
    case '\\':
    case '"':
    case '\'':
    case '.':
    case '-':
    case '`':
        bufputc( ob, text[1] );
        return 1;

    default:
        bufputc( ob, '\\' );
        return 0;
    }
}
/*
 * Disabled code: the table below is compiled out by the surrounding
 * "#if 0" and nothing in this file refers to it. Each entry holds a trigger
 * character (c0), a pattern, a replacement entity and a skip count.
 * NOTE(review): this looks like a leftover from a table-driven variant of the
 * callbacks above (the patterns and entities are similar) - confirm and
 * consider removing it.
 */
#if 0
static struct
{
uint8_t c0;
const uint8_t* pattern;
const uint8_t* entity;
int skip;
}
smartypants_subs[] =
{
{ '\'', "'s>", "&rsquo;", 0 },
{ '\'', "'t>", "&rsquo;", 0 },
{ '\'', "'re>", "&rsquo;", 0 },
{ '\'', "'ll>", "&rsquo;", 0 },
{ '\'', "'ve>", "&rsquo;", 0 },
{ '\'', "'m>", "&rsquo;", 0 },
{ '\'', "'d>", "&rsquo;", 0 },
{ '-', "--", "&mdash;", 1 },
{ '-', "<->", "&ndash;", 0 },
{ '.', "...", "&hellip;", 2 },
{ '.', ". . .", "&hellip;", 4 },
{ '(', "(c)", "&copy;", 2 },
{ '(', "(r)", "&reg;", 2 },
{ '(', "(tm)", "&trade;", 3 },
{ '3', "<3/4>", "&frac34;", 2 },
{ '3', "<3/4ths>", "&frac34;", 2 },
{ '1', "<1/2>", "&frac12;", 2 },
{ '1', "<1/4>", "&frac14;", 2 },
{ '1', "<1/4th>", "&frac14;", 2 },
{ '&', "&#0;", 0, 3 },
};
#endif
void sdhtml_smartypants( struct buf* ob, const uint8_t* text, size_t size )
{
size_t i;
struct smartypants_data smrt = { 0, 0 };
if( !text )
return;
bufgrow( ob, size );
for( i = 0; i < size; ++i )
{
size_t org;
uint8_t action = 0;
org = i;
while( i < size && (action = smartypants_cb_chars[text[i]]) == 0 )
i++;
if( i > org )
bufput( ob, text + org, i - org );
if( i < size )
{
i += smartypants_cb_ptrs[(int) action]
( ob, &smrt, i ? text[i - 1] : 0, text + i, size - i );
}
}
}