3 # MultiMarkdown -- A modification of John Gruber's original Markdown
4 # that adds new features and an output format that can more readily
5 # be converted into other document formats
7 # $Id: MultiMarkdown.pl 492 2008-01-18 23:08:43Z fletcher $
9 # Original Code Copyright (c) 2004-2007 John Gruber
10 # <http://daringfireball.net/projects/markdown/>
12 # MultiMarkdown changes Copyright (c) 2005-2008 Fletcher T. Penney
13 # <http://fletcherpenney.net/>
15 # MultiMarkdown Version 2.0.b5
17 # Based on Markdown.pl 1.0.2b8 - Wed 09 May 2007
20 # TODO: Change math mode delimiter?
21 # TODO: WikiWords inside of MMD links are converted to wiki links
22 # TODO: Still need to get the glossary working in non-memoir documents
23 # TODO: A mechanism to include arbitrary code (LaTeX, etc) without being "ugly"
24 # TODO: Look into discussion re: assigning classes to div's/span's on Markdown list.
25 # TODO: Should I just scrap the WikiWords feature to get rid of all the trouble it causes?
26 # TODO: Improve support for tables with long items and overall width in LaTeX
27 # TODO: Need a syntax for long table cells in MMD, even if no rowspan feature yet
28 # TODO: Create utilities to convert MMD tables to/from tab-delimited
37 # Include ASCIIMathML.pm
38 my $me = $0; # Where am I?
40 # Am I running in Windoze?
44 $me = dirname($me)."\\"; # Get just the directory portion
46 $me = dirname(readlink($me))."/"; # Get just the directory portion
49 require $me ."ASCIIMathML.pm";
51 use Digest::MD5 qw(md5_hex);
52 use vars qw($VERSION $g_use_metadata $g_use_wiki_links $g_base_url
53 $g_bibliography_title $g_allow_mathml $g_base_header_level $mathParser);
56 $mathParser = new Text::ASCIIMathML();
58 ## Disabled; causes problems under Perl 5.6.1:
60 # binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
63 # Global default settings:
65 my $g_empty_element_suffix = " />"; # Change to ">" for HTML output
67 my $g_allow_mathml = 1;
68 my $g_base_header_level = 1;
69 my $g_wikilinks_kill_switch = 1; # WikiLinks may become deprecated; this is the first step
75 # Reusable patterns to match balanced [brackets] and (parens). See
76 # Friedl's "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
77 my ($g_nested_brackets, $g_nested_parens);
78 $g_nested_brackets = qr{
80 [^\[\]]+ # Anything other than brackets
83 (??{ $g_nested_brackets }) # Recursive set of nested brackets
88 # Doesn't allow for whitespace, because we're using it to match URLs:
89 $g_nested_parens = qr{
91 [^()\s]+ # Anything other than parens or whitespace
94 (??{ $g_nested_parens }) # Recursive set of nested brackets
100 # Table of hash values for escaped characters:
102 foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
103 $g_escape_table{$char} = md5_hex($char);
107 # Global hashes, used by various utility routines
110 my %g_html_blocks = ();
112 my %g_metadata_newline = ();
113 my %g_crossrefs = ();
114 my %g_footnotes = ();
115 my %g_attributes = ();
116 my @g_used_footnotes = ();
117 my $g_footnote_counter = 0;
119 my $g_citation_counter = 0;
120 my @g_used_references = ();
121 my %g_references = ();
122 $g_bibliography_title = "Bibliography";
125 $g_metadata_newline{default} = "\n";
126 $g_metadata_newline{keywords} = ", ";
127 my $g_document_format = "";
129 # For use with WikiWords and [[Wiki Links]]
130 $g_use_wiki_links = 0;
131 $g_base_url = ""; # This is the base url to be used for WikiLinks
132 my $g_temp_no_wikiwords = 0;
135 # You can use \WikiWord to prevent a WikiWord from being treated as a link
138 # Used to track when we're inside an ordered or unordered list
139 # (see _ProcessListItems() for details):
140 my $g_list_level = 0;
143 #### Blosxom plug-in interface ##########################################
145 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
146 # which posts Markdown should process, using a "meta-markup: markdown"
147 # header. If it's set to 0 (the default), Markdown will process all
149 my $g_blosxom_use_meta = 0;
153 my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
155 if ( (! $g_blosxom_use_meta) or
156 (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
158 $$body_ref = Markdown($$body_ref);
164 #### Movable Type plug-in interface #####################################
165 eval {require MT}; # Test to see if we're running in MT.
169 require MT::Template::Context;
170 import MT::Template::Context;
172 eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0.
176 my $plugin = new MT::Plugin({
178 description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
179 doc_link => 'http://daringfireball.net/projects/markdown/'
181 MT->add_plugin( $plugin );
184 MT::Template::Context->add_container_tag(MarkdownOptions => sub {
187 my $builder = $ctx->stash('builder');
188 my $tokens = $ctx->stash('tokens');
190 if (defined ($args->{'output'}) ) {
191 $ctx->stash('markdown_output', lc $args->{'output'});
194 defined (my $str = $builder->build($ctx, $tokens) )
195 or return $ctx->error($builder->errstr);
199 MT->add_text_filter('markdown' => {
201 docs => 'http://daringfireball.net/projects/markdown/',
207 my $output = $ctx->stash('markdown_output');
208 if (defined $output && $output =~ m/^html/i) {
209 $g_empty_element_suffix = ">";
210 $ctx->stash('markdown_output', '');
212 elsif (defined $output && $output eq 'raw') {
214 $ctx->stash('markdown_output', '');
218 $g_empty_element_suffix = " />";
221 $text = $raw ? $text : Markdown($text);
226 # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
231 $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
235 MT->add_text_filter('markdown_with_smartypants' => {
236 label => 'Markdown With SmartyPants',
237 docs => 'http://daringfireball.net/projects/markdown/',
242 my $output = $ctx->stash('markdown_output');
243 if (defined $output && $output eq 'html') {
244 $g_empty_element_suffix = ">";
247 $g_empty_element_suffix = " />";
250 $text = Markdown($text);
251 $text = $smartypants->($text, '1');
257 #### BBEdit/command-line text filter interface ##########################
258 # Needs to be hidden from MT (and Blosxom when running in static mode).
260 # We're only using $blosxom::version once; tell Perl not to warn us:
262 unless ( defined($blosxom::version) ) {
265 #### Check for command-line switches: #################
268 Getopt::Long::Configure('pass_through');
269 GetOptions(\%cli_opts,
274 if ($cli_opts{'version'}) { # Version info
275 print "\nThis is Markdown, version $VERSION.\n";
276 print "Copyright 2004 John Gruber\n";
277 print "http://daringfireball.net/projects/markdown/\n\n";
280 if ($cli_opts{'shortversion'}) { # Just the version number string.
284 if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
285 $g_empty_element_suffix = ">";
289 #### Process incoming text: ###########################
292 local $/; # Slurp the whole file
295 print Markdown($text);
303 # Main function. The order in which other subs are called here is
304 # essential. Link and image substitutions need to happen before
305 # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
306 # and <img> tags get encoded.
310 # Clear the global hashes. If we don't clear these, you get conflicts
311 # from other articles when generating a page which contains more than
312 # one article (e.g. an index page that shows the N most recent
320 @g_used_footnotes = ();
321 @g_used_references = ();
324 # Standardize line endings:
325 $text =~ s{\r\n}{\n}g; # DOS to Unix
326 $text =~ s{\r}{\n}g; # Mac to Unix
328 # Make sure $text ends with a couple of newlines:
331 # Convert all tabs to spaces.
332 $text = _Detab($text);
334 # Strip any lines consisting only of spaces and tabs.
335 # This makes subsequent regexen easier to write, because we can
336 # match consecutive blank lines with /\n+/ instead of something
337 # contorted like /[ \t]*\n+/ .
338 $text =~ s/^[ \t]+$//mg;
340 # Strip leading blank lines
344 $text = _ParseMetaData($text) if $g_use_metadata;
346 # And recheck for leading blank lines
349 # Turn block-level HTML blocks into hash entries
350 $text = _HashHTMLBlocks($text);
352 # Strip footnote and link definitions, store in hashes.
353 $text = _StripFootnoteDefinitions($text);
355 $text = _StripLinkDefinitions($text);
357 _GenerateImageCrossRefs($text);
359 $text = _StripMarkdownReferences($text);
361 $text = _RunBlockGamut($text);
363 $text = _DoMarkdownCitations($text);
365 $text = _DoFootnotes($text);
367 $text = _UnescapeSpecialChars($text);
369 # Clean encoding within HTML comments
370 $text = _UnescapeComments($text);
372 # This must follow _UnescapeSpecialChars
373 $text = _UnescapeWikiWords($text);
375 $text = _FixFootnoteParagraphs($text);
376 $text .= _PrintFootnotes();
378 $text .= _PrintMarkdownBibliography();
380 $text = _ConvertCopyright($text);
382 if (lc($g_document_format) =~ /^complete\s*$/) {
383 return xhtmlMetaData() . "<body>\n\n" . $text . "\n</body>\n</html>";
385 return textMetaData() . $text . "\n";
391 sub _StripLinkDefinitions {
393 # Strips link definitions from text, stores the URLs and titles in
397 my $less_than_tab = $g_tab_width - 1;
399 # Link defs are in the form: ^[id]: url "optional title"
401 # Pattern altered for MultiMarkdown
402 # in order to not match citations or footnotes
403 ^[ ]{0,$less_than_tab}\[([^#^].*)\]: # id = $1
405 \n? # maybe *one* newline
407 <?(\S+?)>? # url = $2
409 \n? # maybe one newline
412 (?<=\s) # lookbehind for whitespace
417 )? # title is optional
419 # MultiMarkdown addition for attribute support
422 (?<=\s) # lookbehind for whitespace
423 (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))*
430 $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive
432 $g_titles{lc $1} = $3;
433 $g_titles{lc $1} =~ s/"/"/g;
436 # MultiMarkdown addition "
438 $g_attributes{lc $1} = $4;
447 sub _HashHTMLBlocks {
449 my $less_than_tab = $g_tab_width - 1;
451 # Hashify HTML blocks:
452 # We only want to do this for block-level HTML tags, such as headers,
453 # lists, and tables. That's because we still want to wrap <p>s around
454 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
455 # phrase emphasis, and spans. The list of tags we're looking for is
459 p | div | h[1-6] | blockquote | pre | table |
460 dl | ol | ul | script | noscript | form |
461 fieldset | iframe | ins | del
463 }x; # MultiMarkdown does not include `math` in the above list so that
464 # Equations can optionally be included in separate paragraphs
467 (?: # Match one attr name/value pair
468 \s+ # There needs to be at least some whitespace
469 # before each attribute name.
470 [\w.:_-]+ # Attribute name
473 ".+?" # "Attribute value"
475 '.+?' # 'Attribute value'
480 my $empty_tag = qr{< \w+ $tag_attrs \s* />}xms;
481 my $open_tag = qr{< $block_tags $tag_attrs \s* >}xms;
482 my $close_tag = undef; # let Text::Balanced handle this
484 use Text::Balanced qw(gen_extract_tagged);
485 my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] });
488 ## TO-DO: the 0,3 on the next line ought to respect the
489 ## tabwidth, or else, we should mandate 4-space tabwidth and
491 while ($text =~ s{^(([ ]{0,3}<)?.*\n)}{}m) {
494 # current line could be start of code block
496 my ($tag, $remainder) = $extract_block->($cur_line . $text);
498 my $key = md5_hex($tag);
499 $g_html_blocks{$key} = $tag;
500 push @chunks, "\n\n" . $key . "\n\n";
504 # No tag match, so toss $cur_line into @chunks
505 push @chunks, $cur_line;
509 # current line could NOT be start of code block
510 push @chunks, $cur_line;
514 push @chunks, $text; # Whatever is left.
516 $text = join '', @chunks;
520 # Special case just for <hr />. It was easier to make a special case than
521 # to make the other regex more complicated.
524 (?<=\n\n) # Starting after a blank line
526 \A\n? # the beginning of the doc
529 [ ]{0,$less_than_tab}
530 <(hr) # start tag = $2
533 /?> # the matching end tag
535 (?=\n{2,}|\Z) # followed by a blank line or end of document
538 my $key = md5_hex($1);
539 $g_html_blocks{$key} = $1;
540 "\n\n" . $key . "\n\n";
543 # Special case for standalone HTML comments:
546 (?<=\n\n) # Starting after a blank line
548 \A\n? # the beginning of the doc
551 [ ]{0,$less_than_tab}
558 (?=\n{2,}|\Z) # followed by a blank line or end of document
561 my $key = md5_hex($1);
562 $g_html_blocks{$key} = $1;
563 "\n\n" . $key . "\n\n";
566 # PHP and ASP-style processor instructions (<?…?> and <%…%>)
569 (?<=\n\n) # Starting after a blank line
571 \A\n? # the beginning of the doc
574 [ ]{0,$less_than_tab}
581 (?=\n{2,}|\Z) # followed by a blank line or end of document
584 my $key = md5_hex($1);
585 $g_html_blocks{$key} = $1;
586 "\n\n" . $key . "\n\n";
596 # These are all the transformations that form block-level
597 # tags like paragraphs, headers, and list items.
601 $text = _DoHeaders($text);
603 # Do tables first to populate the table id's for cross-refs
604 # Escape <pre><code> so we don't get greedy with tables
605 $text = _DoTables($text);
607 # And now, protect our tables
608 $text = _HashHTMLBlocks($text);
610 # Do Horizontal Rules:
611 $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
612 $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
613 $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
615 $text = _DoDefinitionLists($text);
616 $text = _DoLists($text);
617 $text = _DoCodeBlocks($text);
618 $text = _DoBlockQuotes($text);
620 # We already ran _HashHTMLBlocks() before, in Markdown(), but that
621 # was to escape raw HTML in the original Markdown source. This time,
622 # we're escaping the markup we've just created, so that we don't wrap
623 # <p> tags around block-level tags.
624 $text = _HashHTMLBlocks($text);
625 $text = _FormParagraphs($text);
633 # These are all the transformations that occur *within* block-level
634 # tags like paragraphs, headers, and list items.
638 $text = _DoCodeSpans($text);
639 $text = _DoMathSpans($text);
640 $text = _EscapeSpecialCharsWithinTagAttributes($text);
641 $text = _EncodeBackslashEscapes($text);
643 # Process anchor and image tags. Images must come first,
644 # because ![foo][f] looks like an anchor.
645 $text = _DoImages($text);
646 $text = _DoAnchors($text);
649 if ($g_use_wiki_links && !$g_temp_no_wikiwords && !$g_wikilinks_kill_switch) {
650 $text = _DoWikiLinks($text);
652 # And then reprocess anchors and images
653 $text = _DoImages($text);
654 $text = _DoAnchors($text);
658 # Make links out of things like `<http://example.com/>`
659 # Must come after _DoAnchors(), because you can use < and >
660 # delimiters in inline links like [this](<url>).
661 $text = _DoAutoLinks($text);
662 $text = _EncodeAmpsAndAngles($text);
663 $text = _DoItalicsAndBold($text);
666 $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
672 sub _EscapeSpecialCharsWithinTagAttributes {
674 # Within tags -- meaning between < and > -- encode [\ ` * _] so they
675 # don't conflict with their use in Markdown for code, italics and strong.
676 # We're replacing each such character with its corresponding MD5 checksum
677 # value; this is likely overkill, but it should prevent us from colliding
678 # with the escape values by accident.
681 my $tokens ||= _TokenizeHTML($text);
682 $text = ''; # rebuild $text from the tokens
684 foreach my $cur_token (@$tokens) {
685 if ($cur_token->[0] eq "tag") {
686 $cur_token->[1] =~ s! \\ !$g_escape_table{'\\'}!gx;
687 $cur_token->[1] =~ s{ (?<=.)</?code>(?=.) }{$g_escape_table{'`'}}gx;
688 $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx;
689 $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx;
691 $text .= $cur_token->[1];
699 # Turn Markdown link shortcuts into XHTML <a> tags.
704 # First, handle reference-style links: [link text] [id]
707 ( # wrap whole match in $1
709 ($g_nested_brackets) # link text = $2
712 [ ]? # one optional space
713 (?:\n[ ]*)? # one optional newline followed by spaces
721 my $whole_match = $1;
725 if ($link_id eq "") {
726 $link_id = lc $link_text; # for shortcut links like [this][].
729 # Allow automatic cross-references to headers
730 my $label = Header2Label($link_id);
731 if (defined $g_urls{$link_id}) {
732 my $url = $g_urls{$link_id};
733 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
734 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
735 $result = "<a href=\"$url\"";
736 if ( defined $g_titles{$link_id} ) {
737 my $title = $g_titles{$link_id};
738 $title =~ s! \* !$g_escape_table{'*'}!gx;
739 $title =~ s! _ !$g_escape_table{'_'}!gx;
740 $result .= " title=\"$title\"";
742 $result .= _DoAttributes($label);
743 $result .= ">$link_text</a>";
744 } elsif (defined $g_crossrefs{$label}) {
745 my $url = $g_crossrefs{$label};
746 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
747 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
748 $result = "<a href=\"$url\"";
749 if ( defined $g_titles{$label} ) {
750 my $title = $g_titles{$label};
751 $title =~ s! \* !$g_escape_table{'*'}!gx;
752 $title =~ s! _ !$g_escape_table{'_'}!gx;
753 $result .= " title=\"$title\"";
755 $result .= _DoAttributes($label);
756 $result .= ">$link_text</a>";
758 $result = $whole_match;
764 # Next, inline-style links: [link text](url "optional title")
767 ( # wrap whole match in $1
769 ($g_nested_brackets) # link text = $2
773 ($g_nested_parens) # href = $3
776 (['"]) # quote char = $5
779 [ \t]* # ignore any spaces/tabs between closing quote and )
780 )? # title is optional
785 my $whole_match = $1;
790 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
791 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
792 $url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present
793 $result = "<a href=\"$url\"";
795 if (defined $title) {
796 $title =~ s/"/"/g;
797 $title =~ s! \* !$g_escape_table{'*'}!gx;
798 $title =~ s! _ !$g_escape_table{'_'}!gx;
799 $result .= " title=\"$title\"";
801 $result .= ">$link_text</a>";
807 # Last, handle reference-style shortcuts: [link text]
808 # These must come last in case you've also got [link test][1]
809 # or [link test](/foo)
812 ( # wrap whole match in $1
814 ([^\[\]]+) # link text = $2; can't contain '[' or ']'
819 my $whole_match = $1;
821 (my $link_id = lc $2) =~ s{[ ]?\n}{ }g; # lower-case and turn embedded newlines into spaces
823 # Allow automatic cross-references to headers
824 my $label = Header2Label($link_id);
825 if (defined $g_urls{$link_id}) {
826 my $url = $g_urls{$link_id};
827 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
828 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
829 $result = "<a href=\"$url\"";
830 if ( defined $g_titles{$link_id} ) {
831 my $title = $g_titles{$link_id};
832 $title =~ s! \* !$g_escape_table{'*'}!gx;
833 $title =~ s! _ !$g_escape_table{'_'}!gx;
834 $result .= " title=\"$title\"";
836 $result .= _DoAttributes($link_id);
837 $result .= ">$link_text</a>";
838 } elsif (defined $g_crossrefs{$label}) {
839 my $url = $g_crossrefs{$label};
840 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
841 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
842 $result = "<a href=\"$url\"";
843 if ( defined $g_titles{$label} ) {
844 my $title = $g_titles{$label};
845 $title =~ s! \* !$g_escape_table{'*'}!gx;
846 $title =~ s! _ !$g_escape_table{'_'}!gx;
847 $result .= " title=\"$title\"";
849 $result .= _DoAttributes($label);
850 $result .= ">$link_text</a>";
852 $result = $whole_match;
863 # Turn Markdown image shortcuts into <img> tags.
868 # First, handle reference-style labeled images: ![alt text][id]
871 ( # wrap whole match in $1
873 (.*?) # alt text = $2
876 [ ]? # one optional space
877 (?:\n[ ]*)? # one optional newline followed by spaces
886 my $whole_match = $1;
890 if ($link_id eq "") {
891 $link_id = lc $alt_text; # for shortcut links like ![this][].
894 $alt_text =~ s/"/"/g;
895 if (defined $g_urls{$link_id}) {
896 my $url = $g_urls{$link_id};
897 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
898 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
900 my $label = Header2Label($alt_text);
901 $g_crossrefs{$label} = "#$label";
902 if (! defined $g_titles{$link_id}) {
903 $g_titles{$link_id} = $alt_text;
906 $result = "<img id=\"$label\" src=\"$url\" alt=\"$alt_text\"";
907 if (defined $g_titles{$link_id}) {
908 my $title = $g_titles{$link_id};
909 $title =~ s! \* !$g_escape_table{'*'}!gx;
910 $title =~ s! _ !$g_escape_table{'_'}!gx;
911 $result .= " title=\"$title\"";
913 $result .= _DoAttributes($link_id);
914 $result .= $g_empty_element_suffix;
917 # If there's no such link ID, leave intact:
918 $result = $whole_match;
925 # Next, handle inline images: ![alt text](url "optional title")
926 # Don't forget: encode * and _
929 ( # wrap whole match in $1
931 (.*?) # alt text = $2
933 \s? # One optional whitespace character
936 ($g_nested_parens) # href = $3
939 (['"]) # quote char = $5
943 )? # title is optional
948 my $whole_match = $1;
951 my $title = (defined $6) ? $6 : '';
953 $alt_text =~ s/"/"/g;
954 $title =~ s/"/"/g;
955 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
956 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
957 $url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present
959 my $label = Header2Label($alt_text);
960 $g_crossrefs{$label} = "#$label";
961 # $g_titles{$label} = $alt_text; # I think this line should not be here
963 $result = "<img id=\"$label\" src=\"$url\" alt=\"$alt_text\"";
964 if (defined $title) {
965 $title =~ s! \* !$g_escape_table{'*'}!gx;
966 $title =~ s! _ !$g_escape_table{'_'}!gx;
967 $result .= " title=\"$title\"";
969 $result .= $g_empty_element_suffix;
984 # Don't do Wiki Links in Headers
985 $g_temp_no_wikiwords = 1;
987 # Setext-style headers:
994 $text =~ s{ ^(.+?)(?:\s\[([^\[]*?)\])?[ \t]*\n=+[ \t]*\n+ }{
996 $label = Header2Label($2);
998 $label = Header2Label($1);
1000 $header = _RunSpanGamut($1);
1001 $header =~ s/^\s*//s;
1004 $g_crossrefs{$label} = "#$label";
1005 $g_titles{$label} = $header;
1006 $idString = " id=\"$label\"";
1011 "<h1$idString>" . $header . "</h1>\n\n";
1014 $text =~ s{ ^(.+?)(?:\s*\[([^\[]*?)\])?[ \t]*\n-+[ \t]*\n+ }{
1016 $label = Header2Label($2);
1018 $label = Header2Label($1);
1020 $header = _RunSpanGamut($1);
1021 $header =~ s/^\s*//s;
1024 $g_crossrefs{$label} = "#$label";
1025 $g_titles{$label} = $header;
1026 $idString = " id=\"$label\"";
1031 "<h2$idString>" . $header . "</h2>\n\n";
1035 # atx-style headers:
1038 # ## Header 2 with closing hashes ##
1043 ^(\#{1,6}) # $1 = string of #'s
1045 (.+?) # $2 = Header text
1047 (?:\[([^\[]*?)\])? # $3 = optional label for cross-reference
1049 \#* # optional closing #'s (not counted)
1052 my $h_level = length($1) + $g_base_header_level - 1;
1054 $label = Header2Label($3);
1056 $label = Header2Label($2);
1058 $header = _RunSpanGamut($2);
1059 $header =~ s/^\s*//s;
1062 $g_crossrefs{$label} = "#$label";
1063 $g_titles{$label} = $header;
1064 $idString = " id=\"$label\"";
1069 "<h$h_level$idString>" . $header . "</h$h_level>\n\n";
1072 # Can now process Wiki Links again
1073 $g_temp_no_wikiwords = 0;
1081 # Form HTML ordered (numbered) and unordered (bulleted) lists.
1084 my $less_than_tab = $g_tab_width - 1;
1086 # Re-usable patterns to match list item bullets and number markers:
1087 my $marker_ul = qr/[*+-]/;
1088 my $marker_ol = qr/\d+[.]/;
1089 my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
1091 # Re-usable pattern to match any entire ul or ol list:
1092 my $whole_list = qr{
1095 [ ]{0,$less_than_tab}
1096 (${marker_any}) # $3 = first list item marker
1105 (?! # Negative lookahead for another list item marker
1113 # We use a different prefix before nested lists than top-level lists.
1114 # See extended comment in _ProcessListItems().
1116 # Note: There's a bit of duplication here. My original implementation
1117 # created a scalar regex pattern as the conditional result of the test on
1118 # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
1119 # substitution once, using the scalar as the pattern. This worked,
1120 # everywhere except when running under MT on my hosting account at Pair
1121 # Networks. There, this caused all rebuilds to be killed by the reaper (or
1122 # perhaps they crashed, but that seems incredibly unlikely given that the
1123 # same script on the same server ran fine *except* under MT). I've spent
1124 # more time trying to figure out why this is happening than I'd like to
1125 # admit. My only guess, backed up by the fact that this workaround works,
1126 # is that Perl optimizes the substitution when it can figure out that the
1127 # pattern will never change, and when this optimization isn't on, we run
1128 # afoul of the reaper. Thus, the slightly redundant code that uses two
1129 # static s/// patterns rather than one conditional pattern.
1131 if ($g_list_level) {
1137 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
1139 # Turn double returns into triple returns, so that we can make a
1140 # paragraph for the last item in a list, if necessary:
1141 $list =~ s/\n{2,}/\n\n\n/g;
1142 my $result = _ProcessListItems($list, $marker_any);
1144 # Trim any trailing whitespace, to put the closing `</$list_type>`
1145 # up on the preceding line, to get it past the current stupid
1146 # HTML block parser. This is a hack to work around the terrible
1147 # hack that is the HTML block parser.
1148 $result =~ s{\s+$}{};
1149 $result = "<$list_type>" . $result . "</$list_type>\n";
1159 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
1160 # Turn double returns into triple returns, so that we can make a
1161 # paragraph for the last item in a list, if necessary:
1162 $list =~ s/\n{2,}/\n\n\n/g;
1163 my $result = _ProcessListItems($list, $marker_any);
1164 $result = "<$list_type>\n" . $result . "</$list_type>\n";
1174 sub _ProcessListItems {
1176 # Process the contents of a single ordered or unordered list, splitting it
1177 # into individual list items.
1180 my $list_str = shift;
1181 my $marker_any = shift;
1184 # The $g_list_level global keeps track of when we're inside a list.
1185 # Each time we enter a list, we increment it; when we leave a list,
1186 # we decrement. If it's zero, we're not in a list anymore.
1188 # We do this because when we're not inside a list, we want to treat
1189 # something like this:
1191 # I recommend upgrading to version
1192 # 8. Oops, now this line is treated
1195 # As a single paragraph, despite the fact that the second line starts
1196 # with a digit-period-space sequence.
1198 # Whereas when we're inside a list (or sub-list), that line will be
1199 # treated as the start of a sub-list. What a kludge, huh? This is
1200 # an aspect of Markdown's syntax that's hard to parse perfectly
1201 # without resorting to mind-reading. Perhaps the solution is to
1202 # change the syntax rules such that sub-lists must start with a
1203 # starting cardinal number; e.g. "1." or "a.".
1207 # trim trailing blank lines:
1208 $list_str =~ s/\n{2,}\z/\n/;
1212 (\n)? # leading line = $1
1213 (^[ \t]*) # leading whitespace = $2
1214 ($marker_any) [ \t]+ # list marker = $3
1215 ((?s:.+?) # list item text = $4
1217 (?= \n* (\z | \2 ($marker_any) [ \t]+))
1220 my $leading_line = $1;
1221 my $leading_space = $2;
1223 if ($leading_line or ($item =~ m/\n{2,}/)) {
1224 $item = _RunBlockGamut(_Outdent($item));
1227 # Recursion for sub-lists:
1228 $item = _DoLists(_Outdent($item));
1230 $item = _RunSpanGamut($item);
1233 "<li>" . $item . "</li>\n";
1244 # Process Markdown `<pre><code>` blocks.
1251 ( # $1 = the code block -- one or more lines, starting with a space/tab
1253 (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces
1257 ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1260 my $result; # return value
1262 $codeblock = _EncodeCode(_Outdent($codeblock));
1263 $codeblock = _Detab($codeblock);
1264 $codeblock =~ s/\A\n+//; # trim leading newlines
1265 $codeblock =~ s/\n+\z//; # trim trailing newlines
1267 $result = "\n\n<pre><code>" . $codeblock . "</code></pre>\n\n"; # CHANGED: Removed newline for MMD
1278 # * Backtick quotes are used for <code></code> spans.
1280 # * You can use multiple backticks as the delimiters if you want to
1281 # include literal backticks in the code span. So, this input:
1283 # Just type ``foo `bar` baz`` at the prompt.
1285 # Will translate to:
1287 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1289 # There's no arbitrary limit to the number of backticks you
1290 # can use as delimiters. If you need three consecutive backticks
1291 # in your code, use four for delimiters, etc.
1293 # * You can use spaces to get literal backticks at the edges:
1295 # ... type `` `bar` `` ...
1299 # ... type <code>`bar`</code> ...
1305 (?<!\\) # Character before opening ` can't be a backslash
1306 (`+) # $1 = Opening run of `
1307 (.+?) # $2 = The code block
1309 \1 # Matching closer
1313 $c =~ s/^[ \t]*//g; # leading whitespace
1314 $c =~ s/[ \t]*$//g; # trailing whitespace
1315 $c = _EncodeCode($c);
1325 # Encode/escape certain characters inside Markdown code runs.
1326 # The point is that in code, these characters are literals,
1327 # and lose their special Markdown meanings.
1331 # Protect Wiki Links in Code Blocks
1332 if (!$g_wikilinks_kill_switch) {
1333 my $WikiWord = qr'[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
1334 s/(\A\\?|\s\\?)($WikiWord)/$1\\$2/gx;
1337 # Encode all ampersands; HTML entities are not
1338 # entities within a Markdown code span.
1341 # Encode $'s, but only if we're running under Blosxom.
1342 # (Blosxom interpolates Perl variables in article bodies.)
1345 if (defined($blosxom::version)) {
1351 # Do the angle bracket song and dance:
1355 # Now, escape characters that are magic in Markdown:
1356 s! \* !$g_escape_table{'*'}!gx;
1357 s! _ !$g_escape_table{'_'}!gx;
1358 s! { !$g_escape_table{'{'}!gx;
1359 s! } !$g_escape_table{'}'}!gx;
1360 s! \[ !$g_escape_table{'['}!gx;
1361 s! \] !$g_escape_table{']'}!gx;
1362 s! \\ !$g_escape_table{'\\'}!gx;
1368 sub _DoItalicsAndBold {
1371 # Cave in - `*` and `_` behave differently... We'll see how it works out
1374 # <strong> must go first:
1375 $text =~ s{ (?<!\w) (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
1376 {<strong>$2</strong>}gsx;
1378 $text =~ s{ (?<!\w) (\*|_) (?=\S) (.+?) (?<=\S) \1 }
1381 # And now, a second pass to catch nested strong and emphasis special cases
1382 $text =~ s{ (?<!\w) (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
1383 {<strong>$2</strong>}gsx;
1385 $text =~ s{ (?<!\w) (\*|_) (?=\S) (.+?) (?<=\S) \1 }
1388 # And now, allow `*` in the middle of words
1390 # <strong> must go first:
1391 $text =~ s{ (\*\*) (?=\S) (.+?[*]*) (?<=\S) \1 }
1392 {<strong>$2</strong>}gsx;
1394 $text =~ s{ (\*) (?=\S) (.+?) (?<=\S) \1 }
1401 sub _DoBlockQuotes {
1405 ( # Wrap whole match in $1
1407 ^[ \t]*>[ \t]? # '>' at the start of a line
1408 .+\n # rest of the first line
1409 (.+\n)* # subsequent consecutive lines
1415 $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
1416 $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
1417 $bq = _RunBlockGamut($bq); # recurse
1420 # These leading spaces screw with <pre> content, so we need to fix that:
1429 "<blockquote>\n$bq\n</blockquote>\n\n";
1437 sub _FormParagraphs {
1440 # $text - string to process with html <p> tags
1444 # Strip leading and trailing lines:
1448 my @grafs = split(/\n{2,}/, $text);
1454 unless (defined( $g_html_blocks{$_} )) {
1455 $_ = _RunSpanGamut($_);
1462 # Unhashify HTML blocks
1464 # foreach my $graf (@grafs) {
1465 # my $block = $g_html_blocks{$graf};
1466 # if (defined $block) {
1471 foreach my $graf (@grafs) {
1472 # Modify elements of @grafs in-place...
1473 my $block = $g_html_blocks{$graf};
1474 if (defined $block) {
1482 markdown\s*=\s* (['"]) # $2 = attr quote char
1491 (</div>) # $4 = closing tag
1496 my ($div_open, $div_content, $div_close) = ($1, $3, $4);
1498 # We can't call Markdown(), because that resets the hash;
1499 # that initialization code should be pulled into its own sub, though.
1500 $div_content = _HashHTMLBlocks($div_content);
1501 $div_content = _StripLinkDefinitions($div_content);
1502 $div_content = _RunBlockGamut($div_content);
1503 $div_content = _UnescapeSpecialChars($div_content);
1505 $div_open =~ s{\smarkdown\s*=\s*(['"]).+?\1}{}ms;
1507 $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1513 return join "\n\n", @grafs;
1517 sub _EncodeAmpsAndAngles {
1518 # Smart processing for ampersands and angle brackets that need to be encoded.
1522 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1523 # http://bumppo.net/projects/amputator/
1524 $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g;
1527 $text =~ s{<(?![a-z/?\$!])}{<}gi;
1533 sub _EncodeBackslashEscapes {
1535 # Parameter: String.
1536 # Returns: The string, after processing the following backslash
1541 s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first.
1542 s! \\` !$g_escape_table{'`'}!gx;
1543 s! \\\* !$g_escape_table{'*'}!gx;
1544 s! \\_ !$g_escape_table{'_'}!gx;
1545 s! \\\{ !$g_escape_table{'{'}!gx;
1546 s! \\\} !$g_escape_table{'}'}!gx;
1547 s! \\\[ !$g_escape_table{'['}!gx;
1548 s! \\\] !$g_escape_table{']'}!gx;
1549 s! \\\( !$g_escape_table{'('}!gx;
1550 s! \\\) !$g_escape_table{')'}!gx;
1551 s! \\> !$g_escape_table{'>'}!gx;
1552 s! \\\# !$g_escape_table{'#'}!gx;
1553 s! \\\+ !$g_escape_table{'+'}!gx;
1554 s! \\\- !$g_escape_table{'-'}!gx;
1555 s! \\\. !$g_escape_table{'.'}!gx;
1556 s{ \\! }{$g_escape_table{'!'}}gx;
1565 $text =~ s{<((https?|ftp|dict):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
1567 # Email addresses: <address@domain.foo>
1574 [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1578 _EncodeEmailAddress( _UnescapeSpecialChars($1) );
1585 sub _EncodeEmailAddress {
1587 # Input: an email address, e.g. "foo@example.com"
1589 # Output: the email address as a mailto link, with each character
1590 # of the address encoded as either a decimal or hex entity, in
1591 # the hopes of foiling most address harvesting spam bots. E.g.:
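1593 # <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
1594 # x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
1595 # &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>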
1597 # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1598 # mailing list: <http://tinyurl.com/yu7ue>
1605 sub { '&#' . ord(shift) . ';' },
1606 sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
1610 $addr = "mailto:" . $addr;
1614 if ( $char eq '@' ) {
1615 # this *must* be encoded. I insist.
1616 $char = $encode[int rand 1]->($char);
1617 } elsif ( $char ne ':' ) {
1618 # leave ':' alone (to spot mailto: later)
1620 # roughly 10% raw, 45% hex, 45% dec
1622 $r > .9 ? $encode[2]->($char) :
1623 $r < .45 ? $encode[1]->($char) :
1630 $addr = qq{<a href="$addr">$addr</a>};
1631 $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
1637 sub _UnescapeSpecialChars {
1639 # Swap back in all the special characters we've hidden.
1643 while( my($char, $hash) = each(%g_escape_table) ) {
1644 $text =~ s/$hash/$char/g;
1652 # Parameter: String containing HTML markup.
1653 # Returns: Reference to an array of the tokens comprising the input
1654 # string. Each token is either a tag (possibly with nested
1655 # tags contained therein, such as <a href="<MTFoo>">), or a
1656 # run of text between tags. Each element of the array is a
1657 # two-element array; the first is either 'tag' or 'text';
1658 # the second is the actual value.
1661 # Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
1662 # <http://www.bradchoate.com/past/mtregex.php>
1667 my $len = length $str;
1671 my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
1672 my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
1673 (?s: <\? .*? \?> ) | # processing instruction
1674 $nested_tags/ix; # nested tags
1676 while ($str =~ m/($match)/g) {
1678 my $sec_start = pos $str;
1679 my $tag_start = $sec_start - length $whole_tag;
1680 if ($pos < $tag_start) {
1681 push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
1683 push @tokens, ['tag', $whole_tag];
1686 push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
1694 # Remove one level of line-leading tabs or spaces
1698 $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
1705 # Cribbed from a post by Bart Lateur:
1706 # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
1710 $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
1715 # MultiMarkdown Routines
1718 sub _ParseMetaData {
1720 my $clean_text = "";
1722 my ($inMetaData, $currentKey) = (1,'');
1724 foreach my $line ( split /\n/, $text ) {
1725 $line =~ /^$/ and $inMetaData = 0 and $clean_text .= $line and next;
1727 if ($line =~ /^([a-zA-Z0-9][0-9a-zA-Z _-]*?):\s*(.*)$/ ) {
1729 $currentKey =~ s/ / /g;
1730 $g_metadata{$currentKey} = $2;
1731 if (lc($currentKey) eq "format") {
1732 $g_document_format = lc($g_metadata{$currentKey});
1734 if (lc($currentKey) eq "base url") {
1735 $g_base_url = $g_metadata{$currentKey};
1737 if (lc($currentKey) eq "use wikilinks") {
1738 if (lc($g_metadata{$currentKey}) eq "true" ||
1739 $g_metadata{$currentKey} eq "1") {
1740 $g_use_wiki_links = 1;
1743 if (lc($currentKey) eq "bibliography title") {
1744 $g_bibliography_title = $g_metadata{$currentKey};
1745 $g_bibliography_title =~ s/\s*$//;
1747 if (lc($currentKey) eq "base header level") {
1748 $g_base_header_level = $g_metadata{$currentKey};
1750 if (!$g_metadata_newline{$currentKey}) {
1751 $g_metadata_newline{$currentKey} = $g_metadata_newline{default};
1754 if ($currentKey eq "") {
1755 # No metadata present
1756 $clean_text .= "$line\n";
1760 if ($line =~ /^\s*(.+)$/ ) {
1761 $g_metadata{$currentKey} .= "$g_metadata_newline{$currentKey}$1";
1765 $clean_text .= "$line\n";
1772 sub _StripFootnoteDefinitions {
1774 my $less_than_tab = $g_tab_width - 1;
1777 \n\[\^([^\n]+?)\]\:[ \t]*# id = $1
1779 (.*?)\n{1,2} # end at new paragraph
1780 ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1785 my $footnote = "$2\n";
1786 $footnote =~ s/^[ ]{0,$g_tab_width}//gm;
1788 $g_footnotes{id2footnote($id)} = $footnote;
1797 # First, run routines that get skipped in footnotes
1798 foreach my $label (sort keys %g_footnotes) {
1799 my $footnote = _RunBlockGamut($g_footnotes{$label});
1801 $footnote = _DoMarkdownCitations($footnote);
1802 $g_footnotes{$label} = $footnote;
1806 \[\^(.+?)\] # id = $1
1809 my $id = id2footnote($1);
1810 if (defined $g_footnotes{$id} ) {
1811 $g_footnote_counter++;
1812 if ($g_footnotes{$id} =~ /^glossary:/i) {
1813 $result = "<a href=\"#fn:$id\" id=\"fnref:$id\" class=\"footnote glossary\">$g_footnote_counter</a>";
1815 $result = "<a href=\"#fn:$id\" id=\"fnref:$id\" class=\"footnote\">$g_footnote_counter</a>";
1817 push (@g_used_footnotes,$id);
1825 sub _FixFootnoteParagraphs {
1828 $text =~ s/^\<p\>\<\/footnote\>/<\/footnote>/gm;
1833 sub _PrintFootnotes{
1834 my $footnote_counter = 0;
1837 foreach my $id (@g_used_footnotes) {
1838 $footnote_counter++;
1839 my $footnote = $g_footnotes{$id};
1840 my $footnote_closing_tag = "";
1842 $footnote =~ s/(\<\/(p(re)?|ol|ul)\>)$//;
1843 $footnote_closing_tag = $1;
1845 if ($footnote =~ s/^glossary:\s*//i) {
1846 # Add some formatting for glossary entries
1851 (?:\(([^\(\)]*)\)[^\n]*)? # $2 = optional sort key
1854 my $glossary = "<span class=\"glossary name\">$1</span>";
1857 $glossary.="<span class=\"glossary sort\" style=\"display:none\">$2</span>";
1863 $result.="<li id=\"fn:$id\">$footnote<a href=\"#fnref:$id\" class=\"reversefootnote\"> ↩</a>$footnote_closing_tag</li>\n\n";
1865 $result.="<li id=\"fn:$id\">$footnote<a href=\"#fnref:$id\" class=\"reversefootnote\"> ↩</a>$footnote_closing_tag</li>\n\n";
1868 $result .= "</ol>\n</div>";
1870 if ($footnote_counter > 0) {
1871 $result = "\n\n<div class=\"footnotes\">\n<hr$g_empty_element_suffix\n<ol>\n\n".$result;
1876 $result= _UnescapeSpecialChars($result);
1882 my $label = lc $header;
1883 $label =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters
1884 while ($label =~ s/^[^A-Za-z]//g)
1885 {}; # Strip illegal leading characters
1890 # Since we prepend "fn:", we can allow leading digits in footnotes
1892 my $footnote = lc $id;
1893 $footnote =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters
1899 my $result = qq{<?xml version="1.0" encoding="UTF-8" ?>\n};
1901 # This screws up xsltproc - make sure to use `-nonet -novalid` if you
1903 if ($g_allow_mathml) {
1904 $result .= qq{<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"\n\t"http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd">
1907 $result.= qq{<html xmlns="http://www.w3.org/1999/xhtml">\n\t<head>\n};
1909 $result .= qq{<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n};
1911 $result.= qq!<html xmlns="http://www.w3.org/1999/xhtml">\n\t<head>\n!;
1914 $result.= "\t\t<!-- Processed by MultiMarkdown -->\n";
1916 foreach my $key (sort keys %g_metadata ) {
1917 # Strip trailing spaces
1918 $g_metadata{$key} =~ s/(\s)*$//s;
1920 # Strip spaces from key
1921 my $export_key = $key;
1922 $export_key =~ s/\s//g;
1924 if (lc($key) eq "title") {
1925 $result.= "\t\t<title>" . _EncodeAmpsAndAngles($g_metadata{$key}) . "</title>\n";
1926 } elsif (lc($key) eq "css") {
1927 $result.= "\t\t<link type=\"text/css\" rel=\"stylesheet\" href=\"$g_metadata{$key}\"$g_empty_element_suffix\n";
1928 } elsif (lc($export_key) eq "xhtmlheader") {
1929 $result .= "\t\t$g_metadata{$key}\n";
1931 $result.= qq!\t\t<meta name="$export_key" content="$g_metadata{$key}"$g_empty_element_suffix\n!;
1934 $result.= "\t</head>\n";
1942 foreach my $key (sort keys %g_metadata ) {
1943 $result .= "$key: $g_metadata{$key}\n";
1945 $result =~ s/\s*\n/<br \/>\n/g;
1947 if ($result ne "") {
1954 sub _ConvertCopyright{
1956 # Convert to an XML compatible form of copyright symbol
1958 $text =~ s/©/©/gi;
1963 sub _CreateWikiLink {
1974 return "[$title]($g_base_url$id)";
1979 my $WikiWord = '[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
1980 my $FreeLinkPattern = "([-,.()' _0-9A-Za-z\x80-\xff]+)";
1982 if ($g_wikilinks_kill_switch) {
1986 if ($g_use_wiki_links) {
1989 \[\[($FreeLinkPattern)\]\]
1998 _CreateWikiLink($label)
2003 if ($g_use_wiki_links) {
2007 $1 . _CreateWikiLink($2)
2010 # Catch WikiWords at beginning of text
2011 $text =~ s{^($WikiWord)
2021 sub _UnescapeWikiWords {
2023 my $WikiWord = '[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
2025 if ($g_wikilinks_kill_switch) {
2029 # Unescape escaped WikiWords
2030 # This should occur whether wikilinks are enabled or not
2031 $text =~ s/(?<=\B)\\($WikiWord)/$1/g;
2039 my $less_than_tab = $g_tab_width - 1;
2041 # Algorithm inspired by PHP Markdown Extra's table support
2042 # <http://www.michelf.com/projects/php-markdown/>
2044 # Reusable regexp's to match table
2046 my $line_start = qr{
2047 [ ]{0,$less_than_tab}
2059 my $table_rows = qr{
2063 my $table_caption = qr{
2068 my $table_divider = qr{
2070 [\|\-\+\:\.][ \-\+\|\:\.]* \| [ \-\+\|\:\.]*
2073 my $whole_table = qr{
2074 ($table_caption)? # Optional caption
2075 ($first_row # First line must start at beginning
2076 ($table_row)*?)? # Header Rows
2077 $table_divider # Divider/Alignment definitions
2078 $table_rows+ # Body Rows
2079 ($table_caption)? # Optional caption
2083 # Find whole tables, then break them up and process them
2086 ^($whole_table) # Whole table in $1
2087 (\n|\Z) # End of file or 2 blank lines
2091 # Clean extra spaces at end of lines -
2092 # they cause the processing to choke
2093 $table =~ s/[\t ]*\n/\n/gs;
2095 my $result = "<table>\n";
2097 my $use_row_header = 1;
2099 # Add Caption, if present
2101 if ($table =~ s/^$line_start\[\s*(.*?)\s*\](\[\s*(.*?)\s*\])?[ \t]*$//m) {
2104 # add caption id to cross-ref list
2105 $table_id = Header2Label($3);
2107 # use caption as the id
2108 $table_id = Header2Label($1);
2110 $result .= "<caption id=\"$table_id\">" . _RunSpanGamut($1). "</caption>\n";
2112 $g_crossrefs{$table_id} = "#$table_id";
2113 $g_titles{$table_id} = "$1";
2116 # If a second "caption" is present, treat it as a summary
2117 # However, this is not valid in XHTML 1.0 Strict
2118 # But maybe in future
2120 # A summary might be longer than one line
2121 if ($table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s) {
2122 # $result .= "<summary>" . _RunSpanGamut($1) . "</summary>\n";
2125 # Now, divide table into header, alignment, and body
2127 # First, add leading \n in case there is no header
2129 $table = "\n" . $table;
2133 $table =~ s/\n($table_divider)\n(($table_rows)+)//s;
2136 my $alignment_string = $1;
2141 # Process column alignment
2142 while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) {
2143 my $cell = _RunSpanGamut($1);
2145 $result .= "<col class=\"extended\"";
2149 if ($cell =~ /\:$/) {
2150 if ($cell =~ /^\:/) {
2151 $result .= " align=\"center\"$g_empty_element_suffix\n";
2152 push(@alignments,"center");
2154 $result .= " align=\"right\"$g_empty_element_suffix\n";
2155 push(@alignments,"right");
2158 if ($cell =~ /^\:/) {
2159 $result .= " align=\"left\"$g_empty_element_suffix\n";
2160 push(@alignments,"left");
2162 if (($cell =~ /^\./) || ($cell =~ /\.$/)) {
2163 $result .= " align=\"char\"$g_empty_element_suffix\n";
2164 push(@alignments,"char");
2166 $result .= "$g_empty_element_suffix\n";
2167 push(@alignments,"");
2174 $table =~ s/^\n+//s;
2176 $result .= "<thead>\n";
2179 $table =~ s/\n[ \t]*\n/\n/g;
2181 foreach my $line (split(/\n/, $table)) {
2182 # process each line (row) in table
2183 $result .= "<tr>\n";
2185 while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) {
2186 # process contents of each cell
2187 my $cell = _RunSpanGamut($1);
2190 if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) {
2191 $colspan = " colspan=\"" . length($ending) . "\"";
2193 $result .= "\t<th$colspan>$cell</th>\n";
2195 if ($cell =~ /^\s*$/) {
2196 $use_row_header = 1;
2198 $use_row_header = 0;
2203 $result .= "</tr>\n";
2208 $result .= "</thead>\n<tbody>\n";
2210 foreach my $line (split(/\n/, $body)) {
2211 # process each line (row) in table
2212 if ($line =~ /^\s*$/) {
2213 $result .= "</tbody>\n\n<tbody>\n";
2216 $result .= "<tr>\n";
2218 while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) {
2219 # process contents of each cell
2220 my $cell = _RunSpanGamut($1);
2226 my $cell_type = "td";
2227 if ($count == 0 && $use_row_header == 1) {
2230 if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) {
2231 $colspan = " colspan=\"" . length($ending) . "\"";
2233 if ($alignments[$count] !~ /^\s*$/) {
2234 $result .= "\t<$cell_type$colspan align=\"$alignments[$count]\">$cell</$cell_type>\n";
2236 $result .= "\t<$cell_type$colspan>$cell</$cell_type>\n";
2240 $result .= "</tr>\n";
2243 # Strip out empty <thead> sections
2244 $result =~ s/<thead>\s*<\/thead>\s*//s;
2246 # Handle pull-quotes
2248 # This might be too specific for my needs. If others want it
2249 # removed, I am open to discussion.
2251 $result =~ s/<table>\s*<col \/>\s*<tbody>/<table class="pull-quote">\n<col \/>\n<tbody>/s;
2253 $result .= "</tbody>\n</table>\n";
2257 my $table_body = qr{
2258 ( # wrap whole match in $2
2260 (.*?\|.*?)\n # wrap headers in $3
2262 [ ]{0,$less_than_tab}
2263 ($table_divider) # alignment in $4
2265 ( # wrap cells in $5
2279 if (defined $g_attributes{$id}) {
2280 my $attributes = $g_attributes{$id};
2281 while ($attributes =~ s/(\S+)="(.*?)"//) {
2282 $result .= " $1=\"$2\"";
2284 while ($attributes =~ /(\S+)=(\S+)/g) {
2285 $result .= " $1=\"$2\"";
2293 sub _StripMarkdownReferences {
2295 my $less_than_tab = $g_tab_width - 1;
2298 \n\[\#(.+?)\]:[ \t]* # id = $1
2300 (.*?)\n{1,2} # end at new paragraph
2301 ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
2306 my $reference = "$2\n";
2308 $reference =~ s/^[ ]{0,$g_tab_width}//gm;
2310 $reference = _RunBlockGamut($reference);
2312 # strip leading and trailing <p> tags (they will be added later)
2313 $reference =~ s/^\<p\>//s;
2314 $reference =~ s/\<\/p\>\s*$//s;
2316 $g_references{$id} = $reference;
2322 sub _DoMarkdownCitations {
2325 $text =~ s{ # Allow for citations without locator to be written
2326 \[\#([^\[]*?)\] # in usual manner, e.g. [#author][] rather than
2335 \[([^\[]*?)\] # citation text = $1
2336 [ ]? # one optional space
2337 (?:\n[ ]*)? # one optional newline followed by spaces
2338 \[\#(.*?)\] # id = $2
2341 my $anchor_text = $1;
2345 # implement equivalent to \citet
2346 my $textual_string = "";
2347 if ($anchor_text =~ s/^(.*?);\s*//) {
2348 $textual_string = "<span class=\"textual citation\">$1</span>";
2351 if (defined $g_references{$id} ) {
2352 my $citation_counter=0;
2354 # See if citation has been used before
2355 foreach my $old_id (@g_used_references) {
2356 $citation_counter++;
2357 $count = $citation_counter if ($old_id eq $id);
2360 if (! defined $count) {
2361 $g_citation_counter++;
2362 $count = $g_citation_counter;
2363 push (@g_used_references,$id);
2366 $result = "<span class=\"markdowncitation\">$textual_string (<a href=\"#$id\">$count</a>";
2368 if ($anchor_text ne "") {
2369 $result .=", <span class=\"locator\">$anchor_text</span>";
2372 $result .= ")</span>";
2374 # No reference exists
2375 $result = "<span class=\"externalcitation\">$textual_string (<a id=\"$id\">$id</a>";
2377 if ($anchor_text ne "") {
2378 $result .=", <span class=\"locator\">$anchor_text</span>";
2381 $result .= ")</span>";
2384 if (Header2Label($anchor_text) eq "notcited"){
2385 $result = "<span class=\"notcited\" id=\"$id\"/>";
2394 sub _PrintMarkdownBibliography{
2395 my $citation_counter = 0;
2398 foreach my $id (@g_used_references) {
2399 $citation_counter++;
2400 $result.="<div id=\"$id\"><p>[$citation_counter] <span class=\"item\">$g_references{$id}</span></p></div>\n\n";
2402 $result .= "</div>";
2404 if ($citation_counter > 0) {
2405 $result = "\n\n<div class=\"bibliography\">\n<hr$g_empty_element_suffix\n<p>$g_bibliography_title</p>\n\n".$result;
2413 sub _GenerateImageCrossRefs {
2417 # First, handle reference-style labeled images: ![alt text][id]
2420 ( # wrap whole match in $1
2422 (.*?) # alt text = $2
2425 [ ]? # one optional space
2426 (?:\n[ ]*)? # one optional newline followed by spaces
2435 my $whole_match = $1;
2437 my $link_id = lc $3;
2439 if ($link_id eq "") {
2440 $link_id = lc $alt_text; # for shortcut links like ![this][].
2443 $alt_text =~ s/"/"/g;
2444 if (defined $g_urls{$link_id}) {
2445 my $label = Header2Label($alt_text);
2446 $g_crossrefs{$label} = "#$label";
2449 # If there's no such link ID, leave intact:
2450 $result = $whole_match;
2457 # Next, handle inline images: ![alt text](url "optional title")
2458 # Don't forget: encode * and _
2461 ( # wrap whole match in $1
2463 (.*?) # alt text = $2
2467 <?(\S+?)>? # src url = $3
2470 (['"]) # quote char = $5 '
2474 )? # title is optional
2479 my $whole_match = $1;
2482 $alt_text =~ s/"/"/g;
2483 my $label = Header2Label($alt_text);
2484 $g_crossrefs{$label} = "#$label";
2491 sub _FindMathEquations{
2495 (\<math[^\>]*)id=\"(.*?)\"> # "
2497 my $label = Header2Label($2);
2498 my $header = _RunSpanGamut($2);
2500 $g_crossrefs{$label} = "#$label";
2501 $g_titles{$label} = $header;
2503 $1 . "id=\"$label\">";
2510 # Based on Gruber's _DoCodeSpans
2513 my $display_as_block = 0;
2514 $display_as_block = 1 if ($text =~ /^<<[^\>\>]*>>$/);
2517 (?<!\\) # Character before opening << can't be a backslash
2519 (.+?) # $2 = The code block
2520 (?:\[(.+)\])? # $3 = optional label
2525 my @attr = (xmlns=>"http://www.w3.org/1998/Math/MathML");
2528 $label = Header2Label($3);
2529 my $header = _RunSpanGamut($3);
2531 $g_crossrefs{$label} = "#$label";
2532 $g_titles{$label} = $header;
2534 $m =~ s/^[ \t]*//g; # leading whitespace
2535 $m =~ s/[ \t]*$//g; # trailing whitespace
2536 push(@attr,(id=>"$label")) if ($label ne "");
2537 push(@attr,(display=>"block")) if ($display_as_block == 1);
2539 $m = $mathParser->TextToMathML($m,\@attr);
2546 sub _DoDefinitionLists {
2547 # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra
2550 my $less_than_tab = $g_tab_width -1;
2552 my $line_start = qr{
2553 [ ]{0,$less_than_tab}
2561 my $definition = qr{
2562 \n?[ ]{0,$less_than_tab}
2564 ((?=\n*[ ]{0,$less_than_tab}\S)|\n\n|\Z) # Lookahead for non-space at line-start,
2565 # two returns, or end of doc
2568 my $definition_block = qr{
2569 ((?:$term)+) # $1 = one or more terms
2570 ((?:$definition)+) # $2 = by one or more definitions
2573 my $definition_list = qr{
2574 (?:$definition_block\n*)+ # One or more definition blocks
2578 ($definition_list) # $1 = the whole list
2584 (?:$definition_block)\n*
2590 [ ]{0,$less_than_tab}
2596 $term =~ s/^\s*(.*?)\s*$/$1/;
2597 if ($term !~ /^\s*$/){
2598 $result = "<dt>" . _RunSpanGamut($1) . "</dt>\n";
2606 my $def = $1 . "\n";
2607 $def =~ s/^[ ]{0,$g_tab_width}//gm;
2608 "<dd>\n" . _RunBlockGamut($def) . "\n</dd>\n";
2611 $terms . $defs . "\n";
2614 "<dl>\n" . $list . "</dl>\n\n";
2620 sub _UnescapeComments{
2621 # Remove encoding inside comments
2622 # Based on proposal by Toras Doran (author of Text::MultiMarkdown)
2626 (?<=<!--) # Begin comment
2627 (.*?) # Anything inside
2628 (?=-->) # End comments
2653 B<MultiMarkdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<--shortversion> ]
2659 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2660 easy-to-write structured text format into HTML. Markdown's text format
2661 is most similar to that of plain text email, and supports features such
2662 as headers, *emphasis*, code blocks, blockquotes, and links.
2664 Markdown's syntax is designed not as a generic markup language, but
2665 specifically to serve as a front-end to (X)HTML. You can use span-level
2666 HTML tags anywhere in a Markdown document, and you can use block level
2667 HTML tags (like <div> and <table>) as well.
2669 For more information about Markdown's syntax, see:
2671 http://daringfireball.net/projects/markdown/
2676 Use "--" to end switch parsing. For example, to open a file named "-z", use:
2683 =item B<--html4tags>
2685 Use HTML 4 style for empty element tags, e.g.:
2689 instead of Markdown's default XHTML style tags, e.g.:
2694 =item B<-v>, B<--version>
2696 Display Markdown's version number and copyright information.
2699 =item B<-s>, B<--shortversion>
2701 Display the short-form version number.
2710 To file bug reports or feature requests (other than topics listed in the
2711 Caveats section above) please send email to:
2713 support@daringfireball.net (for Markdown issues)
2715 fletcher@fletcherpenney.net (for MultiMarkdown issues)
2717 Please include with your report: (1) the example input; (2) the output
2718 you expected; (3) the output Markdown actually produced.
2721 =head1 VERSION HISTORY
2723 See the readme file for detailed release notes for this version.
2725 1.0.2b8 - Wed 09 May 2007
2727 + Fixed bug with nested raw HTML tags that contained
2728 attributes. The problem is that it uses a backreference in
2729 the expression that it passes to gen_extract_tagged, which
2730 is broken when Text::Balanced wraps it in parentheses.
2732 Thanks to Matt Kraai for the patch.
2734 + Now supports URLs containing literal parentheses, such as:
2736 http://en.wikipedia.org/wiki/WIMP_(computing)
2738 Such parentheses may be arbitrarily nested, but must be
2744 + Changed shebang line from "/usr/bin/perl" to "/usr/bin/env perl"
2746 + Now only trim trailing newlines from code blocks, instead of trimming
2747 all trailing whitespace characters.
2750 1.0.2b6 - Mon 03 Apr 2006
2752 + Fixed bad performance bug in new `Text::Balanced`-based block-level parser.
2755 1.0.2b5 - Thu 08 Dec 2005
2757 + Fixed bug where this:
2759 [text](http://m.com "title" )
2761 wasn't working as expected, because the parser wasn't allowing for spaces
2762 before the closing paren.
2765 1.0.2b4 - Thu 08 Sep 2005
2767 + Filthy hack to support markdown='1' in div tags, because I need it
2768 to write today's fireball.
2770 + First crack at a new, smarter, block-level HTML parser.
2772 1.0.2b3 - Thu 28 Apr 2005
2774 + _DoAutoLinks() now supports the 'dict://' URL scheme.
2776 + PHP- and ASP-style processor instructions are now protected as
2782 + Workarounds for regressions introduced with fix for "backticks within
2783 tags" bug in 1.0.2b1. The fix is to allow `...` to be turned into
2784 <code>...</code> within an HTML tag attribute, and then to turn
2785 these spurious `<code>` tags back into literal backtick characters
2786 in _EscapeSpecialCharsWithinTagAttributes().
2788 The regression was caused because in the fix, we moved
2789 _EscapeSpecialCharsWithinTagAttributes() ahead of _DoCodeSpans()
2790 in _RunSpanGamut(), but that's no good. We need to process code
2791 spans first, otherwise we can get tripped up by something like this:
2793 `<test a="` content of attribute `">`
2796 1.0.2b2 - 20 Mar 2005
2798 + Fix for nested sub-lists in list-paragraph mode. Previously we got
2799 a spurious extra level of `<p>` tags for something like this:
2807 + Experimental support for [this] as a synonym for [this][].
2808 (Note to self: No test yet for this.)
2809 Be sure to test, e.g.: [permutations of this sort of [thing][].]
2812 1.0.2b1 - 28 Feb 2005
2814 + Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
2816 + Fix for escaped backticks still triggering code spans:
2818 There are two raw backticks here: \` and here: \`, not a code span
2828 http://daringfireball.net/
2830 PHP port and other contributions by Michel Fortin
2833 MultiMarkdown changes by Fletcher Penney
2834 http://fletcherpenney.net/
2836 =head1 COPYRIGHT AND LICENSE
2838 Original Markdown Code Copyright (c) 2003-2007 John Gruber
2839 <http://daringfireball.net/>
2840 All rights reserved.
2842 MultiMarkdown changes Copyright (c) 2005-2007 Fletcher T. Penney
2843 <http://fletcherpenney.net/>
2844 All rights reserved.
2846 Redistribution and use in source and binary forms, with or without
2847 modification, are permitted provided that the following conditions are
2850 * Redistributions of source code must retain the above copyright notice,
2851 this list of conditions and the following disclaimer.
2853 * Redistributions in binary form must reproduce the above copyright
2854 notice, this list of conditions and the following disclaimer in the
2855 documentation and/or other materials provided with the distribution.
2857 * Neither the name "Markdown" nor the names of its contributors may
2858 be used to endorse or promote products derived from this software
2859 without specific prior written permission.
2861 This software is provided by the copyright holders and contributors "as
2862 is" and any express or implied warranties, including, but not limited
2863 to, the implied warranties of merchantability and fitness for a
2864 particular purpose are disclaimed. In no event shall the copyright owner
2865 or contributors be liable for any direct, indirect, incidental, special,
2866 exemplary, or consequential damages (including, but not limited to,
2867 procurement of substitute goods or services; loss of use, data, or
2868 profits; or business interruption) however caused and on any theory of
2869 liability, whether in contract, strict liability, or tort (including
2870 negligence or otherwise) arising in any way out of the use of this
2871 software, even if advised of the possibility of such damage.
2876 Possibilities for 'THE'