3 # MultiMarkdown -- A modification of John Gruber's original Markdown
4 # that adds new features and an output format that can more readily
5 # be converted into other document formats
7 # $Id: MultiMarkdown.pl 492 2008-01-18 23:08:43Z fletcher $
9 # Original Code Copyright (c) 2004-2007 John Gruber
10 # <http://daringfireball.net/projects/markdown/>
12 # MultiMarkdown changes Copyright (c) 2005-2008 Fletcher T. Penney
13 # <http://fletcherpenney.net/>
15 # MultiMarkdown Version 2.0.b5
17 # Based on Markdown.pl 1.0.2b8 - Wed 09 May 2007
20 # TODO: Change math mode delimiter?
21 # TODO: WikiWords inside of MMD links are converted to wiki links
22 # TODO: Still need to get the glossary working in non-memoir documents
23 # TODO: A mechanism to include arbitrary code (LaTeX, etc) without being "ugly"
24 # TODO: Look into discussion re: assigning classes to div's/span's on Markdown list.
25 # TODO: Should I just scrap the WikiWords feature to get rid of all the trouble it causes?
26 # TODO: Improve support for tables with long items and overall width in LaTeX
27 # TODO: Need a syntax for long table cells in MMD, even if no rowspan feature yet
28 # TODO: Create utilities to convert MMD tables to/from tab-delimited
37 # Include ASCIIMathML.pm
38 my $me = $0; # Where am I?
40 # Am I running in Windoze?
44 $me = dirname($me)."\\"; # Get just the directory portion
46 $me = dirname(readlink($me))."/"; # Get just the directory portion
49 require $me ."ASCIIMathML.pm";
51 use Digest::MD5 qw(md5_hex);
52 use vars qw($VERSION $g_use_metadata $g_use_wiki_links $g_base_url
53 $g_bibliography_title $g_allow_mathml $g_base_header_level $mathParser);
56 $mathParser = new Text::ASCIIMathML();
58 ## Disabled; causes problems under Perl 5.6.1:
60 # binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
63 # Global default settings:
65 my $g_empty_element_suffix = " />"; # Change to ">" for HTML output
67 my $g_allow_mathml = 1;
68 my $g_base_header_level = 1;
69 my $g_wikilinks_kill_switch = 1; # WikiLinks may become deprecated; this is the first step
75 # Reusable patterns to match balanced [brackets] and (parens). See
76 # Friedl's "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
77 my ($g_nested_brackets, $g_nested_parens);
78 $g_nested_brackets = qr{
80 [^\[\]]+ # Anything other than brackets
83 (??{ $g_nested_brackets }) # Recursive set of nested brackets
88 # Doesn't allow for whitespace, because we're using it to match URLs:
89 $g_nested_parens = qr{
91 [^()\s]+ # Anything other than parens or whitespace
94 (??{ $g_nested_parens }) # Recursive set of nested brackets
100 # Table of hash values for escaped characters:
102 foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
103 $g_escape_table{$char} = md5_hex($char);
107 # Global hashes, used by various utility routines
110 my %g_html_blocks = ();
112 my %g_metadata_newline = ();
113 my %g_crossrefs = ();
114 my %g_footnotes = ();
115 my %g_attributes = ();
116 my @g_used_footnotes = ();
117 my $g_footnote_counter = 0;
119 my $g_citation_counter = 0;
120 my @g_used_references = ();
121 my %g_references = ();
122 $g_bibliography_title = "Bibliography";
125 $g_metadata_newline{default} = "\n";
126 $g_metadata_newline{keywords} = ", ";
127 my $g_document_format = "";
129 # For use with WikiWords and [[Wiki Links]]
130 $g_use_wiki_links = 0;
131 $g_base_url = ""; # This is the base url to be used for WikiLinks
132 my $g_temp_no_wikiwords = 0;
135 # You can use \WikiWord to prevent a WikiWord from being treated as a link
138 # Used to track when we're inside an ordered or unordered list
139 # (see _ProcessListItems() for details):
140 my $g_list_level = 0;
143 #### Blosxom plug-in interface ##########################################
145 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
146 # which posts Markdown should process, using a "meta-markup: markdown"
147 # header. If it's set to 0 (the default), Markdown will process all
149 my $g_blosxom_use_meta = 0;
153 my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
155 if ( (! $g_blosxom_use_meta) or
156 (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
158 $$body_ref = Markdown($$body_ref);
164 #### Movable Type plug-in interface #####################################
165 eval {require MT}; # Test to see if we're running in MT.
169 require MT::Template::Context;
170 import MT::Template::Context;
172 eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0.
176 my $plugin = new MT::Plugin({
178 description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
179 doc_link => 'http://daringfireball.net/projects/markdown/'
181 MT->add_plugin( $plugin );
184 MT::Template::Context->add_container_tag(MarkdownOptions => sub {
187 my $builder = $ctx->stash('builder');
188 my $tokens = $ctx->stash('tokens');
190 if (defined ($args->{'output'}) ) {
191 $ctx->stash('markdown_output', lc $args->{'output'});
194 defined (my $str = $builder->build($ctx, $tokens) )
195 or return $ctx->error($builder->errstr);
199 MT->add_text_filter('markdown' => {
201 docs => 'http://daringfireball.net/projects/markdown/',
207 my $output = $ctx->stash('markdown_output');
208 if (defined $output && $output =~ m/^html/i) {
209 $g_empty_element_suffix = ">";
210 $ctx->stash('markdown_output', '');
212 elsif (defined $output && $output eq 'raw') {
214 $ctx->stash('markdown_output', '');
218 $g_empty_element_suffix = " />";
221 $text = $raw ? $text : Markdown($text);
226 # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
231 $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
235 MT->add_text_filter('markdown_with_smartypants' => {
236 label => 'Markdown With SmartyPants',
237 docs => 'http://daringfireball.net/projects/markdown/',
242 my $output = $ctx->stash('markdown_output');
243 if (defined $output && $output eq 'html') {
244 $g_empty_element_suffix = ">";
247 $g_empty_element_suffix = " />";
250 $text = Markdown($text);
251 $text = $smartypants->($text, '1');
257 #### BBEdit/command-line text filter interface ##########################
258 # Needs to be hidden from MT (and Blosxom when running in static mode).
260 # We're only using $blosxom::version once; tell Perl not to warn us:
262 unless ( defined($blosxom::version) ) {
265 #### Check for command-line switches: #################
268 Getopt::Long::Configure('pass_through');
269 GetOptions(\%cli_opts,
274 if ($cli_opts{'version'}) { # Version info
275 print "\nThis is Markdown, version $VERSION.\n";
276 print "Copyright 2004 John Gruber\n";
277 print "http://daringfireball.net/projects/markdown/\n\n";
280 if ($cli_opts{'shortversion'}) { # Just the version number string.
284 if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
285 $g_empty_element_suffix = ">";
289 #### Process incoming text: ###########################
292 local $/; # Slurp the whole file
295 print Markdown($text);
303 # Main function. The order in which other subs are called here is
304 # essential. Link and image substitutions need to happen before
305 # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
306 # and <img> tags get encoded.
310 # Clear the global hashes. If we don't clear these, you get conflicts
311 # from other articles when generating a page which contains more than
312 # one article (e.g. an index page that shows the N most recent
320 @g_used_footnotes = ();
321 @g_used_references = ();
324 # Standardize line endings:
325 $text =~ s{\r\n}{\n}g; # DOS to Unix
326 $text =~ s{\r}{\n}g; # Mac to Unix
328 # Make sure $text ends with a couple of newlines:
331 # Convert all tabs to spaces.
332 $text = _Detab($text);
334 # Strip any lines consisting only of spaces and tabs.
335 # This makes subsequent regexen easier to write, because we can
336 # match consecutive blank lines with /\n+/ instead of something
337 # contorted like /[ \t]*\n+/ .
338 $text =~ s/^[ \t]+$//mg;
340 # Strip leading blank lines
344 $text = _ParseMetaData($text) if $g_use_metadata;
346 # And recheck for leading blank lines
349 # Turn block-level HTML blocks into hash entries
350 $text = _HashHTMLBlocks($text);
352 # Strip footnote and link definitions, store in hashes.
353 $text = _StripFootnoteDefinitions($text);
355 $text = _StripLinkDefinitions($text);
357 _GenerateImageCrossRefs($text);
359 $text = _StripMarkdownReferences($text);
361 $text = _RunBlockGamut($text);
363 $text = _DoMarkdownCitations($text);
365 $text = _DoFootnotes($text);
367 $text = _UnescapeSpecialChars($text);
369 # Clean encoding within HTML comments
370 $text = _UnescapeComments($text);
372 # This must follow _UnescapeSpecialChars
373 $text = _UnescapeWikiWords($text);
375 $text = _FixFootnoteParagraphs($text);
376 $text .= _PrintFootnotes();
378 $text .= _PrintMarkdownBibliography();
380 $text = _ConvertCopyright($text);
382 if (lc($g_document_format) =~ /^complete\s*$/) {
383 return xhtmlMetaData() . "<body>\n\n" . $text . "\n</body>\n</html>";
385 return textMetaData() . $text . "\n";
391 sub _StripLinkDefinitions {
393 # Strips link definitions from text, stores the URLs and titles in
397 my $less_than_tab = $g_tab_width - 1;
399 # Link defs are in the form: ^[id]: url "optional title"
401 # Pattern altered for MultiMarkdown
402 # in order to not match citations or footnotes
403 ^[ ]{0,$less_than_tab}\[([^#^].*)\]: # id = $1
405 \n? # maybe *one* newline
407 <?(\S+?)>? # url = $2
409 \n? # maybe one newline
412 (?<=\s) # lookbehind for whitespace
417 )? # title is optional
419 # MultiMarkdown addition for attribute support
422 (?<=\s) # lookbehind for whitespace
423 (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))*
430 $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive
432 $g_titles{lc $1} = $3;
433 $g_titles{lc $1} =~ s/"/"/g;
436 # MultiMarkdown addition "
438 $g_attributes{lc $1} = $4;
447 sub _HashHTMLBlocks {
449 my $less_than_tab = $g_tab_width - 1;
451 # Hashify HTML blocks:
452 # We only want to do this for block-level HTML tags, such as headers,
453 # lists, and tables. That's because we still want to wrap <p>s around
454 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
455 # phrase emphasis, and spans. The list of tags we're looking for is
459 p | div | h[1-6] | blockquote | pre | table |
460 dl | ol | ul | script | noscript | form |
461 fieldset | iframe | ins | del
463 }x; # MultiMarkdown does not include `math` in the above list so that
464 # Equations can optionally be included in separate paragraphs
467 (?: # Match one attr name/value pair
468 \s+ # There needs to be at least some whitespace
469 # before each attribute name.
470 [\w.:_-]+ # Attribute name
473 ".+?" # "Attribute value"
475 '.+?' # 'Attribute value'
480 my $empty_tag = qr{< \w+ $tag_attrs \s* />}xms;
481 my $open_tag = qr{< $block_tags $tag_attrs \s* >}xms;
482 my $close_tag = undef; # let Text::Balanced handle this
484 use Text::Balanced qw(gen_extract_tagged);
485 my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] });
488 ## TO-DO: the 0,3 on the next line ought to respect the
489 ## tabwidth, or else, we should mandate 4-space tabwidth and
491 while ($text =~ s{^(([ ]{0,3}<)?.*\n)}{}m) {
494 # current line could be start of code block
496 my ($tag, $remainder) = $extract_block->($cur_line . $text);
498 my $key = md5_hex($tag);
499 $g_html_blocks{$key} = $tag;
500 push @chunks, "\n\n" . $key . "\n\n";
504 # No tag match, so toss $cur_line into @chunks
505 push @chunks, $cur_line;
509 # current line could NOT be start of code block
510 push @chunks, $cur_line;
514 push @chunks, $text; # Whatever is left.
516 $text = join '', @chunks;
520 # Special case just for <hr />. It was easier to make a special case than
521 # to make the other regex more complicated.
524 (?<=\n\n) # Starting after a blank line
526 \A\n? # the beginning of the doc
529 [ ]{0,$less_than_tab}
530 <(hr) # start tag = $2
533 /?> # the matching end tag
535 (?=\n{2,}|\Z) # followed by a blank line or end of document
538 my $key = md5_hex($1);
539 $g_html_blocks{$key} = $1;
540 "\n\n" . $key . "\n\n";
543 # Special case for standalone HTML comments:
546 (?<=\n\n) # Starting after a blank line
548 \A\n? # the beginning of the doc
551 [ ]{0,$less_than_tab}
558 (?=\n{2,}|\Z) # followed by a blank line or end of document
561 my $key = md5_hex($1);
562 $g_html_blocks{$key} = $1;
563 "\n\n" . $key . "\n\n";
566 # PHP and ASP-style processor instructions (<?…?> and <%…%>)
569 (?<=\n\n) # Starting after a blank line
571 \A\n? # the beginning of the doc
574 [ ]{0,$less_than_tab}
581 (?=\n{2,}|\Z) # followed by a blank line or end of document
584 my $key = md5_hex($1);
585 $g_html_blocks{$key} = $1;
586 "\n\n" . $key . "\n\n";
596 # These are all the transformations that form block-level
597 # tags like paragraphs, headers, and list items.
601 $text = _DoHeaders($text);
603 # Do tables first to populate the table id's for cross-refs
604 # Escape <pre><code> so we don't get greedy with tables
605 $text = _DoTables($text);
607 # And now, protect our tables
608 $text = _HashHTMLBlocks($text);
610 # Do Horizontal Rules:
611 $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
612 $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
613 $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
615 $text = _DoDefinitionLists($text);
616 $text = _DoLists($text);
617 $text = _DoCodeBlocks($text);
618 $text = _DoBlockQuotes($text);
620 # We already ran _HashHTMLBlocks() before, in Markdown(), but that
621 # was to escape raw HTML in the original Markdown source. This time,
622 # we're escaping the markup we've just created, so that we don't wrap
623 # <p> tags around block-level tags.
624 $text = _HashHTMLBlocks($text);
625 $text = _FormParagraphs($text);
633 # These are all the transformations that occur *within* block-level
634 # tags like paragraphs, headers, and list items.
638 $text = _DoCodeSpans($text);
639 $text = _DoMathSpans($text);
640 $text = _EscapeSpecialCharsWithinTagAttributes($text);
641 $text = _EncodeBackslashEscapes($text);
643 # Process anchor and image tags. Images must come first,
644 # because ![foo][f] looks like an anchor.
645 $text = _DoImages($text);
646 $text = _DoAnchors($text);
649 if ($g_use_wiki_links && !$g_temp_no_wikiwords && !$g_wikilinks_kill_switch) {
650 $text = _DoWikiLinks($text);
652 # And then reprocess anchors and images
653 $text = _DoImages($text);
654 $text = _DoAnchors($text);
658 # Make links out of things like `<http://example.com/>`
659 # Must come after _DoAnchors(), because you can use < and >
660 # delimiters in inline links like [this](<url>).
661 $text = _DoAutoLinks($text);
662 $text = _EncodeAmpsAndAngles($text);
663 $text = _DoItalicsAndBold($text);
666 $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
sub _EscapeSpecialCharsWithinTagAttributes {
#
# Within tags -- meaning between < and > -- encode [\ ` * _] so they
# don't conflict with their use in Markdown for code, italics and strong.
# Each such character is replaced with its entry in %g_escape_table (an
# MD5 checksum); this is likely overkill, but it should prevent us from
# colliding with the escape values by accident.
#
# Parameter:  String of text that may contain HTML tags.
# Returns:    The string rebuilt from the tokens produced by
#             _TokenizeHTML(), with the characters above replaced inside
#             "tag" tokens only; every other token is copied unchanged.
#
    my $text = shift;

    my $tokens  = _TokenizeHTML($text);
    my $escaped = '';    # the text is rebuilt from the tokens

    foreach my $cur_token (@$tokens) {
        # Work on a copy so the token list itself is never modified.
        my ($type, $content) = @$cur_token;

        if ($type eq "tag") {
            $content =~ s! \\ !$g_escape_table{'\\'}!gx;
            # A <code> or </code> found strictly inside a tag token is
            # turned into the escape value for a backtick -- presumably
            # markup left there by the earlier code-span pass.
            $content =~ s{ (?<=.)</?code>(?=.) }{$g_escape_table{'`'}}gx;
            $content =~ s! \* !$g_escape_table{'*'}!gx;
            $content =~ s! _ !$g_escape_table{'_'}!gx;
        }

        $escaped .= $content;
    }

    return $escaped;
}
699 # Turn Markdown link shortcuts into XHTML <a> tags.
704 # First, handle reference-style links: [link text] [id]
707 ( # wrap whole match in $1
709 ($g_nested_brackets) # link text = $2
712 [ ]? # one optional space
713 (?:\n[ ]*)? # one optional newline followed by spaces
721 my $whole_match = $1;
725 if ($link_id eq "") {
726 $link_id = lc $link_text; # for shortcut links like [this][].
729 # Allow automatic cross-references to headers
730 my $label = Header2Label($link_id);
731 if (defined $g_urls{$link_id}) {
732 my $url = $g_urls{$link_id};
733 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
734 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
735 $result = "<a href=\"$url\"";
736 if ( defined $g_titles{$link_id} ) {
737 my $title = $g_titles{$link_id};
738 $title =~ s! \* !$g_escape_table{'*'}!gx;
739 $title =~ s! _ !$g_escape_table{'_'}!gx;
740 $result .= " title=\"$title\"";
742 $result .= _DoAttributes($label);
743 $result .= ">$link_text</a>";
744 } elsif (defined $g_crossrefs{$label}) {
745 my $url = $g_crossrefs{$label};
746 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
747 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
748 $result = "<a href=\"$url\"";
749 if ( defined $g_titles{$label} ) {
750 my $title = $g_titles{$label};
751 $title =~ s! \* !$g_escape_table{'*'}!gx;
752 $title =~ s! _ !$g_escape_table{'_'}!gx;
753 $result .= " title=\"$title\"";
755 $result .= _DoAttributes($label);
756 $result .= ">$link_text</a>";
758 $result = $whole_match;
764 # Next, inline-style links: [link text](url "optional title")
767 ( # wrap whole match in $1
769 ($g_nested_brackets) # link text = $2
773 ($g_nested_parens) # href = $3
776 (['"]) # quote char = $5
779 [ \t]* # ignore any spaces/tabs between closing quote and )
780 )? # title is optional
785 my $whole_match = $1;
790 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
791 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
792 $url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present
793 $result = "<a href=\"$url\"";
795 if (defined $title) {
796 $title =~ s/"/"/g;
797 $title =~ s! \* !$g_escape_table{'*'}!gx;
798 $title =~ s! _ !$g_escape_table{'_'}!gx;
799 $result .= " title=\"$title\"";
801 $result .= ">$link_text</a>";
807 # Last, handle reference-style shortcuts: [link text]
808 # These must come last in case you've also got [link test][1]
809 # or [link test](/foo)
812 ( # wrap whole match in $1
814 ([^\[\]]+) # link text = $2; can't contain '[' or ']'
819 my $whole_match = $1;
821 (my $link_id = lc $2) =~ s{[ ]?\n}{ }g; # lower-case and turn embedded newlines into spaces
823 # Allow automatic cross-references to headers
824 my $label = Header2Label($link_id);
825 if (defined $g_urls{$link_id}) {
826 my $url = $g_urls{$link_id};
827 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
828 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
829 $result = "<a href=\"$url\"";
830 if ( defined $g_titles{$link_id} ) {
831 my $title = $g_titles{$link_id};
832 $title =~ s! \* !$g_escape_table{'*'}!gx;
833 $title =~ s! _ !$g_escape_table{'_'}!gx;
834 $result .= " title=\"$title\"";
836 $result .= _DoAttributes($link_id);
837 $result .= ">$link_text</a>";
838 } elsif (defined $g_crossrefs{$label}) {
839 my $url = $g_crossrefs{$label};
840 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
841 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
842 $result = "<a href=\"$url\"";
843 if ( defined $g_titles{$label} ) {
844 my $title = $g_titles{$label};
845 $title =~ s! \* !$g_escape_table{'*'}!gx;
846 $title =~ s! _ !$g_escape_table{'_'}!gx;
847 $result .= " title=\"$title\"";
849 $result .= _DoAttributes($label);
850 $result .= ">$link_text</a>";
852 $result = $whole_match;
863 # Turn Markdown image shortcuts into <img> tags.
868 # First, handle reference-style labeled images: ![alt text][id]
871 ( # wrap whole match in $1
873 (.*?) # alt text = $2
876 [ ]? # one optional space
877 (?:\n[ ]*)? # one optional newline followed by spaces
886 my $whole_match = $1;
890 if ($link_id eq "") {
891 $link_id = lc $alt_text; # for shortcut links like ![this][].
894 $alt_text =~ s/"/"/g;
895 if (defined $g_urls{$link_id}) {
896 my $url = $g_urls{$link_id};
897 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
898 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
900 my $label = Header2Label($alt_text);
901 $g_crossrefs{$label} = "#$label";
902 if (! defined $g_titles{$link_id}) {
903 $g_titles{$link_id} = $alt_text;
906 $result = "<img id=\"$label\" src=\"$url\" alt=\"$alt_text\"";
907 if (defined $g_titles{$link_id}) {
908 my $title = $g_titles{$link_id};
909 $title =~ s! \* !$g_escape_table{'*'}!gx;
910 $title =~ s! _ !$g_escape_table{'_'}!gx;
911 $result .= " title=\"$title\"";
913 $result .= _DoAttributes($link_id);
914 $result .= $g_empty_element_suffix;
917 # If there's no such link ID, leave intact:
918 $result = $whole_match;
925 # Next, handle inline images: ![alt text](url "optional title")
926 # Don't forget: encode * and _
929 ( # wrap whole match in $1
931 (.*?) # alt text = $2
933 \s? # One optional whitespace character
936 ($g_nested_parens) # href = $3
939 (['"]) # quote char = $5
943 )? # title is optional
944 # MultiMarkdown addition for attribute support
946 (?<=\s) # lookbehind for whitespace
947 (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))*
953 my $whole_match = $1;
956 my $title = (defined $6) ? $6 : '';
959 $alt_text =~ s/"/"/g;
960 $title =~ s/"/"/g;
961 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
962 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
963 $url =~ s{^<(.*)>$}{$1}; # Remove <>'s surrounding URL, if present
965 my $label = Header2Label($alt_text);
966 $g_crossrefs{$label} = "#$label";
967 # $g_titles{$label} = $alt_text; # I think this line should not be here
969 $result = "<img id=\"$label\" src=\"$url\" alt=\"$alt_text\"";
970 if (defined $title) {
971 $title =~ s! \* !$g_escape_table{'*'}!gx;
972 $title =~ s! _ !$g_escape_table{'_'}!gx;
973 $result .= " title=\"$title\"";
975 if (defined $attrs) {
976 $result .= " $attrs";
978 $result .= $g_empty_element_suffix;
993 # Don't do Wiki Links in Headers
994 $g_temp_no_wikiwords = 1;
996 # Setext-style headers:
1003 $text =~ s{ ^(.+?)(?:\s\[([^\[]*?)\])?[ \t]*\n=+[ \t]*\n+ }{
1005 $label = Header2Label($2);
1007 $label = Header2Label($1);
1009 $header = _RunSpanGamut($1);
1010 $header =~ s/^\s*//s;
1013 $g_crossrefs{$label} = "#$label";
1014 $g_titles{$label} = $header;
1015 $idString = " id=\"$label\"";
1020 "<h1$idString>" . $header . "</h1>\n\n";
1023 $text =~ s{ ^(.+?)(?:\s*\[([^\[]*?)\])?[ \t]*\n-+[ \t]*\n+ }{
1025 $label = Header2Label($2);
1027 $label = Header2Label($1);
1029 $header = _RunSpanGamut($1);
1030 $header =~ s/^\s*//s;
1033 $g_crossrefs{$label} = "#$label";
1034 $g_titles{$label} = $header;
1035 $idString = " id=\"$label\"";
1040 "<h2$idString>" . $header . "</h2>\n\n";
1044 # atx-style headers:
1047 # ## Header 2 with closing hashes ##
1052 ^(\#{1,6}) # $1 = string of #'s
1054 (.+?) # $2 = Header text
1056 (?:\[([^\[]*?)\])? # $3 = optional label for cross-reference
1058 \#* # optional closing #'s (not counted)
1061 my $h_level = length($1) + $g_base_header_level - 1;
1063 $label = Header2Label($3);
1065 $label = Header2Label($2);
1067 $header = _RunSpanGamut($2);
1068 $header =~ s/^\s*//s;
1071 $g_crossrefs{$label} = "#$label";
1072 $g_titles{$label} = $header;
1073 $idString = " id=\"$label\"";
1078 "<h$h_level$idString>" . $header . "</h$h_level>\n\n";
1081 # Can now process Wiki Links again
1082 $g_temp_no_wikiwords = 0;
1090 # Form HTML ordered (numbered) and unordered (bulleted) lists.
1093 my $less_than_tab = $g_tab_width - 1;
1095 # Re-usable patterns to match list item bullets and number markers:
1096 my $marker_ul = qr/[*+-]/;
1097 my $marker_ol = qr/\d+[.]/;
1098 my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
1100 # Re-usable pattern to match any entire ul or ol list:
1101 my $whole_list = qr{
1104 [ ]{0,$less_than_tab}
1105 (${marker_any}) # $3 = first list item marker
1114 (?! # Negative lookahead for another list item marker
1122 # We use a different prefix before nested lists than top-level lists.
1123 # See extended comment in _ProcessListItems().
1125 # Note: There's a bit of duplication here. My original implementation
1126 # created a scalar regex pattern as the conditional result of the test on
1127 # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
1128 # substitution once, using the scalar as the pattern. This worked,
1129 # everywhere except when running under MT on my hosting account at Pair
1130 # Networks. There, this caused all rebuilds to be killed by the reaper (or
1131 # perhaps they crashed, but that seems incredibly unlikely given that the
1132 # same script on the same server ran fine *except* under MT). I've spent
1133 # more time trying to figure out why this is happening than I'd like to
1134 # admit. My only guess, backed up by the fact that this workaround works,
1135 # is that Perl optimizes the substitution when it can figure out that the
1136 # pattern will never change, and when this optimization isn't on, we run
1137 # afoul of the reaper. Thus, the slightly redundant code that uses two
1138 # static s/// patterns rather than one conditional pattern.
1140 if ($g_list_level) {
1146 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
1148 # Turn double returns into triple returns, so that we can make a
1149 # paragraph for the last item in a list, if necessary:
1150 $list =~ s/\n{2,}/\n\n\n/g;
1151 my $result = _ProcessListItems($list, $marker_any);
1153 # Trim any trailing whitespace, to put the closing `</$list_type>`
1154 # up on the preceding line, to get it past the current stupid
1155 # HTML block parser. This is a hack to work around the terrible
1156 # hack that is the HTML block parser.
1157 $result =~ s{\s+$}{};
1158 $result = "<$list_type>" . $result . "</$list_type>\n";
1168 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
1169 # Turn double returns into triple returns, so that we can make a
1170 # paragraph for the last item in a list, if necessary:
1171 $list =~ s/\n{2,}/\n\n\n/g;
1172 my $result = _ProcessListItems($list, $marker_any);
1173 $result = "<$list_type>\n" . $result . "</$list_type>\n";
1183 sub _ProcessListItems {
1185 # Process the contents of a single ordered or unordered list, splitting it
1186 # into individual list items.
1189 my $list_str = shift;
1190 my $marker_any = shift;
1193 # The $g_list_level global keeps track of when we're inside a list.
1194 # Each time we enter a list, we increment it; when we leave a list,
1195 # we decrement. If it's zero, we're not in a list anymore.
1197 # We do this because when we're not inside a list, we want to treat
1198 # something like this:
1200 # I recommend upgrading to version
1201 # 8. Oops, now this line is treated
1204 # As a single paragraph, despite the fact that the second line starts
1205 # with a digit-period-space sequence.
1207 # Whereas when we're inside a list (or sub-list), that line will be
1208 # treated as the start of a sub-list. What a kludge, huh? This is
1209 # an aspect of Markdown's syntax that's hard to parse perfectly
1210 # without resorting to mind-reading. Perhaps the solution is to
1211 # change the syntax rules such that sub-lists must start with a
1212 # starting cardinal number; e.g. "1." or "a.".
1216 # trim trailing blank lines:
1217 $list_str =~ s/\n{2,}\z/\n/;
1221 (\n)? # leading line = $1
1222 (^[ \t]*) # leading whitespace = $2
1223 ($marker_any) [ \t]+ # list marker = $3
1224 ((?s:.+?) # list item text = $4
1226 (?= \n* (\z | \2 ($marker_any) [ \t]+))
1229 my $leading_line = $1;
1230 my $leading_space = $2;
1232 if ($leading_line or ($item =~ m/\n{2,}/)) {
1233 $item = _RunBlockGamut(_Outdent($item));
1236 # Recursion for sub-lists:
1237 $item = _DoLists(_Outdent($item));
1239 $item = _RunSpanGamut($item);
1242 "<li>" . $item . "</li>\n";
1253 # Process Markdown `<pre><code>` blocks.
1260 ( # $1 = the code block -- one or more lines, starting with a space/tab
1262 (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces
1266 ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1269 my $result; # return value
1271 $codeblock = _EncodeCode(_Outdent($codeblock));
1272 $codeblock = _Detab($codeblock);
1273 $codeblock =~ s/\A\n+//; # trim leading newlines
1274 $codeblock =~ s/\n+\z//; # trim trailing newlines
1276 $result = "\n\n<pre><code>" . $codeblock . "</code></pre>\n\n"; # CHANGED: Removed newline for MMD
1287 # * Backtick quotes are used for <code></code> spans.
1289 # * You can use multiple backticks as the delimiters if you want to
1290 # include literal backticks in the code span. So, this input:
1292 # Just type ``foo `bar` baz`` at the prompt.
1294 # Will translate to:
1296 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1298 # There's no arbitrary limit to the number of backticks you
1299 # can use as delimiters. If you need three consecutive backticks
1300 # in your code, use four for delimiters, etc.
1302 # * You can use spaces to get literal backticks at the edges:
1304 # ... type `` `bar` `` ...
1308 # ... type <code>`bar`</code> ...
1314 (?<!\\) # Character before opening ` can't be a backslash
1315 (`+) # $1 = Opening run of `
1316 (.+?) # $2 = The code block
1318 \1 # Matching closer
1322 $c =~ s/^[ \t]*//g; # leading whitespace
1323 $c =~ s/[ \t]*$//g; # trailing whitespace
1324 $c = _EncodeCode($c);
1334 # Encode/escape certain characters inside Markdown code runs.
1335 # The point is that in code, these characters are literals,
1336 # and lose their special Markdown meanings.
1340 # Protect Wiki Links in Code Blocks
1341 if (!$g_wikilinks_kill_switch) {
1342 my $WikiWord = qr'[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
1343 s/(\A\\?|\s\\?)($WikiWord)/$1\\$2/gx;
1346 # Encode all ampersands; HTML entities are not
1347 # entities within a Markdown code span.
1350 # Encode $'s, but only if we're running under Blosxom.
1351 # (Blosxom interpolates Perl variables in article bodies.)
1354 if (defined($blosxom::version)) {
1360 # Do the angle bracket song and dance:
1364 # Now, escape characters that are magic in Markdown:
1365 s! \* !$g_escape_table{'*'}!gx;
1366 s! _ !$g_escape_table{'_'}!gx;
1367 s! { !$g_escape_table{'{'}!gx;
1368 s! } !$g_escape_table{'}'}!gx;
1369 s! \[ !$g_escape_table{'['}!gx;
1370 s! \] !$g_escape_table{']'}!gx;
1371 s! \\ !$g_escape_table{'\\'}!gx;
sub _DoItalicsAndBold {
#
# Convert Markdown emphasis markers into <strong> and <em> elements.
#
# Parameter:  String of span-level text.
# Returns:    The string with `**x**` / `__x__` wrapped in <strong> and
#             `*x*` / `_x_` wrapped in <em>.
#
# Cave in - `*` and `_` behave differently: in the first two passes a
# marker only opens emphasis when it is not preceded by a word character;
# the final pass then additionally allows `*` (but not `_`) in the middle
# of a word.
#
    my $text = shift;

    # Two identical passes: the second one catches nested strong and
    # emphasis special cases left over by the first.
    for my $pass (1 .. 2) {
        # <strong> must go first:
        $text =~ s{ (?<!\w) (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
            {<strong>$2</strong>}gsx;

        $text =~ s{ (?<!\w) (\*|_) (?=\S) (.+?) (?<=\S) \1 }
            {<em>$2</em>}gsx;
    }

    # And now, allow `*` in the middle of words

    # <strong> must go first:
    $text =~ s{ (\*\*) (?=\S) (.+?[*]*) (?<=\S) \1 }
        {<strong>$2</strong>}gsx;

    $text =~ s{ (\*) (?=\S) (.+?) (?<=\S) \1 }
        {<em>$2</em>}gsx;

    return $text;
}
1410 sub _DoBlockQuotes {
1414 ( # Wrap whole match in $1
1416 ^[ \t]*>[ \t]? # '>' at the start of a line
1417 .+\n # rest of the first line
1418 (.+\n)* # subsequent consecutive lines
1424 $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
1425 $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
1426 $bq = _RunBlockGamut($bq); # recurse
1429 # These leading spaces screw with <pre> content, so we need to fix that:
1438 "<blockquote>\n$bq\n</blockquote>\n\n";
1446 sub _FormParagraphs {
1449 # $text - string to process with html <p> tags
1453 # Strip leading and trailing lines:
1457 my @grafs = split(/\n{2,}/, $text);
1463 unless (defined( $g_html_blocks{$_} )) {
1464 $_ = _RunSpanGamut($_);
1471 # Unhashify HTML blocks
1473 # foreach my $graf (@grafs) {
1474 # my $block = $g_html_blocks{$graf};
1475 # if (defined $block) {
1480 foreach my $graf (@grafs) {
1481 # Modify elements of @grafs in-place...
1482 my $block = $g_html_blocks{$graf};
1483 if (defined $block) {
1491 markdown\s*=\s* (['"]) # $2 = attr quote char
1500 (</div>) # $4 = closing tag
1505 my ($div_open, $div_content, $div_close) = ($1, $3, $4);
1507 # We can't call Markdown(), because that resets the hash;
1508 # that initialization code should be pulled into its own sub, though.
1509 $div_content = _HashHTMLBlocks($div_content);
1510 $div_content = _StripLinkDefinitions($div_content);
1511 $div_content = _RunBlockGamut($div_content);
1512 $div_content = _UnescapeSpecialChars($div_content);
1514 $div_open =~ s{\smarkdown\s*=\s*(['"]).+?\1}{}ms;
1516 $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
1522 return join "\n\n", @grafs;
1526 sub _EncodeAmpsAndAngles {
1527 # Smart processing for ampersands and angle brackets that need to be encoded.
1531 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1532 # http://bumppo.net/projects/amputator/
1533 $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g;
1536 $text =~ s{<(?![a-z/?\$!])}{<}gi;
1542 sub _EncodeBackslashEscapes {
1544 # Parameter: String.
1545 # Returns: The string, after processing the following backslash
1550 s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first.
1551 s! \\` !$g_escape_table{'`'}!gx;
1552 s! \\\* !$g_escape_table{'*'}!gx;
1553 s! \\_ !$g_escape_table{'_'}!gx;
1554 s! \\\{ !$g_escape_table{'{'}!gx;
1555 s! \\\} !$g_escape_table{'}'}!gx;
1556 s! \\\[ !$g_escape_table{'['}!gx;
1557 s! \\\] !$g_escape_table{']'}!gx;
1558 s! \\\( !$g_escape_table{'('}!gx;
1559 s! \\\) !$g_escape_table{')'}!gx;
1560 s! \\> !$g_escape_table{'>'}!gx;
1561 s! \\\# !$g_escape_table{'#'}!gx;
1562 s! \\\+ !$g_escape_table{'+'}!gx;
1563 s! \\\- !$g_escape_table{'-'}!gx;
1564 s! \\\. !$g_escape_table{'.'}!gx;
1565 s{ \\! }{$g_escape_table{'!'}}gx;
1574 $text =~ s{<((https?|ftp|dict):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
1576 # Email addresses: <address@domain.foo>
1583 [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
1587 _EncodeEmailAddress( _UnescapeSpecialChars($1) );
1594 sub _EncodeEmailAddress {
1596 # Input: an email address, e.g. "foo@example.com"
1598 # Output: the email address as a mailto link, with each character
1599 # of the address encoded as either a decimal or hex entity, in
1600 # the hopes of foiling most address harvesting spam bots. E.g.:
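1602 # <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
1603 # x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
1604 # &#64;&#101;&#x78;&#x61;&#x6D;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>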
1606 # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1607 # mailing list: <http://tinyurl.com/yu7ue>
1614 sub { '&#' . ord(shift) . ';' },
1615 sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
1619 $addr = "mailto:" . $addr;
1623 if ( $char eq '@' ) {
1624 # this *must* be encoded. I insist.
1625 $char = $encode[int rand 1]->($char);
1626 } elsif ( $char ne ':' ) {
1627 # leave ':' alone (to spot mailto: later)
1629 # roughly 10% raw, 45% hex, 45% dec
1631 $r > .9 ? $encode[2]->($char) :
1632 $r < .45 ? $encode[1]->($char) :
1639 $addr = qq{<a href="$addr">$addr</a>};
1640 $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
1646 sub _UnescapeSpecialChars {
1648 # Swap back in all the special characters we've hidden.
1652 while( my($char, $hash) = each(%g_escape_table) ) {
1653 $text =~ s/$hash/$char/g;
1661 # Parameter: String containing HTML markup.
1662 # Returns: Reference to an array of the tokens comprising the input
1663 # string. Each token is either a tag (possibly with nested
1664 # tags contained therein, such as <a href="<MTFoo>">), or a
1665 # run of text between tags. Each element of the array is a
1666 # two-element array; the first is either 'tag' or 'text';
1667 # the second is the actual value.
1670 # Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
1671 # <http://www.bradchoate.com/past/mtregex.php>
1676 my $len = length $str;
1680 my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth);
1681 my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment
1682 (?s: <\? .*? \?> ) | # processing instruction
1683 $nested_tags/ix; # nested tags
1685 while ($str =~ m/($match)/g) {
1687 my $sec_start = pos $str;
1688 my $tag_start = $sec_start - length $whole_tag;
1689 if ($pos < $tag_start) {
1690 push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
1692 push @tokens, ['tag', $whole_tag];
1695 push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
1703 # Remove one level of line-leading tabs or spaces
1707 $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
1714 # Cribbed from a post by Bart Lateur:
1715 # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
1719 $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
1724 # MultiMarkdown Routines
1727 sub _ParseMetaData {
1729 my $clean_text = "";
1731 my ($inMetaData, $currentKey) = (1,'');
1733 foreach my $line ( split /\n/, $text ) {
1734 $line =~ /^$/ and $inMetaData = 0 and $clean_text .= $line and next;
1736 if ($line =~ /^([a-zA-Z0-9][0-9a-zA-Z _-]*?):\s*(.*)$/ ) {
1738 $currentKey =~ s/ / /g;
1739 $g_metadata{$currentKey} = $2;
1740 if (lc($currentKey) eq "format") {
1741 $g_document_format = lc($g_metadata{$currentKey});
1743 if (lc($currentKey) eq "base url") {
1744 $g_base_url = $g_metadata{$currentKey};
1746 if (lc($currentKey) eq "use wikilinks") {
1747 if (lc($g_metadata{$currentKey}) eq "true" ||
1748 $g_metadata{$currentKey} eq "1") {
1749 $g_use_wiki_links = 1;
1752 if (lc($currentKey) eq "bibliography title") {
1753 $g_bibliography_title = $g_metadata{$currentKey};
1754 $g_bibliography_title =~ s/\s*$//;
1756 if (lc($currentKey) eq "base header level") {
1757 $g_base_header_level = $g_metadata{$currentKey};
1759 if (!$g_metadata_newline{$currentKey}) {
1760 $g_metadata_newline{$currentKey} = $g_metadata_newline{default};
1763 if ($currentKey eq "") {
1764 # No metadata present
1765 $clean_text .= "$line\n";
1769 if ($line =~ /^\s*(.+)$/ ) {
1770 $g_metadata{$currentKey} .= "$g_metadata_newline{$currentKey}$1";
1774 $clean_text .= "$line\n";
1781 sub _StripFootnoteDefinitions {
1783 my $less_than_tab = $g_tab_width - 1;
1786 \n\[\^([^\n]+?)\]\:[ \t]*# id = $1
1788 (.*?)\n{1,2} # end at new paragraph
1789 ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
1794 my $footnote = "$2\n";
1795 $footnote =~ s/^[ ]{0,$g_tab_width}//gm;
1797 $g_footnotes{id2footnote($id)} = $footnote;
1806 # First, run routines that get skipped in footnotes
1807 foreach my $label (sort keys %g_footnotes) {
1808 my $footnote = _RunBlockGamut($g_footnotes{$label});
1810 $footnote = _DoMarkdownCitations($footnote);
1811 $g_footnotes{$label} = $footnote;
1815 \[\^(.+?)\] # id = $1
1818 my $id = id2footnote($1);
1819 if (defined $g_footnotes{$id} ) {
1820 $g_footnote_counter++;
1821 if ($g_footnotes{$id} =~ /^glossary:/i) {
1822 $result = "<a href=\"#fn:$id\" id=\"fnref:$id\" class=\"footnote glossary\">$g_footnote_counter</a>";
1824 $result = "<a href=\"#fn:$id\" id=\"fnref:$id\" class=\"footnote\">$g_footnote_counter</a>";
1826 push (@g_used_footnotes,$id);
1834 sub _FixFootnoteParagraphs {
1837 $text =~ s/^\<p\>\<\/footnote\>/<\/footnote>/gm;
1842 sub _PrintFootnotes{
1843 my $footnote_counter = 0;
1846 foreach my $id (@g_used_footnotes) {
1847 $footnote_counter++;
1848 my $footnote = $g_footnotes{$id};
1849 my $footnote_closing_tag = "";
1851 $footnote =~ s/(\<\/(p(re)?|ol|ul)\>)$//;
1852 $footnote_closing_tag = $1;
1854 if ($footnote =~ s/^glossary:\s*//i) {
1855 # Add some formatting for glossary entries
1860 (?:\(([^\(\)]*)\)[^\n]*)? # $2 = optional sort key
1863 my $glossary = "<span class=\"glossary name\">$1</span>";
1866 $glossary.="<span class=\"glossary sort\" style=\"display:none\">$2</span>";
1872 $result.="<li id=\"fn:$id\">$footnote<a href=\"#fnref:$id\" class=\"reversefootnote\"> ↩</a>$footnote_closing_tag</li>\n\n";
1874 $result.="<li id=\"fn:$id\">$footnote<a href=\"#fnref:$id\" class=\"reversefootnote\"> ↩</a>$footnote_closing_tag</li>\n\n";
1877 $result .= "</ol>\n</div>";
1879 if ($footnote_counter > 0) {
1880 $result = "\n\n<div class=\"footnotes\">\n<hr$g_empty_element_suffix\n<ol>\n\n".$result;
1885 $result= _UnescapeSpecialChars($result);
1891 my $label = lc $header;
1892 $label =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters
1893 while ($label =~ s/^[^A-Za-z]//g)
1894 {}; # Strip illegal leading characters
1899 # Since we prepend "fn:", we can allow leading digits in footnotes
1901 my $footnote = lc $id;
1902 $footnote =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters
1908 my $result = qq{<?xml version="1.0" encoding="UTF-8" ?>\n};
1910 # This screws up xsltproc - make sure to use `-nonet -novalid` if you
1912 if ($g_allow_mathml) {
1913 $result .= qq{<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"\n\t"http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd">
1916 $result.= qq{<html xmlns="http://www.w3.org/1999/xhtml">\n\t<head>\n};
1918 $result .= qq{<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n};
1920 $result.= qq!<html xmlns="http://www.w3.org/1999/xhtml">\n\t<head>\n!;
1923 $result.= "\t\t<!-- Processed by MultiMarkdown -->\n";
1925 foreach my $key (sort keys %g_metadata ) {
1926 # Strip trailing spaces
1927 $g_metadata{$key} =~ s/(\s)*$//s;
1929 # Strip spaces from key
1930 my $export_key = $key;
1931 $export_key =~ s/\s//g;
1933 if (lc($key) eq "title") {
1934 $result.= "\t\t<title>" . _EncodeAmpsAndAngles($g_metadata{$key}) . "</title>\n";
1935 } elsif (lc($key) eq "css") {
1936 $result.= "\t\t<link type=\"text/css\" rel=\"stylesheet\" href=\"$g_metadata{$key}\"$g_empty_element_suffix\n";
1937 } elsif (lc($export_key) eq "xhtmlheader") {
1938 $result .= "\t\t$g_metadata{$key}\n";
1940 $result.= qq!\t\t<meta name="$export_key" content="$g_metadata{$key}"$g_empty_element_suffix\n!;
1943 $result.= "\t</head>\n";
1951 foreach my $key (sort keys %g_metadata ) {
1952 $result .= "$key: $g_metadata{$key}\n";
1954 $result =~ s/\s*\n/<br \/>\n/g;
1956 if ($result ne "") {
1963 sub _ConvertCopyright{
1965 # Convert to an XML compatible form of copyright symbol
1967 $text =~ s/©/©/gi;
1972 sub _CreateWikiLink {
1983 return "[$title]($g_base_url$id)";
1988 my $WikiWord = '[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
1989 my $FreeLinkPattern = "([-,.()' _0-9A-Za-z\x80-\xff]+)";
1991 if ($g_wikilinks_kill_switch) {
1995 if ($g_use_wiki_links) {
1998 \[\[($FreeLinkPattern)\]\]
2007 _CreateWikiLink($label)
2012 if ($g_use_wiki_links) {
2016 $1 . _CreateWikiLink($2)
2019 # Catch WikiWords at beginning of text
2020 $text =~ s{^($WikiWord)
2030 sub _UnescapeWikiWords {
2032 my $WikiWord = '[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
2034 if ($g_wikilinks_kill_switch) {
2038 # Unescape escaped WikiWords
2039 # This should occur whether wikilinks are enabled or not
2040 $text =~ s/(?<=\B)\\($WikiWord)/$1/g;
2048 my $less_than_tab = $g_tab_width - 1;
2050 # Algorithm inspired by PHP Markdown Extra's table support
2051 # <http://www.michelf.com/projects/php-markdown/>
2053 # Reusable regexp's to match table
2055 my $line_start = qr{
2056 [ ]{0,$less_than_tab}
2068 my $table_rows = qr{
2072 my $table_caption = qr{
2077 my $table_divider = qr{
2079 [\|\-\+\:\.][ \-\+\|\:\.]* \| [ \-\+\|\:\.]*
2082 my $whole_table = qr{
2083 ($table_caption)? # Optional caption
2084 ($first_row # First line must start at beginning
2085 ($table_row)*?)? # Header Rows
2086 $table_divider # Divider/Alignment definitions
2087 $table_rows+ # Body Rows
2088 ($table_caption)? # Optional caption
2092 # Find whole tables, then break them up and process them
2095 ^($whole_table) # Whole table in $1
2096 (\n|\Z) # End of file or 2 blank lines
2100 # Clean extra spaces at end of lines -
2101 # they cause the processing to choke
2102 $table =~ s/[\t ]*\n/\n/gs;
2104 my $result = "<table>\n";
2106 my $use_row_header = 1;
2108 # Add Caption, if present
2110 if ($table =~ s/^$line_start\[\s*(.*?)\s*\](\[\s*(.*?)\s*\])?[ \t]*$//m) {
2113 # add caption id to cross-ref list
2114 $table_id = Header2Label($3);
2116 # use caption as the id
2117 $table_id = Header2Label($1);
2119 $result .= "<caption id=\"$table_id\">" . _RunSpanGamut($1). "</caption>\n";
2121 $g_crossrefs{$table_id} = "#$table_id";
2122 $g_titles{$table_id} = "$1";
2125 # If a second "caption" is present, treat it as a summary
2126 # However, this is not valid in XHTML 1.0 Strict
2127 # But maybe in future
2129 # A summary might be longer than one line
2130 if ($table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s) {
2131 # $result .= "<summary>" . _RunSpanGamut($1) . "</summary>\n";
2134 # Now, divide table into header, alignment, and body
2136 # First, add leading \n in case there is no header
2138 $table = "\n" . $table;
2142 $table =~ s/\n($table_divider)\n(($table_rows)+)//s;
2145 my $alignment_string = $1;
2150 # Process column alignment
2151 while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) {
2152 my $cell = _RunSpanGamut($1);
2154 $result .= "<col class=\"extended\"";
2158 if ($cell =~ /\:$/) {
2159 if ($cell =~ /^\:/) {
2160 $result .= " align=\"center\"$g_empty_element_suffix\n";
2161 push(@alignments,"center");
2163 $result .= " align=\"right\"$g_empty_element_suffix\n";
2164 push(@alignments,"right");
2167 if ($cell =~ /^\:/) {
2168 $result .= " align=\"left\"$g_empty_element_suffix\n";
2169 push(@alignments,"left");
2171 if (($cell =~ /^\./) || ($cell =~ /\.$/)) {
2172 $result .= " align=\"char\"$g_empty_element_suffix\n";
2173 push(@alignments,"char");
2175 $result .= "$g_empty_element_suffix\n";
2176 push(@alignments,"");
2183 $table =~ s/^\n+//s;
2185 $result .= "<thead>\n";
2188 $table =~ s/\n[ \t]*\n/\n/g;
2190 foreach my $line (split(/\n/, $table)) {
2191 # process each line (row) in table
2192 $result .= "<tr>\n";
2194 while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) {
2195 # process contents of each cell
2196 my $cell = _RunSpanGamut($1);
2199 if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) {
2200 $colspan = " colspan=\"" . length($ending) . "\"";
2202 $result .= "\t<th$colspan>$cell</th>\n";
2204 if ($cell =~ /^\s*$/) {
2205 $use_row_header = 1;
2207 $use_row_header = 0;
2212 $result .= "</tr>\n";
2217 $result .= "</thead>\n<tbody>\n";
2219 foreach my $line (split(/\n/, $body)) {
2220 # process each line (row) in table
2221 if ($line =~ /^\s*$/) {
2222 $result .= "</tbody>\n\n<tbody>\n";
2225 $result .= "<tr>\n";
2227 while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) {
2228 # process contents of each cell
2229 my $cell = _RunSpanGamut($1);
2235 my $cell_type = "td";
2236 if ($count == 0 && $use_row_header == 1) {
2239 if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) {
2240 $colspan = " colspan=\"" . length($ending) . "\"";
2242 if ($alignments[$count] !~ /^\s*$/) {
2243 $result .= "\t<$cell_type$colspan align=\"$alignments[$count]\">$cell</$cell_type>\n";
2245 $result .= "\t<$cell_type$colspan>$cell</$cell_type>\n";
2249 $result .= "</tr>\n";
2252 # Strip out empty <thead> sections
2253 $result =~ s/<thead>\s*<\/thead>\s*//s;
2255 # Handle pull-quotes
2257 # This might be too specific for my needs. If others want it
2258 # removed, I am open to discussion.
2260 $result =~ s/<table>\s*<col \/>\s*<tbody>/<table class="pull-quote">\n<col \/>\n<tbody>/s;
2262 $result .= "</tbody>\n</table>\n";
2266 my $table_body = qr{
2267 ( # wrap whole match in $2
2269 (.*?\|.*?)\n # wrap headers in $3
2271 [ ]{0,$less_than_tab}
2272 ($table_divider) # alignment in $4
2274 ( # wrap cells in $5
2288 if (defined $g_attributes{$id}) {
2289 my $attributes = $g_attributes{$id};
2290 while ($attributes =~ s/(\S+)="(.*?)"//) {
2291 $result .= " $1=\"$2\"";
2293 while ($attributes =~ /(\S+)=(\S+)/g) {
2294 $result .= " $1=\"$2\"";
2302 sub _StripMarkdownReferences {
2304 my $less_than_tab = $g_tab_width - 1;
2307 \n\[\#(.+?)\]:[ \t]* # id = $1
2309 (.*?)\n{1,2} # end at new paragraph
2310 ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
2315 my $reference = "$2\n";
2317 $reference =~ s/^[ ]{0,$g_tab_width}//gm;
2319 $reference = _RunBlockGamut($reference);
2321 # strip leading and trailing <p> tags (they will be added later)
2322 $reference =~ s/^\<p\>//s;
2323 $reference =~ s/\<\/p\>\s*$//s;
2325 $g_references{$id} = $reference;
2331 sub _DoMarkdownCitations {
2334 $text =~ s{ # Allow for citations without locator to be written
2335 \[\#([^\[]*?)\] # in usual manner, e.g. [#author][] rather than
2344 \[([^\[]*?)\] # citation text = $1
2345 [ ]? # one optional space
2346 (?:\n[ ]*)? # one optional newline followed by spaces
2347 \[\#(.*?)\] # id = $2
2350 my $anchor_text = $1;
2354 # implement equivalent to \citet
2355 my $textual_string = "";
2356 if ($anchor_text =~ s/^(.*?);\s*//) {
2357 $textual_string = "<span class=\"textual citation\">$1</span>";
2360 if (defined $g_references{$id} ) {
2361 my $citation_counter=0;
2363 # See if citation has been used before
2364 foreach my $old_id (@g_used_references) {
2365 $citation_counter++;
2366 $count = $citation_counter if ($old_id eq $id);
2369 if (! defined $count) {
2370 $g_citation_counter++;
2371 $count = $g_citation_counter;
2372 push (@g_used_references,$id);
2375 $result = "<span class=\"markdowncitation\">$textual_string (<a href=\"#$id\">$count</a>";
2377 if ($anchor_text ne "") {
2378 $result .=", <span class=\"locator\">$anchor_text</span>";
2381 $result .= ")</span>";
2383 # No reference exists
2384 $result = "<span class=\"externalcitation\">$textual_string (<a id=\"$id\">$id</a>";
2386 if ($anchor_text ne "") {
2387 $result .=", <span class=\"locator\">$anchor_text</span>";
2390 $result .= ")</span>";
2393 if (Header2Label($anchor_text) eq "notcited"){
2394 $result = "<span class=\"notcited\" id=\"$id\"/>";
2403 sub _PrintMarkdownBibliography{
2404 my $citation_counter = 0;
2407 foreach my $id (@g_used_references) {
2408 $citation_counter++;
2409 $result.="<div id=\"$id\"><p>[$citation_counter] <span class=\"item\">$g_references{$id}</span></p></div>\n\n";
2411 $result .= "</div>";
2413 if ($citation_counter > 0) {
2414 $result = "\n\n<div class=\"bibliography\">\n<hr$g_empty_element_suffix\n<p>$g_bibliography_title</p>\n\n".$result;
2422 sub _GenerateImageCrossRefs {
2426 # First, handle reference-style labeled images: ![alt text][id]
2429 ( # wrap whole match in $1
2431 (.*?) # alt text = $2
2434 [ ]? # one optional space
2435 (?:\n[ ]*)? # one optional newline followed by spaces
2444 my $whole_match = $1;
2446 my $link_id = lc $3;
2448 if ($link_id eq "") {
2449 $link_id = lc $alt_text; # for shortcut links like ![this][].
2452 $alt_text =~ s/"/"/g;
2453 if (defined $g_urls{$link_id}) {
2454 my $label = Header2Label($alt_text);
2455 $g_crossrefs{$label} = "#$label";
2458 # If there's no such link ID, leave intact:
2459 $result = $whole_match;
2466 # Next, handle inline images: ![alt text](url "optional title")
2467 # Don't forget: encode * and _
2470 ( # wrap whole match in $1
2472 (.*?) # alt text = $2
2476 <?(\S+?)>? # src url = $3
2479 (['"]) # quote char = $5 '
2483 )? # title is optional
2488 my $whole_match = $1;
2491 $alt_text =~ s/"/"/g;
2492 my $label = Header2Label($alt_text);
2493 $g_crossrefs{$label} = "#$label";
2500 sub _FindMathEquations{
2504 (\<math[^\>]*)id=\"(.*?)\"> # "
2506 my $label = Header2Label($2);
2507 my $header = _RunSpanGamut($2);
2509 $g_crossrefs{$label} = "#$label";
2510 $g_titles{$label} = $header;
2512 $1 . "id=\"$label\">";
2519 # Based on Gruber's _DoCodeSpans
2522 my $display_as_block = 0;
2523 $display_as_block = 1 if ($text =~ /^<<[^\>\>]*>>$/);
2526 (?<!\\) # Character before opening << can't be a backslash
2528 (.+?) # $2 = The code block
2529 (?:\[(.+)\])? # $3 = optional label
2534 my @attr = (xmlns=>"http://www.w3.org/1998/Math/MathML");
2537 $label = Header2Label($3);
2538 my $header = _RunSpanGamut($3);
2540 $g_crossrefs{$label} = "#$label";
2541 $g_titles{$label} = $header;
2543 $m =~ s/^[ \t]*//g; # leading whitespace
2544 $m =~ s/[ \t]*$//g; # trailing whitespace
2545 push(@attr,(id=>"$label")) if ($label ne "");
2546 push(@attr,(display=>"block")) if ($display_as_block == 1);
2548 $m = $mathParser->TextToMathML($m,\@attr);
2555 sub _DoDefinitionLists {
2556 # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra
2559 my $less_than_tab = $g_tab_width -1;
2561 my $line_start = qr{
2562 [ ]{0,$less_than_tab}
2570 my $definition = qr{
2571 \n?[ ]{0,$less_than_tab}
2573 ((?=\n*[ ]{0,$less_than_tab}\S)|\n\n|\Z) # Lookahead for non-space at line-start,
2574 # two returns, or end of doc
2577 my $definition_block = qr{
2578 ((?:$term)+) # $1 = one or more terms
2579 ((?:$definition)+) # $2 = by one or more definitions
2582 my $definition_list = qr{
2583 (?:$definition_block\n*)+ # One or more definition blocks
2587 ($definition_list) # $1 = the whole list
2593 (?:$definition_block)\n*
2599 [ ]{0,$less_than_tab}
2605 $term =~ s/^\s*(.*?)\s*$/$1/;
2606 if ($term !~ /^\s*$/){
2607 $result = "<dt>" . _RunSpanGamut($1) . "</dt>\n";
2615 my $def = $1 . "\n";
2616 $def =~ s/^[ ]{0,$g_tab_width}//gm;
2617 "<dd>\n" . _RunBlockGamut($def) . "\n</dd>\n";
2620 $terms . $defs . "\n";
2623 "<dl>\n" . $list . "</dl>\n\n";
2629 sub _UnescapeComments{
2630 # Remove encoding inside comments
2631 # Based on proposal by Tomas Doran (author of Text::MultiMarkdown)
2635 (?<=<!--) # Begin comment
2636 (.*?) # Anything inside
2637 (?=-->) # End comments
2662 B<MultiMarkdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<--shortversion> ]
2668 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2669 easy-to-write structured text format into HTML. Markdown's text format
2670 is most similar to that of plain text email, and supports features such
2671 as headers, *emphasis*, code blocks, blockquotes, and links.
2673 Markdown's syntax is designed not as a generic markup language, but
2674 specifically to serve as a front-end to (X)HTML. You can use span-level
2675 HTML tags anywhere in a Markdown document, and you can use block level
2676 HTML tags (like <div> and <table>) as well.
2678 For more information about Markdown's syntax, see:
2680 http://daringfireball.net/projects/markdown/
2685 Use "--" to end switch parsing. For example, to open a file named "-z", use:
2692 =item B<--html4tags>
2694 Use HTML 4 style for empty element tags, e.g.:
2698 instead of Markdown's default XHTML style tags, e.g.:
2703 =item B<-v>, B<--version>
2705 Display Markdown's version number and copyright information.
2708 =item B<-s>, B<--shortversion>
2710 Display the short-form version number.
2719 To file bug reports or feature requests (other than topics listed in the
2720 Caveats section above) please send email to:
2722 support@daringfireball.net (for Markdown issues)
2724 fletcher@fletcherpenney.net (for MultiMarkdown issues)
2726 Please include with your report: (1) the example input; (2) the output
2727 you expected; (3) the output Markdown actually produced.
2730 =head1 VERSION HISTORY
2732 See the readme file for detailed release notes for this version.
2734 1.0.2b8 - Wed 09 May 2007
2736 + Fixed bug with nested raw HTML tags that contained
2737 attributes. The problem is that it uses a backreference in
2738 the expression that it passes to gen_extract_tagged, which
2739 is broken when Text::Balanced wraps it in parentheses.
2741 Thanks to Matt Kraai for the patch.
2743 + Now supports URLs containing literal parentheses, such as:
2745 http://en.wikipedia.org/wiki/WIMP_(computing)
2747 Such parentheses may be arbitrarily nested, but must be
2753 + Changed shebang line from "/usr/bin/perl" to "/usr/bin/env perl"
2755 + Now only trim trailing newlines from code blocks, instead of trimming
2756 all trailing whitespace characters.
2759 1.0.2b6 - Mon 03 Apr 2006
2761 + Fixed bad performance bug in new `Text::Balanced`-based block-level parser.
2764 1.0.2b5 - Thu 08 Dec 2005
2766 + Fixed bug where this:
2768 [text](http://m.com "title" )
2770 wasn't working as expected, because the parser wasn't allowing for spaces
2771 before the closing paren.
2774 1.0.2b4 - Thu 08 Sep 2005
2776 + Filthy hack to support markdown='1' in div tags, because I need it
2777 to write today's fireball.
2779 + First crack at a new, smarter, block-level HTML parser.
2781 1.0.2b3 - Thu 28 Apr 2005
2783 + _DoAutoLinks() now supports the 'dict://' URL scheme.
2785 + PHP- and ASP-style processing instructions are now protected as
2791 + Workarounds for regressions introduced with fix for "backticks within
2792 tags" bug in 1.0.2b1. The fix is to allow `...` to be turned into
2793 <code>...</code> within an HTML tag attribute, and then to turn
2794 these spurious `<code>` tags back into literal backtick characters
2795 in _EscapeSpecialCharsWithinTagAttributes().
2797 The regression was caused because in the fix, we moved
2798 _EscapeSpecialCharsWithinTagAttributes() ahead of _DoCodeSpans()
2799 in _RunSpanGamut(), but that's no good. We need to process code
2800 spans first, otherwise we can get tripped up by something like this:
2802 `<test a="` content of attribute `">`
2805 1.0.2b2 - 20 Mar 2005
2807 + Fix for nested sub-lists in list-paragraph mode. Previously we got
2808 a spurious extra level of `<p>` tags for something like this:
2816 + Experimental support for [this] as a synonym for [this][].
2817 (Note to self: No test yet for this.)
2818 Be sure to test, e.g.: [permutations of this sort of [thing][].]
2821 1.0.2b1 - 28 Feb 2005
2823 + Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
2825 + Fix for escaped backticks still triggering code spans:
2827 There are two raw backticks here: \` and here: \`, not a code span
2837 http://daringfireball.net/
2839 PHP port and other contributions by Michel Fortin
2842 MultiMarkdown changes by Fletcher Penney
2843 http://fletcherpenney.net/
2845 =head1 COPYRIGHT AND LICENSE
2847 Original Markdown Code Copyright (c) 2003-2007 John Gruber
2848 <http://daringfireball.net/>
2849 All rights reserved.
2851 MultiMarkdown changes Copyright (c) 2005-2007 Fletcher T. Penney
2852 <http://fletcherpenney.net/>
2853 All rights reserved.
2855 Redistribution and use in source and binary forms, with or without
2856 modification, are permitted provided that the following conditions are
2859 * Redistributions of source code must retain the above copyright notice,
2860 this list of conditions and the following disclaimer.
2862 * Redistributions in binary form must reproduce the above copyright
2863 notice, this list of conditions and the following disclaimer in the
2864 documentation and/or other materials provided with the distribution.
2866 * Neither the name "Markdown" nor the names of its contributors may
2867 be used to endorse or promote products derived from this software
2868 without specific prior written permission.
2870 This software is provided by the copyright holders and contributors "as
2871 is" and any express or implied warranties, including, but not limited
2872 to, the implied warranties of merchantability and fitness for a
2873 particular purpose are disclaimed. In no event shall the copyright owner
2874 or contributors be liable for any direct, indirect, incidental, special,
2875 exemplary, or consequential damages (including, but not limited to,
2876 procurement of substitute goods or services; loss of use, data, or
2877 profits; or business interruption) however caused and on any theory of
2878 liability, whether in contract, strict liability, or tort (including
2879 negligence or otherwise) arising in any way out of the use of this
2880 software, even if advised of the possibility of such damage.
2885 Possibilities for 'THE'