git.oblomov.eu Git - multimarkdown/blob - bin/MultiMarkdown.pl

   1 #!/usr/bin/env perl
   2
   3 # MultiMarkdown -- A modification of John Gruber's original Markdown
   4 #       that adds new features and an output format that can more readily
   5 #       be converted into other document formats
   6 #
   7 # $Id: MultiMarkdown.pl 492 2008-01-18 23:08:43Z fletcher $
   8 #
   9 # Original Code Copyright (c) 2004-2007 John Gruber
  10 #       <http://daringfireball.net/projects/markdown/>
  11 #
  12 # MultiMarkdown changes Copyright (c) 2005-2008 Fletcher T. Penney
  13 #       <http://fletcherpenney.net/>
  14 #
  15 # MultiMarkdown Version 2.0.b5
  16 #
  17 # Based on Markdown.pl 1.0.2b8 -  Wed 09 May 2007
  18 #
  19 #
  20 #       TODO: Change math mode delimiter?
  21 #       TODO: WikiWords inside of MMD links are converted to wiki links
  22 #       TODO: Still need to get the glossary working in non-memoir documents
  23 #       TODO: A mechanism to include arbitrary code (LaTeX, etc) without being "ugly"
  24 #       TODO: Look into discussion re: assigning classes to div's/span's on Markdown list.
  25 #       TODO: Should I just scrap the WikiWords feature to get rid of all the trouble it causes?
  26 #       TODO: Improve support for tables with long items and overall width in LaTeX
  27 #       TODO: Need a syntax for long table cells in MMD, even if no rowspan feature yet
  28 #       TODO: Create utilities to convert MMD tables to/from tab-delimited
  29
  30
  31 package Markdown;
  32 require 5.006_000;
  33 use strict;
  34 use warnings;
  35 use File::Basename;
  36
  37 # Include ASCIIMathML.pm
  38         my $me = $0;                            # Where am I?
  39
  40         # Am I running in Windoze?
  41         my $os = $^O;
  42
  43         if ($os =~ /MSWin/) {
  44                 $me = dirname($me)."\\";        # Get just the directory portion
  45         } else {
  46                 $me = dirname(readlink($me))."/";       # Get just the directory portion
  47         }
  48
  49         require $me ."ASCIIMathML.pm";
  50
  51 use Digest::MD5 qw(md5_hex);
  52 use vars qw($VERSION $g_use_metadata $g_use_wiki_links $g_base_url
  53         $g_bibliography_title $g_allow_mathml $g_base_header_level $mathParser);
  54 $VERSION = '2.0.b5';
  55
  56 $mathParser = new Text::ASCIIMathML();
  57
  58 ## Disabled; causes problems under Perl 5.6.1:
  59 # use utf8;
  60 # binmode( STDOUT, ":utf8" );  # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
  61
  62 #
  63 # Global default settings:
  64 #
  65 my $g_empty_element_suffix = " />";     # Change to ">" for HTML output
  66 my $g_tab_width = 4;
  67 my $g_allow_mathml = 1;
  68 my $g_base_header_level = 1;
  69 my $g_wikilinks_kill_switch = 1;                # WikiLinks may become deprecated; this is the first step
  70
  71 #
  72 # Globals:
  73 #
  74
  75 # Reusable patterns to match balanced [brackets] and (parens). See
  76 # Friedl's "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
  77 my ($g_nested_brackets, $g_nested_parens);
  78 $g_nested_brackets = qr{
  79         (?>                                                             # Atomic matching
  80            [^\[\]]+                                                     # Anything other than brackets
  81          |
  82            \[
  83                  (??{ $g_nested_brackets })             # Recursive set of nested brackets
  84            \]
  85         )*
  86 }x;
  87
  88 # Doesn't allow for whitespace, because we're using it to match URLs:
  89 $g_nested_parens = qr{
  90         (?>                                                             # Atomic matching
  91            [^()\s]+                                                     # Anything other than parens or whitespace
  92          |
  93            \(
  94                  (??{ $g_nested_parens })               # Recursive set of nested brackets
  95            \)
  96         )*
  97 }x;
  98
  99
 100 # Table of hash values for escaped characters:
 101 my %g_escape_table;
 102 foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
 103         $g_escape_table{$char} = md5_hex($char);
 104 }
 105
 106
 107 # Global hashes, used by various utility routines
 108 my %g_urls = ();
 109 my %g_titles= ();
 110 my %g_html_blocks = ();
 111 my %g_metadata = ();
 112 my %g_metadata_newline = ();
 113 my %g_crossrefs = ();
 114 my %g_footnotes = ();
 115 my %g_attributes = ();
 116 my @g_used_footnotes = ();
 117 my $g_footnote_counter = 0;
 118
 119 my $g_citation_counter = 0;
 120 my @g_used_references = ();
 121 my %g_references = ();
 122 $g_bibliography_title = "Bibliography";
 123
 124 $g_use_metadata = 1;
 125 $g_metadata_newline{default} = "\n";
 126 $g_metadata_newline{keywords} = ", ";
 127 my $g_document_format = "";
 128
 129 # For use with WikiWords and [[Wiki Links]]
 130 $g_use_wiki_links = 0;
 131 $g_base_url = "";               # This is the base url to be used for WikiLinks
 132 my $g_temp_no_wikiwords = 0;
 133
 134 # NOTE:
 135 # You can use \WikiWord to prevent a WikiWord from being treated as a link
 136
 137
 138 # Used to track when we're inside an ordered or unordered list
 139 # (see _ProcessListItems() for details):
 140 my $g_list_level = 0;
 141
 142
 143 #### Blosxom plug-in interface ##########################################
 144
 145 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
 146 # which posts Markdown should process, using a "meta-markup: markdown"
 147 # header. If it's set to 0 (the default), Markdown will process all
 148 # entries.
 149 my $g_blosxom_use_meta = 0;
 150
 151 sub start { 1; }
 152 sub story {
 153         my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
 154
 155         if ( (! $g_blosxom_use_meta) or
 156              (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
 157              ){
 158                         $$body_ref  = Markdown($$body_ref);
 159      }
 160      1;
 161 }
 162
 163
 164 #### Movable Type plug-in interface #####################################
 165 eval {require MT};  # Test to see if we're running in MT.
 166 unless ($@) {
 167     require MT;
 168     import  MT;
 169     require MT::Template::Context;
 170     import  MT::Template::Context;
 171
 172         eval {require MT::Plugin};  # Test to see if we're running >= MT 3.0.
 173         unless ($@) {
 174                 require MT::Plugin;
 175                 import  MT::Plugin;
 176                 my $plugin = new MT::Plugin({
 177                         name => "Markdown",
 178                         description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
 179                         doc_link => 'http://daringfireball.net/projects/markdown/'
 180                 });
 181                 MT->add_plugin( $plugin );
 182         }
 183
 184         MT::Template::Context->add_container_tag(MarkdownOptions => sub {
 185                 my $ctx  = shift;
 186                 my $args = shift;
 187                 my $builder = $ctx->stash('builder');
 188                 my $tokens = $ctx->stash('tokens');
 189
 190                 if (defined ($args->{'output'}) ) {
 191                         $ctx->stash('markdown_output', lc $args->{'output'});
 192                 }
 193
 194                 defined (my $str = $builder->build($ctx, $tokens) )
 195                         or return $ctx->error($builder->errstr);
 196                 $str;           # return value
 197         });
 198
 199         MT->add_text_filter('markdown' => {
 200                 label     => 'Markdown',
 201                 docs      => 'http://daringfireball.net/projects/markdown/',
 202                 on_format => sub {
 203                         my $text = shift;
 204                         my $ctx  = shift;
 205                         my $raw  = 0;
 206                     if (defined $ctx) {
 207                         my $output = $ctx->stash('markdown_output');
 208                                 if (defined $output  &&  $output =~ m/^html/i) {
 209                                         $g_empty_element_suffix = ">";
 210                                         $ctx->stash('markdown_output', '');
 211                                 }
 212                                 elsif (defined $output  &&  $output eq 'raw') {
 213                                         $raw = 1;
 214                                         $ctx->stash('markdown_output', '');
 215                                 }
 216                                 else {
 217                                         $raw = 0;
 218                                         $g_empty_element_suffix = " />";
 219                                 }
 220                         }
 221                         $text = $raw ? $text : Markdown($text);
 222                         $text;
 223                 },
 224         });
 225
 226         # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
 227         my $smartypants;
 228
 229         {
 230                 no warnings "once";
 231                 $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
 232         }
 233
 234         if ($smartypants) {
 235                 MT->add_text_filter('markdown_with_smartypants' => {
 236                         label     => 'Markdown With SmartyPants',
 237                         docs      => 'http://daringfireball.net/projects/markdown/',
 238                         on_format => sub {
 239                                 my $text = shift;
 240                                 my $ctx  = shift;
 241                                 if (defined $ctx) {
 242                                         my $output = $ctx->stash('markdown_output');
 243                                         if (defined $output  &&  $output eq 'html') {
 244                                                 $g_empty_element_suffix = ">";
 245                                         }
 246                                         else {
 247                                                 $g_empty_element_suffix = " />";
 248                                         }
 249                                 }
 250                                 $text = Markdown($text);
 251                                 $text = $smartypants->($text, '1');
 252                         },
 253                 });
 254         }
 255 }
 256 else {
 257 #### BBEdit/command-line text filter interface ##########################
 258 # Needs to be hidden from MT (and Blosxom when running in static mode).
 259
 260     # We're only using $blosxom::version once; tell Perl not to warn us:
 261         no warnings 'once';
 262     unless ( defined($blosxom::version) ) {
 263                 use warnings;
 264
 265                 #### Check for command-line switches: #################
 266                 my %cli_opts;
 267                 use Getopt::Long;
 268                 Getopt::Long::Configure('pass_through');
 269                 GetOptions(\%cli_opts,
 270                         'version',
 271                         'shortversion',
 272                         'html4tags',
 273                 );
 274                 if ($cli_opts{'version'}) {             # Version info
 275                         print "\nThis is Markdown, version $VERSION.\n";
 276                         print "Copyright 2004 John Gruber\n";
 277                         print "http://daringfireball.net/projects/markdown/\n\n";
 278                         exit 0;
 279                 }
 280                 if ($cli_opts{'shortversion'}) {                # Just the version number string.
 281                         print $VERSION;
 282                         exit 0;
 283                 }
 284                 if ($cli_opts{'html4tags'}) {                   # Use HTML tag style instead of XHTML
 285                         $g_empty_element_suffix = ">";
 286                 }
 287
 288
 289                 #### Process incoming text: ###########################
 290                 my $text;
 291                 {
 292                         local $/;               # Slurp the whole file
 293                         $text = <>;
 294                 }
 295         print Markdown($text);
 296     }
 297 }
 298
 299
 300
 301 sub Markdown {
 302 #
 303 # Main function. The order in which other subs are called here is
 304 # essential. Link and image substitutions need to happen before
 305 # _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
 306 # and <img> tags get encoded.
 307 #
 308         my $text = shift;
 309
 310         # Clear the global hashes. If we don't clear these, you get conflicts
 311         # from other articles when generating a page which contains more than
 312         # one article (e.g. an index page that shows the N most recent
 313         # articles):
 314         %g_urls = ();
 315         %g_titles = ();
 316         %g_html_blocks = ();
 317         %g_metadata = ();
 318         %g_crossrefs = ();
 319         %g_footnotes = ();
 320         @g_used_footnotes = ();
 321         @g_used_references = ();
 322
 323
 324         # Standardize line endings:
 325         $text =~ s{\r\n}{\n}g;  # DOS to Unix
 326         $text =~ s{\r}{\n}g;    # Mac to Unix
 327
 328         # Make sure $text ends with a couple of newlines:
 329         $text .= "\n\n";
 330
 331         # Convert all tabs to spaces.
 332         $text = _Detab($text);
 333
 334         # Strip any lines consisting only of spaces and tabs.
 335         # This makes subsequent regexen easier to write, because we can
 336         # match consecutive blank lines with /\n+/ instead of something
 337         # contorted like /[ \t]*\n+/ .
 338         $text =~ s/^[ \t]+$//mg;
 339
 340         # Strip leading blank lines
 341         $text =~ s/^\n+//s;
 342
 343         # Strip out MetaData
 344         $text = _ParseMetaData($text) if $g_use_metadata;
 345
 346         # And recheck for leading blank lines
 347         $text =~ s/^\n+//s;
 348
 349         # Turn block-level HTML blocks into hash entries
 350         $text = _HashHTMLBlocks($text);
 351
 352         # Strip footnote and link definitions, store in hashes.
 353         $text = _StripFootnoteDefinitions($text);
 354
 355         $text = _StripLinkDefinitions($text);
 356
 357         _GenerateImageCrossRefs($text);
 358
 359         $text = _StripMarkdownReferences($text);
 360
 361         $text = _RunBlockGamut($text);
 362
 363         $text = _DoMarkdownCitations($text);
 364
 365         $text = _DoFootnotes($text);
 366
 367         $text = _UnescapeSpecialChars($text);
 368
 369         # Clean encoding within HTML comments
 370         $text = _UnescapeComments($text);
 371
 372         # This must follow _UnescapeSpecialChars
 373         $text = _UnescapeWikiWords($text);
 374
 375         $text = _FixFootnoteParagraphs($text);
 376         $text .= _PrintFootnotes();
 377
 378         $text .= _PrintMarkdownBibliography();
 379
 380         $text = _ConvertCopyright($text);
 381
 382         if (lc($g_document_format) =~ /^complete\s*$/) {
 383                 return xhtmlMetaData() . "<body>\n\n" . $text . "\n</body>\n</html>";
 384         } else {
 385                 return textMetaData() . $text . "\n";
 386         }
 387
 388 }
 389
 390
 391 sub _StripLinkDefinitions {
 392 #
 393 # Strips link definitions from text, stores the URLs and titles in
 394 # hash references.
 395 #
 396         my $text = shift;
 397         my $less_than_tab = $g_tab_width - 1;
 398
 399         # Link defs are in the form: ^[id]: url "optional title"
 400         while ($text =~ s{
 401                                                 # Pattern altered for MultiMarkdown
 402                                                 # in order to not match citations or footnotes
 403                                                 ^[ ]{0,$less_than_tab}\[([^#^].*)\]:    # id = $1
 404                                                   [ \t]*
 405                                                   \n?                           # maybe *one* newline
 406                                                   [ \t]*
 407                                                 <?(\S+?)>?                      # url = $2
 408                                                   [ \t]*
 409                                                   \n?                           # maybe one newline
 410                                                   [ \t]*
 411                                                 (?:
 412                                                         (?<=\s)                 # lookbehind for whitespace
 413                                                         ["(]
 414                                                         (.+?)                   # title = $3
 415                                                         [")]
 416                                                         [ \t]*
 417                                                 )?      # title is optional
 418
 419                                                 # MultiMarkdown addition for attribute support
 420                                                 \n?
 421                                                 (                               # Attributes = $4
 422                                                         (?<=\s)                 # lookbehind for whitespace
 423                                                         (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))*
 424                                                 )?
 425                                                 [ \t]*
 426                                                 # /addition
 427                                                 (?:\n+|\Z)
 428                                         }
 429                                         {}mx) {
 430                 $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 );    # Link IDs are case-insensitive
 431                 if ($3) {
 432                         $g_titles{lc $1} = $3;
 433                         $g_titles{lc $1} =~ s/"/&quot;/g;
 434                 }
 435
 436                 # MultiMarkdown addition "
 437                 if ($4) {
 438                         $g_attributes{lc $1} = $4;
 439                 }
 440                 # /addition
 441         }
 442
 443         return $text;
 444 }
 445
 446
 447 sub _HashHTMLBlocks {
 448         my $text = shift;
 449         my $less_than_tab = $g_tab_width - 1;
 450
 451         # Hashify HTML blocks:
 452         # We only want to do this for block-level HTML tags, such as headers,
 453         # lists, and tables. That's because we still want to wrap <p>s around
 454         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 455         # phrase emphasis, and spans. The list of tags we're looking for is
 456         # hard-coded:
 457         my $block_tags = qr{
 458                   (?:
 459                         p         |  div     |  h[1-6]  |  blockquote  |  pre       |  table  |
 460                         dl        |  ol      |  ul      |  script      |  noscript  |  form   |
 461                         fieldset  |  iframe     |  ins         |  del
 462                   )
 463                 }x;                     # MultiMarkdown does not include `math` in the above list so that
 464                                         # Equations can optionally be included in separate paragraphs
 465
 466         my $tag_attrs = qr{
 467                                                 (?:                             # Match one attr name/value pair
 468                                                         \s+                             # There needs to be at least some whitespace
 469                                                                                         # before each attribute name.
 470                                                         [\w.:_-]+               # Attribute name
 471                                                         \s*=\s*
 472                                                         (?:
 473                                                                 ".+?"           # "Attribute value"
 474                                                          |
 475                                                                 '.+?'           # 'Attribute value'
 476                                                         )
 477                                                 )*                              # Zero or more
 478                                         }x;
 479
 480         my $empty_tag = qr{< \w+ $tag_attrs \s* />}xms;
 481         my $open_tag =  qr{< $block_tags $tag_attrs \s* >}xms;
 482         my $close_tag = undef;  # let Text::Balanced handle this
 483
 484         use Text::Balanced qw(gen_extract_tagged);
 485         my $extract_block = gen_extract_tagged($open_tag, $close_tag, undef, { ignore => [$empty_tag] });
 486
 487         my @chunks;
 488         ## TO-DO: the 0,3 on the next line ought to respect the
 489         ## tabwidth, or else, we should mandate 4-space tabwidth and
 490         ## be done with it:
 491         while ($text =~ s{^(([ ]{0,3}<)?.*\n)}{}m) {
 492                 my $cur_line = $1;
 493                 if (defined $2) {
 494                         # current line could be start of code block
 495
 496                         my ($tag, $remainder) = $extract_block->($cur_line . $text);
 497                         if ($tag) {
 498                                 my $key = md5_hex($tag);
 499                                 $g_html_blocks{$key} = $tag;
 500                                 push @chunks, "\n\n" . $key . "\n\n";
 501                                 $text = $remainder;
 502                         }
 503                         else {
 504                                 # No tag match, so toss $cur_line into @chunks
 505                                 push @chunks, $cur_line;
 506                         }
 507                 }
 508                 else {
 509                         # current line could NOT be start of code block
 510                         push @chunks, $cur_line;
 511                 }
 512
 513         }
 514         push @chunks, $text; # Whatever is left.
 515
 516         $text = join '', @chunks;
 517
 518
 519
 520         # Special case just for <hr />. It was easier to make a special case than
 521         # to make the other regex more complicated.
 522         $text =~ s{
 523                                 (?:
 524                                         (?<=\n\n)               # Starting after a blank line
 525                                         |                               # or
 526                                         \A\n?                   # the beginning of the doc
 527                                 )
 528                                 (                                               # save in $1
 529                                         [ ]{0,$less_than_tab}
 530                                         <(hr)                           # start tag = $2
 531                                         \b                                      # word break
 532                                         ([^<>])*?                       #
 533                                         /?>                                     # the matching end tag
 534                                         [ \t]*
 535                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 536                                 )
 537                         }{
 538                                 my $key = md5_hex($1);
 539                                 $g_html_blocks{$key} = $1;
 540                                 "\n\n" . $key . "\n\n";
 541                         }egx;
 542
 543         # Special case for standalone HTML comments:
 544         $text =~ s{
 545                                 (?:
 546                                         (?<=\n\n)               # Starting after a blank line
 547                                         |                               # or
 548                                         \A\n?                   # the beginning of the doc
 549                                 )
 550                                 (                                               # save in $1
 551                                         [ ]{0,$less_than_tab}
 552                                         (?s:
 553                                                 <!
 554                                                 (--.*?--\s*)+
 555                                                 >
 556                                         )
 557                                         [ \t]*
 558                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 559                                 )
 560                         }{
 561                                 my $key = md5_hex($1);
 562                                 $g_html_blocks{$key} = $1;
 563                                 "\n\n" . $key . "\n\n";
 564                         }egx;
 565
 566         # PHP and ASP-style processor instructions (<?…?> and <%…%>)
 567         $text =~ s{
 568                                 (?:
 569                                         (?<=\n\n)               # Starting after a blank line
 570                                         |                               # or
 571                                         \A\n?                   # the beginning of the doc
 572                                 )
 573                                 (                                               # save in $1
 574                                         [ ]{0,$less_than_tab}
 575                                         (?s:
 576                                                 <([?%])                 # $2
 577                                                 .*?
 578                                                 \2>
 579                                         )
 580                                         [ \t]*
 581                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
 582                                 )
 583                         }{
 584                                 my $key = md5_hex($1);
 585                                 $g_html_blocks{$key} = $1;
 586                                 "\n\n" . $key . "\n\n";
 587                         }egx;
 588
 589
 590         return $text;
 591 }
 592
 593
 594 sub _RunBlockGamut {
 595 #
 596 # These are all the transformations that form block-level
 597 # tags like paragraphs, headers, and list items.
 598 #
 599         my $text = shift;
 600
 601         $text = _DoHeaders($text);
 602
 603         # Do tables first to populate the table id's for cross-refs
 604         # Escape <pre><code> so we don't get greedy with tables
 605         $text = _DoTables($text);
 606
 607         # And now, protect our tables
 608         $text = _HashHTMLBlocks($text);
 609
 610         # Do Horizontal Rules:
 611         $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
 612         $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
 613         $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
 614
 615         $text = _DoDefinitionLists($text);
 616         $text = _DoLists($text);
 617         $text = _DoCodeBlocks($text);
 618         $text = _DoBlockQuotes($text);
 619
 620         # We already ran _HashHTMLBlocks() before, in Markdown(), but that
 621         # was to escape raw HTML in the original Markdown source. This time,
 622         # we're escaping the markup we've just created, so that we don't wrap
 623         # <p> tags around block-level tags.
 624         $text = _HashHTMLBlocks($text);
 625         $text = _FormParagraphs($text);
 626
 627         return $text;
 628 }
 629
 630
 631 sub _RunSpanGamut {
 632 #
 633 # These are all the transformations that occur *within* block-level
 634 # tags like paragraphs, headers, and list items.
 635 #
 636         my $text = shift;
 637
 638         $text = _DoCodeSpans($text);
 639         $text = _DoMathSpans($text);
 640         $text = _EscapeSpecialCharsWithinTagAttributes($text);
 641         $text = _EncodeBackslashEscapes($text);
 642
 643         # Process anchor and image tags. Images must come first,
 644         # because ![foo][f] looks like an anchor.
 645         $text = _DoImages($text);
 646         $text = _DoAnchors($text);
 647
 648         # Process WikiWords
 649         if ($g_use_wiki_links && !$g_temp_no_wikiwords && !$g_wikilinks_kill_switch) {
 650                 $text = _DoWikiLinks($text);
 651
 652                 # And then reprocess anchors and images
 653                 $text = _DoImages($text);
 654                 $text = _DoAnchors($text);
 655         }
 656
 657
 658         # Make links out of things like `<http://example.com/>`
 659         # Must come after _DoAnchors(), because you can use < and >
 660         # delimiters in inline links like [this](<url>).
 661         $text = _DoAutoLinks($text);
 662         $text = _EncodeAmpsAndAngles($text);
 663         $text = _DoItalicsAndBold($text);
 664
 665         # Do hard breaks:
 666         $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
 667
 668         return $text;
 669 }
 670
 671
 672 sub _EscapeSpecialCharsWithinTagAttributes {
 673 #
 674 # Within tags -- meaning between < and > -- encode [\ ` * _] so they
 675 # don't conflict with their use in Markdown for code, italics and strong.
 676 # We're replacing each such character with its corresponding MD5 checksum
 677 # value; this is likely overkill, but it should prevent us from colliding
 678 # with the escape values by accident.
 679 #
 680         my $text = shift;
 681         my $tokens ||= _TokenizeHTML($text);
 682         $text = '';   # rebuild $text from the tokens
 683
 684         foreach my $cur_token (@$tokens) {
 685                 if ($cur_token->[0] eq "tag") {
 686                         $cur_token->[1] =~  s! \\ !$g_escape_table{'\\'}!gx;
 687                         $cur_token->[1] =~  s{ (?<=.)</?code>(?=.)  }{$g_escape_table{'`'}}gx;
 688                         $cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
 689                         $cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
 690                 }
 691                 $text .= $cur_token->[1];
 692         }
 693         return $text;
 694 }
 695
 696
 697 sub _DoAnchors {
 698 #
 699 # Turn Markdown link shortcuts into XHTML <a> tags.
 700 #
 701         my $text = shift;
 702
 703         #
 704         # First, handle reference-style links: [link text] [id]
 705         #
 706         $text =~ s{
 707                 (                                       # wrap whole match in $1
 708                   \[
 709                     ($g_nested_brackets)        # link text = $2
 710                   \]
 711
 712                   [ ]?                          # one optional space
 713                   (?:\n[ ]*)?           # one optional newline followed by spaces
 714
 715                   \[
 716                     (.*?)               # id = $3
 717                   \]
 718                 )
 719         }{
 720                 my $result;
 721                 my $whole_match = $1;
 722                 my $link_text   = $2;
 723                 my $link_id     = lc $3;
 724
 725                 if ($link_id eq "") {
 726                         $link_id = lc $link_text;     # for shortcut links like [this][].
 727                 }
 728
 729                 # Allow automatic cross-references to headers
 730                 my $label = Header2Label($link_id);
 731                 if (defined $g_urls{$link_id}) {
 732                         my $url = $g_urls{$link_id};
 733                         $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
 734                         $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
 735                         $result = "<a href=\"$url\"";
 736                         if ( defined $g_titles{$link_id} ) {
 737                                 my $title = $g_titles{$link_id};
 738                                 $title =~ s! \* !$g_escape_table{'*'}!gx;
 739                                 $title =~ s!  _ !$g_escape_table{'_'}!gx;
 740                                 $result .=  " title=\"$title\"";
 741                         }
 742                         $result .= _DoAttributes($label);
 743                         $result .= ">$link_text</a>";
 744                 } elsif (defined $g_crossrefs{$label}) {
 745                         my $url = $g_crossrefs{$label};
 746                         $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
 747                         $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
 748                         $result = "<a href=\"$url\"";
 749                         if ( defined $g_titles{$label} ) {
 750                                 my $title = $g_titles{$label};
 751                                 $title =~ s! \* !$g_escape_table{'*'}!gx;
 752                                 $title =~ s!  _ !$g_escape_table{'_'}!gx;
 753                                 $result .=  " title=\"$title\"";
 754                         }
 755                         $result .= _DoAttributes($label);
 756                         $result .= ">$link_text</a>";
 757                 } else {
 758                         $result = $whole_match;
 759                 }
 760                 $result;
 761         }xsge;
 762
 763         #
 764         # Next, inline-style links: [link text](url "optional title")
 765         #
 766         $text =~ s{
 767                 (                               # wrap whole match in $1
 768                   \[
 769                     ($g_nested_brackets)        # link text = $2
 770                   \]
 771                   \(                    # literal paren
 772                         [ \t]*
 773                         ($g_nested_parens)              # href = $3
 774                         [ \t]*
 775                         (                       # $4
 776                           (['"])        # quote char = $5
 777                           (.*?)         # Title = $6
 778                           \5            # matching quote
 779                           [ \t]*        # ignore any spaces/tabs between closing quote and )
 780                         )?                      # title is optional
 781                   \)
 782                 )
 783         }{
 784                 my $result;
 785                 my $whole_match = $1;
 786                 my $link_text   = $2;
 787                 my $url                 = $3;
 788                 my $title               = $6;
 789
 790                 $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
 791                 $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
 792                 $url =~ s{^<(.*)>$}{$1};                                        # Remove <>'s surrounding URL, if present
 793                 $result = "<a href=\"$url\"";
 794
 795                 if (defined $title) {
 796                         $title =~ s/"/&quot;/g;
 797                         $title =~ s! \* !$g_escape_table{'*'}!gx;
 798                         $title =~ s!  _ !$g_escape_table{'_'}!gx;
 799                         $result .=  " title=\"$title\"";
 800                 }
 801                 $result .= ">$link_text</a>";
 802
 803                 $result;
 804         }xsge;
 805
 806         #
 807         # Last, handle reference-style shortcuts: [link text]
 808         # These must come last in case you've also got [link test][1]
 809         # or [link test](/foo)
 810         #
 811         $text =~ s{
 812                 (                                       # wrap whole match in $1
 813                   \[
 814                     ([^\[\]]+)          # link text = $2; can't contain '[' or ']'
 815                   \]
 816                 )
 817         }{
 818                 my $result;
 819                 my $whole_match = $1;
 820                 my $link_text   = $2;
 821                 (my $link_id = lc $2) =~ s{[ ]?\n}{ }g; # lower-case and turn embedded newlines into spaces
 822
 823                 # Allow automatic cross-references to headers
 824                 my $label = Header2Label($link_id);
 825                 if (defined $g_urls{$link_id}) {
 826                         my $url = $g_urls{$link_id};
 827                         $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
 828                         $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
 829                         $result = "<a href=\"$url\"";
 830                         if ( defined $g_titles{$link_id} ) {
 831                                 my $title = $g_titles{$link_id};
 832                                 $title =~ s! \* !$g_escape_table{'*'}!gx;
 833                                 $title =~ s!  _ !$g_escape_table{'_'}!gx;
 834                                 $result .=  " title=\"$title\"";
 835                         }
 836                         $result .= _DoAttributes($link_id);
 837                         $result .= ">$link_text</a>";
 838                 } elsif (defined $g_crossrefs{$label}) {
 839                         my $url = $g_crossrefs{$label};
 840                         $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
 841                         $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
 842                         $result = "<a href=\"$url\"";
 843                         if ( defined $g_titles{$label} ) {
 844                                 my $title = $g_titles{$label};
 845                                 $title =~ s! \* !$g_escape_table{'*'}!gx;
 846                                 $title =~ s!  _ !$g_escape_table{'_'}!gx;
 847                                 $result .=  " title=\"$title\"";
 848                         }
 849                         $result .= _DoAttributes($label);
 850                         $result .= ">$link_text</a>";
 851                 } else {
 852                         $result = $whole_match;
 853                 }
 854                 $result;
 855         }xsge;
 856
 857         return $text;
 858 }
 859
 860
 861 sub _DoImages {
 862 #
 863 # Turn Markdown image shortcuts into <img> tags.
 864 #
 865         my $text = shift;
 866
 867         #
 868         # First, handle reference-style labeled images: ![alt text][id]
 869         #
 870         $text =~ s{
 871                 (                               # wrap whole match in $1
 872                   !\[
 873                     (.*?)               # alt text = $2
 874                   \]
 875
 876                   [ ]?                          # one optional space
 877                   (?:\n[ ]*)?           # one optional newline followed by spaces
 878
 879                   \[
 880                     (.*?)               # id = $3
 881                   \]
 882
 883                 )
 884         }{
 885                 my $result;
 886                 my $whole_match = $1;
 887                 my $alt_text    = $2;
 888                 my $link_id     = lc $3;
 889
 890                 if ($link_id eq "") {
 891                         $link_id = lc $alt_text;     # for shortcut links like ![this][].
 892                 }
 893
 894                 $alt_text =~ s/"/&quot;/g;
 895                 if (defined $g_urls{$link_id}) {
 896                         my $url = $g_urls{$link_id};
 897                         $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
 898                         $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
 899
 900                         my $label = Header2Label($alt_text);
 901                         $g_crossrefs{$label} = "#$label";
 902                         if (! defined $g_titles{$link_id}) {
 903                                 $g_titles{$link_id} = $alt_text;
 904                         }
 905
 906                         $result = "<img id=\"$label\" src=\"$url\" alt=\"$alt_text\"";
 907                         if (defined $g_titles{$link_id}) {
 908                                 my $title = $g_titles{$link_id};
 909                                 $title =~ s! \* !$g_escape_table{'*'}!gx;
 910                                 $title =~ s!  _ !$g_escape_table{'_'}!gx;
 911                                 $result .=  " title=\"$title\"";
 912                         }
 913                         $result .= _DoAttributes($link_id);
 914                         $result .= $g_empty_element_suffix;
 915                 }
 916                 else {
 917                         # If there's no such link ID, leave intact:
 918                         $result = $whole_match;
 919                 }
 920
 921                 $result;
 922         }xsge;
 923
 924         #
 925         # Next, handle inline images:  ![alt text](url "optional title")
 926         # Don't forget: encode * and _
 927
 928         $text =~ s{
 929                 (                               # wrap whole match in $1
 930                   !\[
 931                     (.*?)               # alt text = $2
 932                   \]
 933                   \s?                   # One optional whitespace character
 934                   \(                    # literal paren
 935                         [ \t]*
 936                         ($g_nested_parens)              # href = $3
 937                         [ \t]*
 938                         (                       # $4
 939                           (['"])        # quote char = $5
 940                           (.*?)         # title = $6
 941                           \5            # matching quote
 942                           [ \t]*
 943                         )?                      # title is optional
 944                         # MultiMarkdown addition for attribute support
 945                         (                               # Attributes = $7
 946                           (?<=\s)                       # lookbehind for whitespace
 947                           (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))*
 948                         )?
 949                   \)
 950                 )
 951         }{
 952                 my $result;
 953                 my $whole_match = $1;
 954                 my $alt_text    = $2;
 955                 my $url                 = $3;
 956                 my $title               = (defined $6) ? $6 : '';
 957                 my $attrs = $7;
 958
 959                 $alt_text =~ s/"/&quot;/g;
 960                 $title    =~ s/"/&quot;/g;
 961                 $url =~ s! \* !$g_escape_table{'*'}!gx;         # We've got to encode these to avoid
 962                 $url =~ s!  _ !$g_escape_table{'_'}!gx;         # conflicting with italics/bold.
 963                 $url =~ s{^<(.*)>$}{$1};                                        # Remove <>'s surrounding URL, if present
 964
 965                 my $label = Header2Label($alt_text);
 966                 $g_crossrefs{$label} = "#$label";
 967 #               $g_titles{$label} = $alt_text;                  # I think this line should not be here
 968
 969                 $result = "<img id=\"$label\" src=\"$url\" alt=\"$alt_text\"";
 970                 if (defined $title) {
 971                         $title =~ s! \* !$g_escape_table{'*'}!gx;
 972                         $title =~ s!  _ !$g_escape_table{'_'}!gx;
 973                         $result .=  " title=\"$title\"";
 974                 }
 975                 if (defined $attrs) {
 976                         $result .= " $attrs";
 977                 }
 978                 $result .= $g_empty_element_suffix;
 979
 980                 $result;
 981         }xsge;
 982
 983         return $text;
 984 }
 985
 986
 987 sub _DoHeaders {
 988         my $text = shift;
 989         my $header = "";
 990         my $label = "";
 991         my $idString = "";
 992
 993         # Don't do Wiki Links in Headers
 994         $g_temp_no_wikiwords = 1;
 995
 996         # Setext-style headers:
 997         #         Header 1
 998         #         ========
 999         #
1000         #         Header 2
1001         #         --------
1002         #
1003         $text =~ s{ ^(.+?)(?:\s\[([^\[]*?)\])?[ \t]*\n=+[ \t]*\n+ }{
1004                 if (defined $2) {
1005                         $label = Header2Label($2);
1006                 } else {
1007                         $label = Header2Label($1);
1008                 }
1009                 $header = _RunSpanGamut($1);
1010                 $header =~ s/^\s*//s;
1011
1012                 if ($label ne "") {
1013                         $g_crossrefs{$label} = "#$label";
1014                         $g_titles{$label} = $header;
1015                         $idString = " id=\"$label\"";
1016                 } else {
1017                         $idString = "";
1018                 }
1019
1020                 "<h1$idString>"  .  $header  .  "</h1>\n\n";
1021         }egmx;
1022
1023         $text =~ s{ ^(.+?)(?:\s*\[([^\[]*?)\])?[ \t]*\n-+[ \t]*\n+ }{
1024                 if (defined $2) {
1025                         $label = Header2Label($2);
1026                 } else {
1027                         $label = Header2Label($1);
1028                 }
1029                 $header = _RunSpanGamut($1);
1030                 $header =~ s/^\s*//s;
1031
1032                 if ($label ne "") {
1033                         $g_crossrefs{$label} = "#$label";
1034                         $g_titles{$label} = $header;
1035                         $idString = " id=\"$label\"";
1036                 } else {
1037                         $idString = "";
1038                 }
1039
1040                 "<h2$idString>"  .  $header  .  "</h2>\n\n";
1041         }egmx;
1042
1043
1044         # atx-style headers:
1045         #       # Header 1
1046         #       ## Header 2
1047         #       ## Header 2 with closing hashes ##
1048         #       ...
1049         #       ###### Header 6
1050         #
1051         $text =~ s{
1052                         ^(\#{1,6})      # $1 = string of #'s
1053                         [ \t]*
1054                         (.+?)           # $2 = Header text
1055                         [ \t]*
1056                         (?:\[([^\[]*?)\])?      # $3 = optional label for cross-reference
1057                         [ \t]*
1058                         \#*                     # optional closing #'s (not counted)
1059                         \n+
1060                 }{
1061                         my $h_level = length($1) + $g_base_header_level - 1;
1062                         if (defined $3) {
1063                                 $label = Header2Label($3);
1064                         } else {
1065                                 $label = Header2Label($2);
1066                         }
1067                         $header = _RunSpanGamut($2);
1068                         $header =~ s/^\s*//s;
1069
1070                         if ($label ne "") {
1071                                 $g_crossrefs{$label} = "#$label";
1072                                 $g_titles{$label} = $header;
1073                                 $idString = " id=\"$label\"";
1074                         } else {
1075                                 $idString = "";
1076                         }
1077
1078                         "<h$h_level$idString>"  .  $header  .  "</h$h_level>\n\n";
1079                 }egmx;
1080
1081         # Can now process Wiki Links again
1082         $g_temp_no_wikiwords = 0;
1083
1084         return $text;
1085 }
1086
1087
sub _DoLists {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
# Params:  $text - the text to process
# Returns: the text with every recognised list wrapped in <ul> or <ol>,
#          its items having been converted by _ProcessListItems()
#
    my $text = shift;
    my $less_than_tab = $g_tab_width - 1;

    # Re-usable patterns to match list item bullets and number markers:
    my $marker_ul  = qr/[*+-]/;
    my $marker_ol  = qr/\d+[.]/;
    my $marker_any = qr/(?:$marker_ul|$marker_ol)/;

    # Re-usable pattern to match any entire ul or ol list:
    my $whole_list = qr{
        (                                                               # $1 = whole list
          (                                                             # $2
                [ ]{0,$less_than_tab}
                (${marker_any})                         # $3 = first list item marker
                [ \t]+
          )
          (?s:.+?)
          (                                                             # $4
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?!                                           # Negative lookahead for another list item marker
                        [ \t]*
                        ${marker_any}[ \t]+
                  )
          )
        )
    }mx;

    # Shared rendering for one matched list.  $first_marker decides
    # between <ul> and <ol>; $nested selects the nested-list layout.
    my $render_list = sub {
        my ($list, $first_marker, $nested) = @_;

        my $list_type = ($first_marker =~ m/$marker_ul/) ? "ul" : "ol";

        # Turn double returns into triple returns, so that we can make a
        # paragraph for the last item in a list, if necessary:
        $list =~ s/\n{2,}/\n\n\n/g;
        my $items = _ProcessListItems($list, $marker_any);

        return "<$list_type>\n" . $items . "</$list_type>\n"
            unless $nested;

        # Trim any trailing whitespace, to put the closing `</$list_type>`
        # up on the preceding line, to get it past the current HTML block
        # parser. This is a hack to work around that parser.
        $items =~ s{\s+$}{};
        return "<$list_type>" . $items . "</$list_type>\n";
    };

    # We use a different prefix before nested lists than top-level lists.
    # See extended comment in _ProcessListItems().
    #
    # The two substitutions below are deliberately written out as two
    # static s/// statements, not one statement with a conditionally
    # chosen pattern.  The original author reports that the single-pattern
    # form caused rebuilds to be killed when running under MT on one
    # hosting account, and guesses that Perl optimizes the substitution
    # only when it can tell the pattern never changes.

    if ($g_list_level) {
        $text =~ s{
                ^
                $whole_list
            }{
                $render_list->("$1", "$3", 1);
            }egmx;
    }
    else {
        $text =~ s{
                (?:(?<=\n\n)|\A\n?)
                $whole_list
            }{
                $render_list->("$1", "$3", 0);
            }egmx;
    }

    return $text;
}
1181
1182
sub _ProcessListItems {
#
#       Process the contents of a single ordered or unordered list, splitting it
#       into individual list items.
#
#       Params:
#               $list_str   - text of the whole list
#               $marker_any - compiled pattern matching any list item marker
#
#       Returns the list text with every item wrapped in <li>...</li>.
#
    my ($list_str, $marker_any) = @_;

    # $g_list_level records how deeply we are nested inside lists: it is
    # bumped on the way in and dropped again on the way out, so zero means
    # "not inside any list".
    #
    # The counter exists because the same text must be read differently
    # depending on where it appears. Outside a list, this:
    #
    #               I recommend upgrading to version
    #               8. Oops, now this line is treated
    #               as a sub-list.
    #
    # should stay a single paragraph even though its second line opens with
    # a digit-period-space sequence. Inside a list (or sub-list), that same
    # line starts a sub-list. It is a kludge, but this corner of Markdown's
    # syntax cannot be parsed perfectly without guessing at intent. One
    # possible cure would be a syntax rule that sub-lists must begin with a
    # starting cardinal number; e.g. "1." or "a.".

    $g_list_level++;

    # trim trailing blank lines:
    $list_str =~ s/\n{2,}\z/\n/;

    $list_str =~ s{
        (\n)?                                                   # leading line = $1
        (^[ \t]*)                                               # leading whitespace = $2
        ($marker_any) [ \t]+                    # list marker = $3
        ((?s:.+?)                                               # list item text   = $4
        (\n{1,2}))
        (?= \n* (\z | \2 ($marker_any) [ \t]+))
    }{
        my ($leading_line, $item) = ($1, $4);

        if ($leading_line or ($item =~ m/\n{2,}/)) {
            # Preceded by a blank line, or containing one: block content.
            $item = _RunBlockGamut(_Outdent($item));
        }
        else {
            # Recursion for sub-lists:
            $item = _DoLists(_Outdent($item));
            chomp $item;
            $item = _RunSpanGamut($item);
        }

        "<li>" . $item . "</li>\n";
    }egmx;

    $g_list_level--;
    return $list_str;
}
1248
1249
1250
sub _DoCodeBlocks {
#
#       Process Markdown `<pre><code>` blocks.
#
#       Params:  $text - the text to process
#       Returns: the text with each indented code block replaced by a
#                <pre><code>...</code></pre> element
#
    my $text = shift;

    $text =~ s{
            (?:\n\n|\A)
            (                   # $1 = the code block -- one or more lines, starting with a space/tab
              (?:
                (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
        }{
            my $code = $1;

            # Outdent and encode first, expand tabs afterwards.
            $code = _Detab(_EncodeCode(_Outdent($code)));

            $code =~ s/\A\n+//; # trim leading newlines
            $code =~ s/\n+\z//; # trim trailing newlines

            # No newline between the code and the closing tag (MMD change).
            "\n\n<pre><code>" . $code . "</code></pre>\n\n";
        }egmx;

    return $text;
}
1283
1284
sub _DoCodeSpans {
#
#       Convert backtick-quoted runs into <code></code> spans.
#
#       *       Any number of backticks may open a span, as long as the same
#               number closes it. That makes it possible to put literal
#               backticks inside the span. This input:
#
#         Just type ``foo `bar` baz`` at the prompt.
#
#       becomes:
#
#         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
#
#               There's no arbitrary limit to the number of backticks you
#               can use as delimiters. If you need three consecutive backticks
#               in your code, use four for delimiters, etc.
#
#       *       Pad with spaces to get literal backticks at the edges:
#
#         ... type `` `bar` `` ...
#
#       becomes:
#
#         ... type <code>`bar`</code> ...
#
#       Params:  $text - the text to process
#       Returns: the text with code spans replaced; the span content is
#                passed through _EncodeCode()
#
    my $text = shift;

    $text =~ s{
            (?<!\\)         # Character before opening ` can't be a backslash
            (`+)            # $1 = Opening run of `
            (.+?)           # $2 = The code block
            (?<!`)
            \1                      # Matching closer
            (?!`)
        }{
            my $code = "$2";
            $code =~ s/^[ \t]*//g; # leading whitespace
            $code =~ s/[ \t]*$//g; # trailing whitespace
            "<code>" . _EncodeCode($code) . "</code>";
        }egsx;

    return $text;
}
1330
1331
sub _EncodeCode {
#
# Encode/escape certain characters inside Markdown code runs, where they
# are literals and must lose their special Markdown meanings.
#
# Params:  the raw code text
# Returns: the encoded copy
#
    my $code = shift;

    # Protect Wiki Links in Code Blocks: put a backslash in front of each
    # WikiWord found at the start of the text or after whitespace.
    unless ($g_wikilinks_kill_switch) {
        my $WikiWord = qr'[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
        $code =~ s/(\A\\?|\s\\?)($WikiWord)/$1\\$2/gx;
    }

    # Encode all ampersands; HTML entities are not
    # entities within a Markdown code span.
    $code =~ s/&/&amp;/g;

    # Encode $'s, but only if we're running under Blosxom.
    # (Blosxom interpolates Perl variables in article bodies.)
    {
        no warnings 'once';
        $code =~ s/\$/&#036;/g if defined($blosxom::version);
    }

    # Do the angle bracket song and dance:
    $code =~ s! <  !&lt;!gx;
    $code =~ s! >  !&gt;!gx;

    # Now, escape characters that are magic in Markdown:
    $code =~ s! \* !$g_escape_table{'*'}!gx;
    $code =~ s! _  !$g_escape_table{'_'}!gx;
    $code =~ s! {  !$g_escape_table{'{'}!gx;
    $code =~ s! }  !$g_escape_table{'}'}!gx;
    $code =~ s! \[ !$g_escape_table{'['}!gx;
    $code =~ s! \] !$g_escape_table{']'}!gx;
    $code =~ s! \\ !$g_escape_table{'\\'}!gx;

    return $code;
}
1375
1376
sub _DoItalicsAndBold {
#
# Convert Markdown emphasis markers into <strong> and <em> elements.
#
# `*` and `_` are treated differently: both open emphasis when they are
# not preceded by a word character, but only `*` may also open emphasis
# in the middle of a word.
#
# Params:  $text - the text to process
# Returns: the text with emphasis markers replaced
#
    my $text = shift;

    # Run the word-boundary rules twice; the second round catches nested
    # strong and emphasis special cases left over from the first.
    for my $round (1 .. 2) {
        # <strong> must go first:
        $text =~ s{ (?<!\w) (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
            {<strong>$2</strong>}gsx;

        $text =~ s{ (?<!\w) (\*|_) (?=\S) (.+?) (?<=\S) \1 }
            {<em>$2</em>}gsx;
    }

    # Finally, allow `*` (but not `_`) in the middle of words.
    # Again, <strong> must go first:
    $text =~ s{ (\*\*) (?=\S) (.+?[*]*) (?<=\S) \1 }
        {<strong>$2</strong>}gsx;

    $text =~ s{ (\*) (?=\S) (.+?) (?<=\S) \1 }
        {<em>$2</em>}gsx;

    return $text;
}
1408
1409
sub _DoBlockQuotes {
#
# Convert runs of `>`-prefixed lines into <blockquote> elements.
#
# One level of quoting is stripped from each matched run and the
# remainder is passed back through _RunBlockGamut(), which is how nested
# quotes and other block constructs inside a quote get handled.
#
# Params:  $text - the text to process
# Returns: the text with block quotes replaced
#
    my $text = shift;

    $text =~ s{
          (                                                             # Wrap whole match in $1
                (
                  ^[ \t]*>[ \t]?                        # '>' at the start of a line
                    .+\n                                        # rest of the first line
                  (.+\n)*                                       # subsequent consecutive lines
                  \n*                                           # blanks
                )+
          )
        }{
            my $quote = $1;

            $quote =~ s/^[ \t]*>[ \t]?//gm;    # trim one level of quoting
            $quote =~ s/^[ \t]+$//mg;          # trim whitespace-only lines
            $quote = _RunBlockGamut($quote);   # recurse

            # NOTE(review): no /m here, unlike the substitutions around it;
            # confirm whether every line was meant to be indented.
            $quote =~ s/^/  /g;

            # Leading spaces would alter <pre> content, so strip them
            # again from any <pre> section:
            $quote =~ s{
                    (\s*<pre>.+?</pre>)
                }{
                    (my $pre = $1) =~ s/^  //mg;
                    $pre;
                }egsx;

            "<blockquote>\n$quote\n</blockquote>\n\n";
        }egmx;

    return $text;
}
1444
1445
sub _FormParagraphs {
#
#   Params:
#       $text - string to process with html <p> tags
#
#   Returns the chunks of $text (split on blank lines) joined back together
#   with one blank line between them. A chunk that is a key of
#   %g_html_blocks is replaced by the stored block; every other chunk is
#   run through _RunSpanGamut and wrapped in <p>...</p>. A stored block of
#   the form <div markdown="1">...</div> has its contents processed as
#   block-level Markdown and loses the markdown attribute.
#
    my ($text) = @_;

    # Strip leading and trailing lines:
    $text =~ s/\A\n+//;
    $text =~ s/\n+\z//;

    my @grafs = split(/\n{2,}/, $text);

    #
    # Pass 1: wrap <p> tags around everything that is not a hashed block.
    #
    foreach my $graf (@grafs) {
        next if defined $g_html_blocks{$graf};

        $graf = _RunSpanGamut($graf);
        $graf =~ s/^([ \t]*)/<p>/;
        $graf .= "</p>";
    }

    #
    # Pass 2: unhashify HTML blocks (elements of @grafs are modified in place).
    #
    foreach my $graf (@grafs) {
        my $block = $g_html_blocks{$graf};
        next unless defined $block;

        $graf = $block;

        # Only <div markdown="1"> blocks need further processing.
        next unless $block =~ m{
                \A
                (                           # $1 = <div> tag
                  <div  \s+
                  [^>]*
                  \b
                  markdown\s*=\s*  (['"])   #   $2 = attr quote char
                  1
                  \2
                  [^>]*
                  >
                )
                (                           # $3 = contents
                .*
                )
                (</div>)                    # $4 = closing tag
                \z

            }xms;

        my ($div_open, $div_content, $div_close) = ($1, $3, $4);

        # We can't call Markdown(), because that resets the hash;
        # that initialization code should be pulled into its own sub, though.
        $div_content = _HashHTMLBlocks($div_content);
        $div_content = _StripLinkDefinitions($div_content);
        $div_content = _RunBlockGamut($div_content);
        $div_content = _UnescapeSpecialChars($div_content);

        # Drop the markdown="1" attribute from the emitted tag.
        $div_open =~ s{\smarkdown\s*=\s*(['"]).+?\1}{}ms;

        $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
    }

    return join "\n\n", @grafs;
}
1524
1525
sub _EncodeAmpsAndAngles {
# Smart processing for ampersands and angle brackets that need to be encoded.
#
#   Parameter:  String.
#   Returns:    The string with every '&' that does not start an entity
#               turned into &amp; and every '<' that is not followed by a
#               letter, '/', '?', '$' or '!' turned into &lt;.

    my ($text) = @_;

    for ($text) {
        # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
        #   http://bumppo.net/projects/amputator/
        s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g;

        # Encode naked <'s
        s{<(?![a-z/?\$!])}{&lt;}gi;
    }

    return $text;
}
1540
1541
sub _EncodeBackslashEscapes {
#
#   Parameter:  String.
#   Returns:    The string, after replacing each backslash escape sequence
#               listed below (a backslash followed by one of the escapable
#               characters) with that character's entry in %g_escape_table.
#
    my $text = shift;

    # Order matters only for the first entry: escaped backslashes must be
    # processed first, so that a doubled backslash followed by '*' is read
    # as "escaped backslash, then a bare '*'" and not the other way round.
    my @escapable = (
        '\\', '`', '*', '_', '{', '}', '[', ']',
        '(',  ')', '>', '#', '+', '-', '.', '!',
    );

    foreach my $char (@escapable) {
        # quotemeta makes both the backslash and the character literal.
        my $sequence = quotemeta("\\" . $char);
        $text =~ s/$sequence/$g_escape_table{$char}/g;
    }

    return $text;
}
1569
1570
sub _DoAutoLinks {
#
#   Parameter:  String.
#   Returns:    The string with <scheme:...> autolinks (http, https, ftp,
#               dict) turned into anchors, and <address@domain> or
#               <mailto:address@domain> turned into the obfuscated mailto
#               link produced by _EncodeEmailAddress.
#
    my ($text) = @_;

    # URLs: <http://example.com/>
    $text =~ s{<((https?|ftp|dict):[^'">\s]+)>}{<a href="$1">$1</a>}gi;

    # Email addresses: <address@domain.foo>
    $text =~ s{
        <
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >
    }{
        my $address = _UnescapeSpecialChars($1);
        _EncodeEmailAddress($address);
    }egix;

    return $text;
}
1592
1593
sub _EncodeEmailAddress {
#
#   Input: an email address, e.g. "foo@example.com"
#
#   Output: the email address as a mailto link, with each character
#       of the address encoded as either a decimal or hex entity, in
#       the hopes of foiling most address harvesting spam bots. E.g.:
#
#     <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
#       x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
#       &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
#
#   The encoding is random, so two calls with the same address normally
#   return different strings.
#
#   Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
#   mailing list: <http://tinyurl.com/yu7ue>
#

    my $addr = shift;

    # No explicit srand here: rand seeds itself on first use, and reseeding
    # on every call gains nothing.
    my @encode = (
        sub { '&#' .                 ord(shift)   . ';' },   # decimal entity
        sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },   # hex entity
        sub {                            shift          },   # raw character
    );

    $addr = "mailto:" . $addr;

    $addr =~ s{(.)}{
        my $char = $1;
        if ( $char eq '@' ) {
            # this *must* be encoded. I insist.
            # "int rand 2" yields 0 or 1, i.e. decimal or hex; the former
            # "int rand 1" was always 0 and never chose the hex form.
            $char = $encode[int rand 2]->($char);
        } elsif ( $char ne ':' ) {
            # leave ':' alone (to spot mailto: later)
            my $r = rand;
            # roughly 10% raw, 45% hex, 45% dec
            $char = (
                $r > .9   ?  $encode[2]->($char)  :
                $r < .45  ?  $encode[1]->($char)  :
                             $encode[0]->($char)
            );
        }
        $char;
    }gex;

    $addr = qq{<a href="$addr">$addr</a>};
    $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part

    return $addr;
}
1644
1645
sub _UnescapeSpecialChars {
#
# Swap back in all the special characters we've hidden: every value of
# %g_escape_table found in the text is replaced by its key.
#
    my ($text) = @_;

    foreach my $char (keys %g_escape_table) {
        my $hash = $g_escape_table{$char};
        $text =~ s/$hash/$char/g;
    }

    return $text;
}
1657
1658
sub _TokenizeHTML {
#
#   Parameter:  String containing HTML markup.
#   Returns:    Reference to an array of the tokens comprising the input
#               string. Each token is either a tag (possibly with nested
#               tags contained therein, such as <a href="<MTFoo>">), or a
#               run of text between tags. Each element of the array is a
#               two-element array; the first is either 'tag' or 'text';
#               the second is the actual value.
#
#
#   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
#       <http://www.bradchoate.com/past/mtregex.php>
#

    my $str = shift;
    my @tokens;
    my $cursor = 0;     # offset just past the last tag consumed so far

    # Tags may contain other tags, up to this many levels deep.
    my $depth = 6;
    my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x  $depth);
    my $tag_re = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
                    (?s: <\? .*? \?> ) |              # processing instruction
                    $nested_tags/ix;                   # nested tags

    while ($str =~ m/($tag_re)/g) {
        my $tag       = $1;
        my $tag_end   = pos $str;
        my $tag_start = $tag_end - length $tag;

        # Any text between the previous tag and this one becomes a token.
        push @tokens, ['text', substr($str, $cursor, $tag_start - $cursor)]
            if $cursor < $tag_start;
        push @tokens, ['tag', $tag];

        $cursor = $tag_end;
    }

    # Trailing text after the last tag (or the whole string if no tags).
    push @tokens, ['text', substr($str, $cursor)] if $cursor < length $str;

    return \@tokens;
}
1699
1700
sub _Outdent {
#
# Strip one indentation level from the start of every line: either a single
# tab or between one and $g_tab_width spaces.
#
    my ($text) = @_;

    $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;

    return $text;
}
1710
1711
sub _Detab {
#
# Expand each tab into the number of spaces needed to reach the next
# multiple of $g_tab_width, counted from the preceding tab or line start.
#
# Cribbed from a post by Bart Lateur:
# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
#
    my ($text) = @_;

    $text =~ s{(.*?)\t}{
        my $before  = $1;
        my $padding = $g_tab_width - length($before) % $g_tab_width;
        $before . (' ' x $padding);
    }ge;

    return $text;
}
1722
1723 #
1724 # MultiMarkdown Routines
1725 #
1726
sub _ParseMetaData {
#
#   Parameter:  The document text, possibly starting with "Key: value"
#               metadata lines.
#   Returns:    The text with the metadata lines removed.
#
#   Side effects: every key/value pair is stored in %g_metadata (lines that
#   do not look like "Key: value" continue the previous key), a default is
#   filled in for $g_metadata_newline{key}, and the keys "format",
#   "base url", "use wikilinks", "bibliography title" and
#   "base header level" (case-insensitive) also set the matching $g_*
#   configuration variable.
#
    my $text = shift;
    my $clean_text = "";

    my ($inMetaData, $currentKey) = (1,'');

    foreach my $line ( split /\n/, $text ) {
        # A blank line ends the metadata. The blank line itself is kept:
        # it falls through to the "not in metadata" branch just below.
        $inMetaData = 0 if $line =~ /^$/;

        if (!$inMetaData) {
            $clean_text .= "$line\n";
            next;
        }

        if ($line =~ /^([a-zA-Z0-9][0-9a-zA-Z _-]*?):\s*(.*)$/ ) {
            # Copy the captures right away: the substitution on the key
            # below resets $1 and $2 whenever it succeeds.
            my ($key, $value) = ($1, $2);
            $key =~ s/  / /g;

            $currentKey = $key;
            $g_metadata{$currentKey} = $value;

            my $setting = lc($currentKey);
            if ($setting eq "format") {
                $g_document_format = lc($value);
            }
            if ($setting eq "base url") {
                $g_base_url = $value;
            }
            if ($setting eq "use wikilinks") {
                if (lc($value) eq "true" || $value eq "1") {
                    $g_use_wiki_links = 1;
                }
            }
            if ($setting eq "bibliography title") {
                $g_bibliography_title = $value;
                $g_bibliography_title =~ s/\s*$//;
            }
            if ($setting eq "base header level") {
                $g_base_header_level = $value;
            }
            if (!$g_metadata_newline{$currentKey}) {
                $g_metadata_newline{$currentKey} = $g_metadata_newline{default};
            }
        }
        elsif ($currentKey eq "") {
            # No metadata present: the very first line is ordinary text.
            $clean_text .= "$line\n";
            $inMetaData = 0;
        }
        elsif ($line =~ /^\s*(.+)$/ ) {
            # Continuation line: append to the value of the current key.
            $g_metadata{$currentKey} .= "$g_metadata_newline{$currentKey}$1";
        }
    }

    return $clean_text;
}
1780
sub _StripFootnoteDefinitions {
#
#   Parameter:  Document text.
#   Returns:    The text with every "[^id]: ..." footnote definition
#               replaced by a single newline.
#
#   Side effect: each definition body, with up to $g_tab_width leading
#   spaces removed from every line, is stored in %g_footnotes under
#   id2footnote(id).
#
    my ($text) = @_;
    my $less_than_tab = $g_tab_width - 1;

    # Remove one definition per iteration until none is left.
    while ($text =~ s{
        \n\[\^([^\n]+?)\]\:[ \t]*# id = $1
        \n?
        (.*?)\n{1,2}            # end at new paragraph
        ((?=\n[ ]{0,$less_than_tab}\S)|\Z)      # Lookahead for non-space at line-start, or end of doc
    }
    {\n}sx)
    {
        my ($id, $body) = ($1, $2);

        my $footnote = "$body\n";
        $footnote =~ s/^[ ]{0,$g_tab_width}//gm;

        $g_footnotes{id2footnote($id)} = $footnote;
    }

    return $text;
}
1802
sub _DoFootnotes {
#
#   Parameter:  Document text containing [^id] footnote references.
#   Returns:    The text with each reference to a known footnote replaced
#               by a numbered anchor pointing at "#fn:id". A reference
#               whose id is not in %g_footnotes is replaced by the empty
#               string.
#
#   Side effects: every entry of %g_footnotes is rewritten with the result
#   of _RunBlockGamut and _DoMarkdownCitations; $g_footnote_counter is
#   incremented and the id pushed onto @g_used_footnotes for each reference
#   that is replaced by an anchor.
#
    my $text = shift;

    # First, run routines that get skipped in footnotes
    foreach my $label (sort keys %g_footnotes) {
        my $footnote = _RunBlockGamut($g_footnotes{$label});

        $footnote = _DoMarkdownCitations($footnote);
        $g_footnotes{$label} = $footnote;
    }

    $text =~ s{
        \[\^(.+?)\]             # id = $1
    }{
        # Start from the empty string so that an unknown id does not hand
        # undef back to the substitution (which warns under "use warnings").
        my $result = "";
        my $id = id2footnote($1);
        if (defined $g_footnotes{$id} ) {
            $g_footnote_counter++;

            # NOTE(review): the footnote text was already passed through
            # _RunBlockGamut above; if that wraps it in <p>, this test can
            # never match -- confirm.
            my $class = ($g_footnotes{$id} =~ /^glossary:/i)
                ? "footnote glossary"
                : "footnote";

            $result = "<a href=\"#fn:$id\" id=\"fnref:$id\" class=\"$class\">$g_footnote_counter</a>";
            push (@g_used_footnotes,$id);
        }
        $result;
    }xsge;

    return $text;
}
1833
sub _FixFootnoteParagraphs {
#
#   Parameter:  String.
#   Returns:    The string with the "<p>" removed from every line that
#               starts with "<p></footnote>".
#
    my ($text) = @_;

    $text =~ s/^\<p\>\<\/footnote\>/<\/footnote>/gm;

    return $text;
}
1841
sub _PrintFootnotes{
#
#   Returns:    The <div class="footnotes"> block listing every footnote in
#               @g_used_footnotes, in that order, with special characters
#               unescaped; the empty string when no footnote was used.
#
#   Each <li> carries a back-link to its reference. The back-link is placed
#   inside the footnote's final </p>, </pre>, </ol> or </ul> when the
#   footnote ends with one of those tags.
#
    my $footnote_counter = 0;
    my $result = "";

    foreach my $id (@g_used_footnotes) {
        $footnote_counter++;
        my $footnote = $g_footnotes{$id};
        my $footnote_closing_tag = "";

        # Only read $1 when the substitution actually matched; otherwise it
        # would hold whatever an earlier, unrelated match left behind.
        if ($footnote =~ s/(\<\/(p(re)?|ol|ul)\>)$//) {
            $footnote_closing_tag = $1;
        }

        # NOTE(review): footnotes are expected to end in </p> here, so they
        # presumably start with <p> and this test may never match -- confirm.
        if ($footnote =~ s/^glossary:\s*//i) {
            # Add some formatting for glossary entries

            $footnote =~ s{
                ^(.*?)                          # $1 = term
                \s*
                (?:\(([^\(\)]*)\)[^\n]*)?               # $2 = optional sort key
                \n
            }{
                my $glossary = "<span class=\"glossary name\">$1</span>";

                if ($2) {
                    $glossary.="<span class=\"glossary sort\" style=\"display:none\">$2</span>";
                };

                $glossary . ":<p>";
            }egsx;
        }

        # Glossary and ordinary footnotes share the same list item markup.
        $result.="<li id=\"fn:$id\">$footnote<a href=\"#fnref:$id\" class=\"reversefootnote\">&#160;&#8617;</a>$footnote_closing_tag</li>\n\n";
    }

    # Nothing was referenced: emit no footnote section at all.
    return "" if $footnote_counter == 0;

    $result = "\n\n<div class=\"footnotes\">\n<hr$g_empty_element_suffix\n<ol>\n\n"
        . $result
        . "</ol>\n</div>";

    return _UnescapeSpecialChars($result);
}
1888
sub Header2Label {
#
#   Parameter:  Header (or caption) text.
#   Returns:    A label derived from it: lower-cased, with every character
#               outside A-Z a-z 0-9 : _ . - removed, and then with all
#               leading characters that are not letters removed. May be the
#               empty string.
#
    my $header = shift;
    my $label = lc $header;
    $label =~ s/[^A-Za-z0-9:_.-]//g;        # Strip illegal characters
    $label =~ s/^[^A-Za-z]+//;              # Strip illegal leading characters
    return $label;
}
1897
sub id2footnote {
    # Turn a footnote id into its label: lower-case it and drop every
    # character outside A-Z a-z 0-9 : _ . -
    # Since we prepend "fn:", we can allow leading digits in footnotes
    my ($id) = @_;

    (my $footnote = lc $id) =~ s/[^A-Za-z0-9:_.-]//g;   # Strip illegal characters

    return $footnote;
}
1905
1906
sub xhtmlMetaData {
#
#   Returns:    The XML declaration, the DOCTYPE (XHTML 1.1 plus MathML
#               when $g_allow_mathml is set, XHTML 1.0 Strict otherwise),
#               the opening <html> tag and a complete <head> element built
#               from %g_metadata.
#
#   Metadata handling (keys compared case-insensitively):
#       Title        -> <title>
#       CSS          -> <link rel="stylesheet">
#       XHTML Header -> value copied verbatim into <head>
#       anything else -> <meta name="KeyWithoutSpaces" content="value">
#
#   Side effect: trailing whitespace is stripped from every value stored in
#   %g_metadata.
#
    my $result = qq{<?xml version="1.0" encoding="UTF-8" ?>\n};

    # This screws up xsltproc - make sure to use `-nonet -novalid` if you
    #   have difficulty
    if ($g_allow_mathml) {
        $result .= qq{<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"\n\t"http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd">\n\n};
    } else {
        $result .= qq{<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n};
    }

    $result .= qq{<html xmlns="http://www.w3.org/1999/xhtml">\n\t<head>\n};

    $result.= "\t\t<!-- Processed by MultiMarkdown -->\n";

    # Make a value safe inside a double-quoted attribute. Existing entities
    # are left alone by _EncodeAmpsAndAngles, so values that were already
    # escaped by the author are not escaped twice.
    my $attr_escape = sub {
        my $value = _EncodeAmpsAndAngles(shift);
        $value =~ s/"/&quot;/g;
        return $value;
    };

    foreach my $key (sort keys %g_metadata ) {
        # Strip trailing spaces
        $g_metadata{$key} =~ s/(\s)*$//s;

        # Strip spaces from key
        my $export_key = $key;
        $export_key =~ s/\s//g;

        if (lc($key) eq "title") {
            $result.= "\t\t<title>" . _EncodeAmpsAndAngles($g_metadata{$key}) . "</title>\n";
        } elsif (lc($key) eq "css") {
            my $href = $attr_escape->($g_metadata{$key});
            $result.= "\t\t<link type=\"text/css\" rel=\"stylesheet\" href=\"$href\"$g_empty_element_suffix\n";
        } elsif (lc($export_key) eq "xhtmlheader") {
            # Raw markup supplied by the author: deliberately not escaped.
            $result .= "\t\t$g_metadata{$key}\n";
        } else {
            my $content = $attr_escape->($g_metadata{$key});
            $result.= qq!\t\t<meta name="$export_key" content="$content"$g_empty_element_suffix\n!;
        }
    }
    $result.= "\t</head>\n";

    return $result;
}
1947
sub textMetaData {
#
#   Returns:    One "key: value" line per entry of %g_metadata, sorted by
#               key, each line ended with "<br />", followed by one blank
#               line; the empty string when there is no metadata.
#
    my $result = join '',
        map { "$_: $g_metadata{$_}\n" }
        sort keys %g_metadata;

    # Turn every line end (and the whitespace before it) into a <br />.
    $result =~ s/\s*\n/<br \/>\n/g;

    $result .= "\n" if $result ne "";

    return $result;
}
1962
sub _ConvertCopyright{
    # Convert &copy; (in any letter case) to the numeric entity &#xA9;,
    # an XML compatible form of the copyright symbol.
    my ($text) = @_;

    $text =~ s/&copy;/&#xA9;/gi;

    return $text;
}
1971
sub _CreateWikiLink {
#
#   Parameter:  Link title.
#   Returns:    A Markdown inline link "[title](url)". The url is
#               $g_base_url followed by the title with spaces turned into
#               underscores, runs of underscores collapsed and one leading
#               and one trailing underscore removed. In the visible title,
#               underscores are shown as spaces.
#
    my ($title) = @_;

    my $id = $title;
    for ($id) {
        s/ /_/g;
        s/__+/_/g;
        s/^_//g;
        s/_$//;
    }

    $title =~ s/_/ /g;

    return "[$title]($g_base_url$id)";
}
1985
sub _DoWikiLinks {
#
#   Parameter:  Text.
#   Returns:    The text with [[Free Links]] and bare WikiWords replaced by
#               the Markdown links built by _CreateWikiLink. The text is
#               returned unchanged when $g_wikilinks_kill_switch is set or
#               $g_use_wiki_links is not.
#
    my ($text) = @_;

    return $text if $g_wikilinks_kill_switch;
    return $text unless $g_use_wiki_links;

    my $WikiWord = '[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
    my $FreeLinkPattern = "([-,.()' _0-9A-Za-z\x80-\xff]+)";

    # FreeLinks
    $text =~ s{
        \[\[($FreeLinkPattern)\]\]
    }{
        my $label = $1;

        # Backslash-escape WikiWords inside the label so the WikiWord pass
        # below does not match them a second time.
        $label =~ s{
            ([\s\>])($WikiWord)
        }{
            $1 ."\\" . $2
        }xsge;

        _CreateWikiLink($label)
    }xsge;

    # WikiWords
    $text =~ s{
        ([\s])($WikiWord)
    }{
        $1 . _CreateWikiLink($2)
    }xsge;

    # Catch WikiWords at beginning of text
    $text =~ s{^($WikiWord)
    }{
        _CreateWikiLink($1)
    }xse;

    return $text;
}
2029
sub _UnescapeWikiWords {
#
#   Parameter:  Text.
#   Returns:    The text with the backslash removed from backslash-escaped
#               WikiWords; unchanged when $g_wikilinks_kill_switch is set.
#
    my ($text) = @_;

    return $text if $g_wikilinks_kill_switch;

    my $WikiWord = '[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';

    # Unescape escaped WikiWords
    # This should occur whether wikilinks are enabled or not
    $text =~ s/(?<=\B)\\($WikiWord)/$1/g;

    return $text;
}
2044
2045
sub _DoTables {
#
#   Parameter:  Text that may contain MultiMarkdown pipe tables.
#   Returns:    The text with every table replaced by <table> markup:
#               optional <caption>, one <col> per divider column, the rows
#               above the divider as <thead>, the rows below it as one or
#               more <tbody> sections (a blank line starts a new section).
#
#   Side effect: for a captioned table, the caption id is registered in
#   %g_crossrefs and the caption text in %g_titles.
#
    my $text = shift;
    my $less_than_tab = $g_tab_width - 1;

    # Algorithm inspired by PHP Markdown Extra's table support
    # <http://www.michelf.com/projects/php-markdown/>

    # Reusable regexp's to match table

    my $line_start = qr{
        [ ]{0,$less_than_tab}
    }mx;

    my $table_row = qr{
        [^\n]*?\|[^\n]*?\n
    }mx;

    my $first_row = qr{
        $line_start
        \S+.*?\|.*?\n
    }mx;

    my $table_rows = qr{
        (\n?$table_row)
    }mx;

    my $table_caption = qr{
        $line_start
        \[.*?\][ \t]*\n
    }mx;

    my $table_divider = qr{
        $line_start
        [\|\-\+\:\.][ \-\+\|\:\.]* \| [ \-\+\|\:\.]*
    }mx;

    my $whole_table = qr{
        ($table_caption)?               # Optional caption
        ($first_row                     # First line must start at beginning
        ($table_row)*?)?                # Header Rows
        $table_divider                  # Divider/Alignment definitions
        $table_rows+                    # Body Rows
        ($table_caption)?               # Optional caption
    }mx;


    # Find whole tables, then break them up and process them

    $text =~ s{
        ^($whole_table)                 # Whole table in $1
        (\n|\Z)                         # End of file or 2 blank lines
    }{
        my $table = $1;

        # Clean extra spaces at end of lines -
        #   they cause the processing to choke
        $table =~ s/[\t ]*\n/\n/gs;

        my $result = "<table>\n";
        my @alignments;
        my $use_row_header = 1;

        # Add Caption, if present

        if ($table =~ s/^$line_start\[\s*(.*?)\s*\](\[\s*(.*?)\s*\])?[ \t]*$//m) {
            # Copy the captures before calling any other routine.
            my ($caption, $explicit_id) = ($1, $3);

            # An explicit [id] after the caption is added to the cross-ref
            # list; otherwise the caption itself is used as the id.
            my $table_id = Header2Label(defined $explicit_id ? $explicit_id : $caption);

            $result .= "<caption id=\"$table_id\">" . _RunSpanGamut($caption). "</caption>\n";

            $g_crossrefs{$table_id} = "#$table_id";
            $g_titles{$table_id} = "$caption";
        }

        # If a second "caption" is present, treat it as a summary
        # However, this is not valid in XHTML 1.0 Strict
        # But maybe in future

        # A summary might be longer than one line.
        # For now it is only removed from the table, not emitted.
        $table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s;

        # Now, divide table into header, alignment, and body

        # First, add leading \n in case there is no header

        $table = "\n" . $table;

        # Need to be greedy
        # Only trust the captures when the split actually matched; otherwise
        # they would still hold the values of an earlier match.

        my ($alignment_string, $body) = ("", "");
        if ($table =~ s/\n($table_divider)\n(($table_rows)+)//s) {
            ($alignment_string, $body) = ($1, $2);
        }

        # Process column alignment
        while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) {
            my $cell = _RunSpanGamut($1);

            $result .= ($cell =~ /\+/) ? "<col class=\"extended\"" : "<col";

            # Colon at both ends: center; at the end: right; at the start:
            # left; a period at either end: char; anything else: none.
            my $align = "";
            if ($cell =~ /\:$/) {
                $align = ($cell =~ /^\:/) ? "center" : "right";
            } elsif ($cell =~ /^\:/) {
                $align = "left";
            } elsif (($cell =~ /^\./) || ($cell =~ /\.$/)) {
                $align = "char";
            }

            $result .= " align=\"$align\"" if $align ne "";
            $result .= "$g_empty_element_suffix\n";
            push(@alignments, $align);
        }

        # Process headers
        $table =~ s/^\n+//s;

        $result .= "<thead>\n";

        # Strip blank lines
        $table =~ s/\n[ \t]*\n/\n/g;

        foreach my $line (split(/\n/, $table)) {
            # process each line (row) in table
            $result .= "<tr>\n";
            my $count=0;
            while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) {
                # process contents of each cell
                my ($raw_cell, $ending) = ($1, $2);
                my $cell = _RunSpanGamut($raw_cell);
                my $colspan = "";
                # Two or more closing pipes make the cell span that many columns.
                if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) {
                    $colspan = " colspan=\"" . length($ending) . "\"";
                }
                $result .= "\t<th$colspan>$cell</th>\n";
                if ( $count == 0) {
                    # An empty first header cell means the first column of
                    # the body holds row headers.
                    if ($cell =~ /^\s*$/) {
                        $use_row_header = 1;
                    } else {
                        $use_row_header = 0;
                    }
                }
                $count++;
            }
            $result .= "</tr>\n";
        }

        # Process body

        $result .= "</thead>\n<tbody>\n";

        foreach my $line (split(/\n/, $body)) {
            # process each line (row) in table
            if ($line =~ /^\s*$/) {
                # A blank line starts a new <tbody> section.
                $result .= "</tbody>\n\n<tbody>\n";
                next;
            }
            $result .= "<tr>\n";
            my $count=0;
            while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) {
                # process contents of each cell
                my ($raw_cell, $ending) = ($1, $2);
                my $cell = _RunSpanGamut($raw_cell);
                my $colspan = "";
                my $cell_type = "td";
                if ($count == 0 && $use_row_header == 1) {
                    $cell_type = "th";
                }
                if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) {
                    $colspan = " colspan=\"" . length($ending) . "\"";
                }

                # A row may have more cells than the divider has columns;
                # such cells simply get no alignment.
                my $align = defined $alignments[$count] ? $alignments[$count] : "";
                if ($align !~ /^\s*$/) {
                    $result .= "\t<$cell_type$colspan align=\"$align\">$cell</$cell_type>\n";
                } else {
                    $result .= "\t<$cell_type$colspan>$cell</$cell_type>\n";
                }
                $count++;
            }
            $result .= "</tr>\n";
        }

        # Strip out empty <thead> sections
        $result =~ s/<thead>\s*<\/thead>\s*//s;

        # Handle pull-quotes

        # This might be too specific for my needs.  If others want it
        # removed, I am open to discussion.

        $result =~ s/<table>\s*<col \/>\s*<tbody>/<table class="pull-quote">\n<col \/>\n<tbody>/s;

        $result .= "</tbody>\n</table>\n";
        $result
    }egmx;

    return $text;
}
2282
2283
#
# Build the HTML attribute string for the item registered under $id.
#
# Looks $id up in %g_attributes and returns every key/value pair found in
# the stored text as ` key="value"` (note the leading space).  Pairs whose
# value was written in double quotes are emitted first, followed by the
# bare key=value pairs.  Returns "" when nothing is stored for $id.
#
sub _DoAttributes{
        my $id = shift;

        my $spec = $g_attributes{$id};
        return "" unless (defined $spec);

        my @pairs;

        # Peel the key="quoted value" pairs off one at a time, so that only
        # bare key=value pairs remain in $spec afterwards.
        while ($spec =~ s/(\S+)="(.*?)"//) {
                push @pairs, " $1=\"$2\"";
        }

        # What is left is whitespace-delimited: key=value
        while ($spec =~ /(\S+)=(\S+)/g) {
                push @pairs, " $1=\"$2\"";
        }

        return join("", @pairs);
}
2300
2301
#
# Pull bibliography definitions of the form
#
#       [#id]: text of the reference
#
# out of $text.  Each definition is replaced by a single newline; its body
# is outdented, run through _RunBlockGamut() and stored in %g_references
# under the id.  Returns the text with the definitions removed.
#
sub _StripMarkdownReferences {
        my $text = shift;
        my $less_than_tab = $g_tab_width - 1;

        my $definition = qr{
                \n\[\#(.+?)\]:[ \t]*    # capture 1: the id
                \n?
                (.*?)\n{1,2}                    # capture 2: the body, up to the paragraph break
                ((?=\n[ ]{0,$less_than_tab}\S)|\Z)      # then a line starting with a non-space, or end of doc
        }sx;

        # One definition is removed per pass; loop until none are left.
        while ($text =~ s/$definition/\n/) {
                my ($id, $body) = ($1, $2);

                my $reference = $body . "\n";
                $reference =~ s/^[ ]{0,$g_tab_width}//gm;       # outdent
                $reference = _RunBlockGamut($reference);

                # Drop the outermost <p> ... </p>; they are added back later
                for ($reference) {
                        s/^\<p\>//s;
                        s/\<\/p\>\s*$//s;
                }

                $g_references{$id} = $reference;
        }

        return $text;
}
2330
#
# Convert citations of the form [locator][#id] into HTML spans.
#
# A citation whose id has an entry in %g_references becomes a numbered
# link (class "markdowncitation"); the id is appended to
# @g_used_references the first time it is seen.  Any other id becomes an
# "externalcitation" span that carries the id itself.  Locator text of the
# form "Author; rest" additionally yields a "textual citation" span, and a
# locator for which Header2Label() returns "notcited" replaces the whole
# citation with an empty "notcited" span.
#
sub _DoMarkdownCitations {
        my $text = shift;

        # Citations without a locator may be written the usual way round,
        # [#author][]; turn them into the canonical [][#author] first.
        $text =~ s{
                \[\#([^\[]*?)\]
                [ ]?
                (?:\n[ ]*)?
                \[\s*\]
        }{
                '[][#' . $1 . ']';
        }xsge;

        $text =~ s{
                \[([^\[]*?)\]           # capture 1: locator / citation text
                [ ]?                    # one optional space
                (?:\n[ ]*)?             # one optional newline followed by spaces
                \[\#(.*?)\]             # capture 2: id
        }{
                my ($locator, $key) = ($1, $2);

                # Equivalent of \citet: text before the first ";" is set apart
                my $textual = "";
                if ($locator =~ s/^(.*?);\s*//) {
                        $textual = "<span class=\"textual citation\">$1</span>";
                }

                my ($class, $link);
                if (defined $g_references{$key}) {
                        # Reuse the number of a reference that was cited before
                        my $number;
                        my $position = 0;
                        foreach my $seen (@g_used_references) {
                                $position++;
                                $number = $position if ($seen eq $key);
                        }

                        unless (defined $number) {
                                $number = ++$g_citation_counter;
                                push(@g_used_references, $key);
                        }

                        $class = "markdowncitation";
                        $link  = "<a href=\"#$key\">$number</a>";
                } else {
                        # No reference exists
                        $class = "externalcitation";
                        $link  = "<a id=\"$key\">$key</a>";
                }

                my $html = "<span class=\"$class\">$textual ($link";
                $html .= ", <span class=\"locator\">$locator</span>" if ($locator ne "");
                $html .= ")</span>";

                (Header2Label($locator) eq "notcited")
                        ? "<span class=\"notcited\" id=\"$key\"/>"
                        : $html;
        }xsge;

        return $text;

}
2402
#
# Render the bibliography: one numbered <div> per entry of
# @g_used_references, in citation order, wrapped in a
# <div class="bibliography"> headed by $g_bibliography_title.
# Returns "" when no reference was cited.
#
sub _PrintMarkdownBibliography{
        return "" unless (@g_used_references);

        my $html = "\n\n<div class=\"bibliography\">\n<hr$g_empty_element_suffix\n<p>$g_bibliography_title</p>\n\n";

        for my $index (0 .. $#g_used_references) {
                my $key    = $g_used_references[$index];
                my $number = $index + 1;        # entries are numbered from 1

                $html .= "<div id=\"$key\"><p>[$number] <span class=\"item\">$g_references{$key}</span></p></div>\n\n";
        }

        return $html . "</div>";
}
2421
#
# Register a cross-reference target for every image found in $text.
#
# For each image the alt text (with double quotes turned into &quot;) is
# passed to Header2Label() and the resulting label is recorded in
# %g_crossrefs as "#label".  Reference-style images are only registered
# when their link id is present in %g_urls; inline images are always
# registered.
#
# The text itself is never modified: it is only scanned, and returned
# exactly as it was passed in.
#
sub _GenerateImageCrossRefs {
        my $text = shift;

        #
        # First, handle reference-style labeled images: ![alt text][id]
        #
        while ($text =~ m{
                !\[
                  (.*?)                 # capture 1: alt text
                \]

                [ ]?                    # one optional space
                (?:\n[ ]*)?             # one optional newline followed by spaces

                \[
                  (.*?)                 # capture 2: id
                \]
        }xsg) {
                my $alt_text = $1;
                my $link_id  = lc $2;

                if ($link_id eq "") {
                        $link_id = lc $alt_text;     # for shortcut links like ![this][].
                }

                # Images whose link id is unknown get no cross-reference
                next unless (defined $g_urls{$link_id});

                $alt_text =~ s/"/&quot;/g;
                my $label = Header2Label($alt_text);
                $g_crossrefs{$label} = "#$label";
        }

        #
        # Next, handle inline images:  ![alt text](url "optional title")
        #
        while ($text =~ m{
                !\[
                  (.*?)                 # capture 1: alt text
                \]
                \(                      # literal paren
                  [ \t]*
                  <?\S+?>?              # src url
                  [ \t]*
                  (?:
                    (['"])              # capture 2: quote char
                    .*?                 # title
                    \2                  # matching quote
                    [ \t]*
                  )?                    # title is optional
                \)
        }xsg) {
                my $alt_text = $1;

                $alt_text =~ s/"/&quot;/g;
                my $label = Header2Label($alt_text);
                $g_crossrefs{$label} = "#$label";
        }

        return $text;
}
2499
#
# Normalise the id of <math ... id="..."> tags.
#
# The id value is replaced by Header2Label(id); the label is recorded in
# %g_crossrefs as "#label", and the original id text, run through
# _RunSpanGamut(), is stored in %g_titles under the same label.
# Returns the modified text.
#
sub _FindMathEquations{
        my $text = shift;

        $text =~ s{
                (\<math[^\>]*)id=\"(.*?)\">     # capture 1: tag up to the id, capture 2: id value
        }{
                # Copy the captures before calling helpers
                my ($tag_start, $raw_id) = ($1, $2);

                my $label = Header2Label($raw_id);

                $g_titles{$label}    = _RunSpanGamut($raw_id);
                $g_crossrefs{$label} = "#$label";

                $tag_start . "id=\"$label\">";
        }xsge;

        return $text;
}
2517
#
# Convert <<math>> spans into MathML by way of $mathParser->TextToMathML().
# Based on Gruber's _DoCodeSpans.
#
# A span may end in an optional label, <<math [label]>>.  The label is
# turned into an id with Header2Label(), recorded in %g_crossrefs as
# "#id", and its text (run through _RunSpanGamut()) is stored in
# %g_titles.  When the whole of $text is one span containing no ">",
# display="block" is added to the attributes.
#
# The label is matched non-greedily, so that each of several labelled
# spans in the same text keeps its own label; a greedy match would run
# from the first "[" to the last "]>>" in the text.
#
sub _DoMathSpans {
        my $text = shift;

        my $display_as_block = ($text =~ /^<<[^\>\>]*>>$/) ? 1 : 0;

        $text =~ s{
                        (?<!\\)         # Character before opening << can't be a backslash
                        <<
                        (.+?)           # capture 1: the math source
                        (?:\[(.+?)\])?  # capture 2: optional label (shortest match)
                        >>
                }{
                        # Copy the captures before calling helpers
                        my ($math, $raw_label) = ($1, $2);
                        my @attr = (xmlns=>"http://www.w3.org/1998/Math/MathML");

                        if (defined $raw_label) {
                                my $label = Header2Label($raw_label);

                                $g_titles{$label}    = _RunSpanGamut($raw_label);
                                $g_crossrefs{$label} = "#$label";

                                push(@attr, (id=>"$label")) if ($label ne "");
                        }

                        $math =~ s/^[ \t]*//;   # leading whitespace
                        $math =~ s/[ \t]*$//;   # trailing whitespace

                        push(@attr, (display=>"block")) if ($display_as_block == 1);

                        my $mathml = $mathParser->TextToMathML($math, \@attr);
                        "$mathml";
                }egsx;

        return $text;
}
2554
#
# Convert definition lists into <dl> markup.
# Uses the syntax proposed by Michel Fortin in PHP Markdown Extra:
#
#       Term
#       :   Definition text
#
# One or more term lines followed by one or more ":"-introduced
# definitions form a block; consecutive blocks form one list.  Terms are
# run through _RunSpanGamut(), definitions are outdented and run through
# _RunBlockGamut().  Returns the modified text.
#
sub _DoDefinitionLists {
        my $text = shift;
        my $less_than_tab = $g_tab_width -1;

        # Optional indentation of less than one tab stop
        my $line_start = qr{
                [ ]{0,$less_than_tab}
        }mx;

        # A term: one line that, after the optional indentation, starts with
        # a character that is neither ":" nor whitespace
        my $term = qr{
                $line_start
                [^:\s][^\n]*\n
        }sx;

        my $definition = qr{
                \n?[ ]{0,$less_than_tab}
                \:[ \t]+(.*?)\n
                ((?=\n*[ ]{0,$less_than_tab}\S)|\n\n|\Z)        # Lookahead for non-space at line-start,
                                                                # two returns, or end of doc
        }sx;

        my $definition_block = qr{
                ((?:$term)+)                            # capture 1: one or more terms
                ((?:$definition)+)                      # capture 2: followed by one or more definitions
        }sx;

        my $definition_list = qr{
                (?:$definition_block\n*)+               # One or more definition blocks
        }sx;

        $text =~ s{
                ($definition_list)                      # capture 1: the whole list
        }{
                my $list = $1;

                $list =~ s{
                        (?:$definition_block)\n*
                }{
                        my $terms = $1;
                        my $defs = $2;

                        $terms =~ s{
                                [ ]{0,$less_than_tab}
                                (.*)
                                \s*
                        }{
                                # Trim the term; blank terms produce no <dt>
                                my $term_text = $1;
                                my $result = "";
                                $term_text =~ s/^\s*(.*?)\s*$/$1/;
                                if ($term_text !~ /^\s*$/){
                                        $result = "<dt>" . _RunSpanGamut($term_text) . "</dt>\n";
                                }
                                $result;
                        }xmge;

                        $defs =~ s{
                                $definition
                        }{
                                my $def = $1 . "\n";
                                $def =~ s/^[ ]{0,$g_tab_width}//gm;     # outdent
                                "<dd>\n" . _RunBlockGamut($def) . "\n</dd>\n";
                        }xsge;

                        $terms . $defs . "\n";
                }xsge;

                "<dl>\n" . $list . "</dl>\n\n";
        }xsge;

        return $text;
}
2628
#
# Remove encoding inside HTML comments.
# Based on proposal by Toras Doran (author of Text::MultiMarkdown)
#
# Within every <!-- ... --> the entities &amp; and &lt; are turned back
# into "&" and "<".  Text outside comments is left alone.  Returns the
# modified text.
#
# Both entities are decoded in a single pass.  Decoding &amp; first and
# &lt; afterwards would decode twice: "&amp;lt;" would become "&lt;" and
# then "<".
#
sub _UnescapeComments{
        my $text = shift;

        my %char_for = (
                '&amp;' => '&',
                '&lt;'  => '<',
        );

        $text =~ s{
                (?<=<!--) # Begin comment
                (.*?)     # Anything inside
                (?=-->)   # End comments
        }{
                my $inside = $1;
                $inside =~ s/(&amp;|&lt;)/$char_for{$1}/g;
                $inside;
        }egsx;

        return $text;
}
2647
2648 1;
2649
2650 __END__
2651
2652
2653 =pod
2654
2655 =head1 NAME
2656
2657 B<MultiMarkdown>
2658
2659
2660 =head1 SYNOPSIS
2661
2662 B<MultiMarkdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<--shortversion> ]
2663     [ I<file> ... ]
2664
2665
2666 =head1 DESCRIPTION
2667
2668 Markdown is a text-to-HTML filter; it translates an easy-to-read /
2669 easy-to-write structured text format into HTML. Markdown's text format
2670 is most similar to that of plain text email, and supports features such
2671 as headers, *emphasis*, code blocks, blockquotes, and links.
2672
2673 Markdown's syntax is designed not as a generic markup language, but
2674 specifically to serve as a front-end to (X)HTML. You can use span-level
2675 HTML tags anywhere in a Markdown document, and you can use block level
2676 HTML tags (like <div> and <table>) as well.
2677
2678 For more information about Markdown's syntax, see:
2679
2680     http://daringfireball.net/projects/markdown/
2681
2682
2683 =head1 OPTIONS
2684
2685 Use "--" to end switch parsing. For example, to open a file named "-z", use:
2686
2687         Markdown.pl -- -z
2688
2689 =over 4
2690
2691
2692 =item B<--html4tags>
2693
2694 Use HTML 4 style for empty element tags, e.g.:
2695
2696     <br>
2697
2698 instead of Markdown's default XHTML style tags, e.g.:
2699
2700     <br />
2701
2702
2703 =item B<-v>, B<--version>
2704
2705 Display Markdown's version number and copyright information.
2706
2707
2708 =item B<-s>, B<--shortversion>
2709
2710 Display the short-form version number.
2711
2712
2713 =back
2714
2715
2716
2717 =head1 BUGS
2718
2719 To file bug reports or feature requests (other than topics listed in the
2720 Caveats section above) please send email to:
2721
2722     support@daringfireball.net (for Markdown issues)
2723
2724     fletcher@fletcherpenney.net (for MultiMarkdown issues)
2725
2726 Please include with your report: (1) the example input; (2) the output
2727 you expected; (3) the output Markdown actually produced.
2728
2729
2730 =head1 VERSION HISTORY
2731
2732 See the readme file for detailed release notes for this version.
2733
2734 1.0.2b8 - Wed 09 May 2007
2735
2736         +       Fixed bug with nested raw HTML tags that contained
2737                 attributes. The problem is that it uses a backreference in
2738                 the expression that it passes to gen_extract_tagged, which
2739                 is broken when Text::Balanced wraps it in parentheses.
2740
2741                 Thanks to Matt Kraai for the patch.
2742
2743         +       Now supports URLs containing literal parentheses, such as:
2744
2745                         http://en.wikipedia.org/wiki/WIMP_(computing)
2746
2747                 Such parentheses may be arbitrarily nested, but must be
2748                 balanced.
2749
2750
2751 1.0.2b7
2752
2753         +       Changed shebang line from "/usr/bin/perl" to "/usr/bin/env perl"
2754
2755         +       Now only trim trailing newlines from code blocks, instead of trimming
2756                 all trailing whitespace characters.
2757
2758
2759 1.0.2b6 - Mon 03 Apr 2006
2760
2761         +       Fixed bad performance bug in new `Text::Balanced`-based block-level parser.
2762
2763
2764 1.0.2b5 - Thu 08 Dec 2005
2765
2766         +       Fixed bug where this:
2767
2768                         [text](http://m.com "title" )
2769
2770                 wasn't working as expected, because the parser wasn't allowing for spaces
2771                 before the closing paren.
2772
2773
2774 1.0.2b4 - Thu 08 Sep 2005
2775
2776         +       Filthy hack to support markdown='1' in div tags, because I need it
2777                 to write today's fireball.
2778
2779         +       First crack at a new, smarter, block-level HTML parser.
2780
2781 1.0.2b3 - Thu 28 Apr 2005
2782
2783         +       _DoAutoLinks() now supports the 'dict://' URL scheme.
2784
2785         +       PHP- and ASP-style processor instructions are now protected as
2786                 raw HTML blocks.
2787
2788                         <? ... ?>
2789                         <% ... %>
2790
2791         +       Workarounds for regressions introduced with fix for "backticks within
2792                 tags" bug in 1.0.2b1. The fix is to allow `...` to be turned into
2793                 <code>...</code> within an HTML tag attribute, and then to turn
2794                 these spurious `<code>` tags back into literal backtick characters
2795                 in _EscapeSpecialCharsWithinTagAttributes().
2796
2797                 The regression was caused because in the fix, we moved
2798                 _EscapeSpecialCharsWithinTagAttributes() ahead of _DoCodeSpans()
2799                 in _RunSpanGamut(), but that's no good. We need to process code
2800                 spans first, otherwise we can get tripped up by something like this:
2801
2802                         `<test a="` content of attribute `">`
2803
2804
2805 1.0.2b2 - 20 Mar 2005
2806
2807         +       Fix for nested sub-lists in list-paragraph mode. Previously we got
2808                 a spurious extra level of `<p>` tags for something like this:
2809
2810                         *       this
2811
2812                                 *       sub
2813
2814                                 that
2815
2816         +       Experimental support for [this] as a synonym for [this][].
2817                 (Note to self: No test yet for this.)
2818                 Be sure to test, e.g.: [permutations of this sort of [thing][].]
2819
2820
2821 1.0.2b1 - 28  Feb 2005
2822
2823         +       Fix for backticks within HTML tag: <span attr='`ticks`'>like this</span>
2824
2825         +       Fix for escaped backticks still triggering code spans:
2826
2827                         There are two raw backticks here: \` and here: \`, not a code span
2828
2829 1.0.1 - 14 Dec 2004
2830
2831 1.0 - 28 Aug 2004
2832
2833
2834 =head1 AUTHOR
2835
2836     John Gruber
2837     http://daringfireball.net/
2838
2839     PHP port and other contributions by Michel Fortin
2840     http://michelf.com/
2841
2842     MultiMarkdown changes by Fletcher Penney
2843     http://fletcherpenney.net/
2844
2845 =head1 COPYRIGHT AND LICENSE
2846
2847 Original Markdown Code Copyright (c) 2003-2007 John Gruber
2848 <http://daringfireball.net/>
2849 All rights reserved.
2850
2851 MultiMarkdown changes Copyright (c) 2005-2007 Fletcher T. Penney
2852 <http://fletcherpenney.net/>
2853 All rights reserved.
2854
2855 Redistribution and use in source and binary forms, with or without
2856 modification, are permitted provided that the following conditions are
2857 met:
2858
2859 * Redistributions of source code must retain the above copyright notice,
2860   this list of conditions and the following disclaimer.
2861
2862 * Redistributions in binary form must reproduce the above copyright
2863   notice, this list of conditions and the following disclaimer in the
2864   documentation and/or other materials provided with the distribution.
2865
2866 * Neither the name "Markdown" nor the names of its contributors may
2867   be used to endorse or promote products derived from this software
2868   without specific prior written permission.
2869
2870 This software is provided by the copyright holders and contributors "as
2871 is" and any express or implied warranties, including, but not limited
2872 to, the implied warranties of merchantability and fitness for a
2873 particular purpose are disclaimed. In no event shall the copyright owner
2874 or contributors be liable for any direct, indirect, incidental, special,
2875 exemplary, or consequential damages (including, but not limited to,
2876 procurement of substitute goods or services; loss of use, data, or
2877 profits; or business interruption) however caused and on any theory of
2878 liability, whether in contract, strict liability, or tort (including
2879 negligence or otherwise) arising in any way out of the use of this
2880 software, even if advised of the possibility of such damage.
2881
2882 =cut
2883
2884
2885 Possibilities for 'THE'
2886
2887 $TE     239
2888 THE     192
2889 $RE     143
2890 INE     142
2891 BLE     131
2892 ODE     98
2893 ABE     96
2894 TLE     90
2895 OTE     87
2896 APE     86
2897 G_E     79
2898 USE     79
2899 ADE     72
2900  RE     71
2901 ITE     65
2902  NE     60
2903  BE     55
2904  WE     52
2905         RE      49
2906  HE     47
2907 NTE     47
2908 NCE     46
2909 ACE     45
2910 _TE     45
2911  TE     44
2912 ORE     38
2913 LSE     37
2914  DE     36
2915 OLE     35
2916 ARE     34
2917 } E     33
2918 ATE     33
2919 OCE     31
2920 _ME     31
2921 (DE     31
2922  LE     31
2923 STE     29
2924  SE     29
2925 ERE     29
2926 ASE     28
2927 UTE     28
2928 $LE     26
2929 PRE     26
2930 ONE     26
2931 EME     25
2932 $KE     24
2933 RRE     24
2934 IRE     24
2935 SPE     24
2936 OKE     24
2937 YPE     22
2938 Y_E     21
2939 ILE     20
2940 TKE     20
2941 AGE     20
2942 $CE     19
2943 RKE     19
2944 $HE     19
2945 CHE     18
2946 NME     18
2947 SRE     18
2948 HRE     18
2949 CTE     18
2950 WHE     18
2951 SGE     17
2952 _NE     17
2953 IDE     17
2954 IKE     17
2955 $DE     17
2956  GE     16
2957 AME     16
2958         }E      16
2959  PE     15
2960 RSE     15
2961 # E     15
2962 R E     14
2963 YLE     14
2964  _E     14
2965 .NE     14
2966 URE     14
2967 OVE     14
2968 TTE     14
2969 _RE     14
2970 _HE     14
2971 'RE     13
2972 'VE     13
2973 PLE     13
2974 _LE     13
2975 OPE     13
2976 CRE     12
2977 CKE     12
2978 -LE     11
2979 CLE     11
2980 O E     11
2981 AVE     11
2982 NDE     11
2983 OWE     10
2984 S E     10
2985 ) E     10
2986 SHE     10
2987 IVE     10
2988 , E     10
2989  $E     10
2990 OME     10
2991 UME     10
2992                 E       10
2993  VE     10
2994 NGE     10
2995 AKE     9
2996 GLE     9
2997 LUE     9
2998 FLE     9
2999 UNE     9
3000 DLE     9
3001 DRE     8
3002 LLE     8
3003 OJE     8
3004 D E     8
3005 NNE     8
3006 MBE     8
3007 LTE     8
3008 RDE     8
3009 =HE     8
3010 OSE     8
3011  ME     7
3012 IME     7
3013 TRE     7
3014 -RE     7
3015 NVE     7
3016 T E     7
3017 QUE     6
3018 NSE     6
3019 UBE     6
3020 Y E     6
3021 NRE     6
3022 $ME     6
3023 AHE     6
3024 N E     6
3025  FE     6
3026 UDE     6
3027  KE     6
3028 RVE     5
3029 SSE     5
3030 PPE     5
3031 $VE     5
3032 TIE     5
3033  CE     5
3034 FTE     5
3035 ($E     5
3036 LDE     5
3037 IZE     5
3038 TDE     4
3039 RPE     4
3040 :TE     4
3041 RIE     4
3042 ICE     4
3043 NLE     4
3044 //E     4
3045 GGE     4
3046 _KE     4
3047 MME     4
3048 DDE     4
3049 GRE     4
3050 RTE     4
3051 TVE     4
3052 H E     3
3053 _DE     3
3054 /PE     3
3055 MPE     3
3056 IXE     3
3057 FRE     3
3058 TWE     3
3059 XTE     3
3060 BBE     3
3061 N_E     3
3062 KDE     3
3063         BE      3
3064 {<E     3
3065 XPE     3
3066 YBE     3
3067 UCE     3
3068 ISE     3
3069 'TE     3
3070 RCE     3
3071 :VE     3
3072 BRE     3
3073 . E     3
3074 </E     3
3075  ZE     2
3076 DGE     2
3077 TME     2
3078 DOE     2
3079 N/E     2
3080 -VE     2
3081 FFE     2
3082         NE      2
3083 <HE     2
3084 $SE     2
3085 KBE     2
3086 "TE     2
3087 }GE     2
3088 SUE     2
3089 LIE     2
3090  YE     2
3091 {DE     2
3092 F E     2
3093 XCE     2
3094         _E      2
3095 "LE     2
3096 "RE     2
3097         $E      2
3098 \"E     2
3099 OHE     2
3100 SIE     2
3101 BSE     2
3102 : E     2
3103 ->E     2
3104 G E     2
3105 "CE     2
3106 {RE     2
3107 X2E     2
3108 RGE     2
3109 LME     2
3110 E E     2
3111 IGE     2
3112 ULE     2
3113 _GE     2
3114         DE      2
3115 $NE     2
3116  *E     2
3117         GE      2
3118 EXE     2
3119 U E     1
3120 MGE     1
3121   E     1
3122 {KE     1
3123 (GE     1
3124 "$E     1
3125 - E     1
3126 -ME     1
3127 RFE     1
3128 .}E     1
3129 (HE     1
3130 L E     1
3131 FIE     1
3132 XSE     1
3133 "ME     1
3134 [$E     1
3135 O-E     1
3136 SEE     1
3137 /HE     1
3138 DSE     1
3139 [TE     1
3140 D-E     1
3141 <ME     1
3142 MTE     1
3143 DIE     1
3144 T-E     1
3145 >TE     1
3146 TNE     1
3147 .PE     1
3148 CPE     1
3149 -DE     1
3150 O@E     1
3151 WNE     1
3152 IPE     1
3153 RNE     1
3154 -PE     1
3155 GUE     1
3156 [PE     1
3157 /DE     1
3158 7UE     1
3159 GNE     1
3160 `TE     1
3161         @E      1
3162 RUE     1
3163  @E     1
3164 (SE     1
3165 (RE     1
3166 +       E       1
3167  (E     1
3168 LRE     1
3169 DME     1
3170 __E     1
3171 KRE     1
3172 ALE     1
3173 LHE     1
3174 <TE     1
3175 = E     1
3176 (VE     1
3177 X E     1
3178 OZE     1
3179 " E     1
3180 -TE     1
3181 ] E     1
3182 ; E     1