htmlscrubber: Add a config setting that can be used to disable the scrubber acting...
[ikiwiki] / IkiWiki / Plugin / inline.pm
1 #!/usr/bin/perl
2 # Page inlining and blogging.
3 package IkiWiki::Plugin::inline;
4
5 use warnings;
6 use strict;
7 use Encode;
8 use IkiWiki 2.00;
9 use URI;
10
# File-scoped state shared by the subs below:
#   %knownfeeds    - feed id (built from an inline's parameters) => the
#                    number suffix used for that inline's feed files
#   %page_numfeeds - destpage => count of numbered feeds handed out so far
#   @inline        - html of rendered inlines, indexed by the id of the
#                    placeholder div that the format hook fills in
my (%knownfeeds, %page_numfeeds, @inline);
# Incremented while get_inline_content is rendering a page; nonzero means
# an inline is being processed inside another inline.
my $nested=0;
15
# Registers all of the plugin's hooks under the id "inline".
sub import { #{{{
	# Each entry is a hook type and its handler, registered in this order.
	my @hooks=(
		["getopt", \&getopt],
		["getsetup", \&getsetup],
		["checkconfig", \&checkconfig],
		["sessioncgi", \&sessioncgi],
		["preprocess", \&IkiWiki::preprocess_inline],
		["pagetemplate", \&IkiWiki::pagetemplate_inline],
		["format", \&format],
		# Hook to change to do pinging since it's called late.
		# This ensures each page only pings once and prevents slow
		# pings interrupting page builds.
		["change", \&IkiWiki::pingurl],
	);
	foreach my $entry (@hooks) {
		my ($type, $handler)=@$entry;
		hook(type => $type, id => "inline", call => $handler);
	}
} # }}}
32
# Parses the plugin's command line switches into %config. Options that
# are not recognised here are left alone (pass_through).
sub getopt () { #{{{
	eval q{use Getopt::Long};
	error($@) if $@;
	Getopt::Long::Configure('pass_through');

	# The four negatable boolean switches map straight onto the
	# %config entry of the same name.
	my @switches=map { ("$_!" => \$config{$_}) } qw(rss atom allowrss allowatom);

	# --pingurl may be given repeatedly; every value is appended.
	my $add_pingurl=sub {
		my ($name, $value)=@_;
		push @{$config{pingurl}}, $value;
	};

	GetOptions(@switches, "pingurl=s" => $add_pingurl);
} #}}}
47
# Describes the plugin's configuration settings. Returns a list of
# setting name => specification hash pairs, starting with the "plugin"
# entry that describes the plugin itself.
sub getsetup () { #{{{
	# The four feed toggles differ only in name and description.
	my @toggles=(
		["rss", "enable rss feeds by default?"],
		["atom", "enable atom feeds by default?"],
		["allowrss", "allow rss feeds to be used?"],
		["allowatom", "allow atom feeds to be used?"],
	);

	return
		plugin => {
			safe => 1,
			rebuild => undef,
		},
		(map {
			my ($name, $description)=@$_;
			($name => {
				type => "boolean",
				example => 0,
				description => $description,
				safe => 1,
				rebuild => 1,
			});
		} @toggles),
		pingurl => {
			type => "string",
			example => "http://rpc.technorati.com/rpc/ping",
			description => "urls to ping (using XML-RPC) on feed update",
			safe => 1,
			rebuild => 0,
		};
} #}}}
90
# Validates and completes the feed related configuration.
sub checkconfig () { #{{{
	# Feeds that are on by default need the wiki's url.
	my $feeds_on=$config{rss} || $config{atom};
	if ($feeds_on && ! length $config{url}) {
		error(gettext("Must specify url to wiki with --url when using --rss or --atom"));
	}

	# Add the feed file extensions to the wiki's file prune regexps.
	push @{$config{wiki_file_prune_regexps}}, qr/\.rss$/ if $config{rss};
	push @{$config{wiki_file_prune_regexps}}, qr/\.atom$/ if $config{atom};

	# pingurl is dereferenced as an array elsewhere; make sure it is one.
	$config{pingurl}=[] unless exists $config{pingurl};
} #}}}
105
# Format hook: replaces the placeholder divs emitted by preprocess_inline
# with the inline content that was stored in @inline.
#
# Takes the hook's named parameters; only "content" is used. Returns the
# content with the placeholders filled in.
sub format (@) { #{{{
	my %params=@_;

	# Fill in the inline content generated earlier. This is actually an
	# optimisation.
	#
	# Placeholder ids are always array indexes, so only numeric ids are
	# matched. A non-numeric id appearing in page content would
	# otherwise be treated as index 0 and swallow another inline's
	# content.
	$params{content}=~s{<div class="inline" id="(\d+)"></div>}{
		my $html=delete $inline[$1];
		# An id that was already consumed, or never existed, has
		# nothing to fill in.
		defined $html ? $html : "";
	}eg;
	return $params{content};
} #}}}
116
# Handles the "blog" cgi action sent by the blog post form: turns the
# submitted title into a page name that is not in use yet, then hands the
# request over to the page editor as a "create" action.
#
# Arguments: the cgi query object and the session object.
sub sessioncgi () { #{{{
	my ($q, $session)=@_;

	if ($q->param('do') eq 'blog') {
		my $page=IkiWiki::titlepage(decode_utf8($q->param('title')));
		$page=~s/(\/)/"__".ord($1)."__"/eg; # don't create subdirs

		# if the page already exists, munge it to be unique by
		# appending 2, 3, ...
		my $from=$q->param('from');
		my $suffix="";
		until (! exists $IkiWiki::pagecase{lc("$from/$page$suffix")}) {
			$suffix=length $suffix ? $suffix + 1 : 2;
		}
		$q->param('page', $page.$suffix);

		# now go create the page
		$q->param('do', 'create');
		# make sure the editpage plugin is loaded
		if (! IkiWiki->can("cgi_editpage")) {
			error(gettext("page editing not allowed"));
		}
		else {
			IkiWiki::cgi_editpage($q, $session);
		}
		exit;
	}
} #}}}
144
145 # Back to ikiwiki namespace for the rest, this code is very much
146 # internal to ikiwiki even though it's separated into a plugin.
147 package IkiWiki;
148
# %toping    - destpages whose feeds were written in this run; pingurl
#              sends an update ping for each of them
# %feedlinks - destpage => html <link> tag for the page's feed, used to
#              fill in the "feedlinks" template variable
my (%toping, %feedlinks);
151
# Preprocessor directive handler for inline.
#
# Collects the pages matching the "pages" pagespec, sorts and trims the
# list, renders the pages (through a template, or raw), optionally adds a
# blog post form or feed buttons, and writes rss/atom feeds.
#
# Named parameters read here:
#   pages         pagespec selecting the pages to inline (required)
#   page          the page containing the directive
#   destpage      the page the output ends up in
#   preview       true when rendering a preview; no feeds are written
#   show          maximum number of pages displayed (10 unless archive)
#   feedshow      maximum number of pages in the feeds (defaults to show)
#   skip          number of leading pages to drop from the list
#   sort          "age" (default), "mtime" or "title"
#   reverse       yes/no, reverse the sorted list
#   raw           yes/no, insert page content without a template
#   archive       yes/no, use the archivepage template
#   quick         yes/no, with archive: do not fill in page content;
#                 also turns feeds off unless feeds is given
#   template      name of the template used for each page
#   actions       yes/no, add edit/discussion links
#   feeds         yes/no, generate feeds at all
#   rss, atom     yes/no, per-inline feed switches; only honoured when
#                 the matching feed type is enabled or allowed in %config
#   feedonly      yes/no, only generate the feeds
#   feedpages     pagespec further limiting the pages put in the feeds
#   description   feed description (defaults to the wiki name)
#   guid          passed through to the feed template
#   rootpage, postform, postformtext
#                 control the blog post form
#   timeformat    passed to displaytime for the creation date
#
# Returns the generated html when raw or nested inside another inline.
# Otherwise the html is stored in @inline and a placeholder div is
# returned for the format hook to fill in. An unknown sort type or
# template yields an error message string instead.
sub preprocess_inline (@) { #{{{
	my %params=@_;
	
	if (! exists $params{pages}) {
		error(gettext("missing pages parameter"));
	}
	my $raw=yesno($params{raw});
	my $archive=yesno($params{archive});
	my $rss=(($config{rss} || $config{allowrss}) && exists $params{rss}) ? yesno($params{rss}) : $config{rss};
	my $atom=(($config{atom} || $config{allowatom}) && exists $params{atom}) ? yesno($params{atom}) : $config{atom};
	my $quick=exists $params{quick} ? yesno($params{quick}) : 0;
	my $feeds=exists $params{feeds} ? yesno($params{feeds}) : !$quick;
	my $feedonly=yesno($params{feedonly});
	if (! exists $params{show} && ! $archive) {
		$params{show}=10;
	}
	if (! exists $params{feedshow} && exists $params{show}) {
		$params{feedshow}=$params{show};
	}
	my $desc;
	if (exists $params{description}) {
		$desc = $params{description} 
	}
	else {
		$desc = $config{wikiname};
	}
	my $actions=yesno($params{actions});
	if (exists $params{template}) {
		# Only keep characters that are safe in a template file name.
		$params{template}=~s/[^-_a-zA-Z0-9]+//g;
	}
	else {
		$params{template} = $archive ? "archivepage" : "inlinepage";
	}

	# Every page matching the pagespec, except the inlining page itself.
	my @list;
	foreach my $page (keys %pagesources) {
		next if $page eq $params{page};
		if (pagespec_match($page, $params{pages}, location => $params{page})) {
			push @list, $page;
		}
	}

	if (exists $params{sort} && $params{sort} eq 'title') {
		@list=sort { pagetitle(basename($a)) cmp pagetitle(basename($b)) } @list;
	}
	elsif (exists $params{sort} && $params{sort} eq 'mtime') {
		@list=sort { $pagemtime{$b} <=> $pagemtime{$a} } @list;
	}
	elsif (! exists $params{sort} || $params{sort} eq 'age') {
		@list=sort { $pagectime{$b} <=> $pagectime{$a} } @list;
	}
	else {
		return sprintf(gettext("unknown sort type %s"), $params{sort});
	}

	if (yesno($params{reverse})) {
		@list=reverse(@list);
	}

	if (exists $params{skip}) {
		@list=@list[$params{skip} .. scalar @list - 1];
	}
	
	# The feed list is cut from the full list before the displayed list
	# is trimmed, since feedshow may be larger than show.
	my @feedlist;
	if ($feeds) {
		if (exists $params{feedshow} &&
		    $params{feedshow} && @list > $params{feedshow}) {
			@feedlist=@list[0..$params{feedshow} - 1];
		}
		else {
			@feedlist=@list;
		}
	}
	
	if ($params{show} && @list > $params{show}) {
		@list=@list[0..$params{show} - 1];
	}

	add_depends($params{page}, $params{pages});
	# Explicitly add all currently displayed pages as dependencies, so
	# that if they are removed or otherwise changed, the inline will be
	# sure to be updated.
	add_depends($params{page}, join(" or ", $#list >= $#feedlist ? @list : @feedlist));

	my $feednum="";

	# Inlines with identical parameters share a feed; the first feed on
	# a page gets no number, later ones are numbered.
	my $feedid=join("\0", map { $_."\0".$params{$_} } sort keys %params);
	if (exists $knownfeeds{$feedid}) {
		$feednum=$knownfeeds{$feedid};
	}
	else {
		if (exists $page_numfeeds{$params{destpage}}) {
			if ($feeds) {
				$feednum=$knownfeeds{$feedid}=++$page_numfeeds{$params{destpage}};
			}
		}
		else {
			$feednum=$knownfeeds{$feedid}="";
			if ($feeds) {
				$page_numfeeds{$params{destpage}}=1;
			}
		}
	}

	# Declared unconditionally: "my ... if" has undefined behaviour.
	my $rssurl=($feeds && $rss)
		? basename(rsspage($params{destpage}).$feednum) : undef;
	my $atomurl=($feeds && $atom)
		? basename(atompage($params{destpage}).$feednum) : undef;
	my $ret="";

	if (length $config{cgiurl} && ! $params{preview} && (exists $params{rootpage} ||
	    (exists $params{postform} && yesno($params{postform}))) &&
	    IkiWiki->can("cgi_editpage")) {
		# Add a blog post form, with feed buttons.
		my $formtemplate=template("blogpost.tmpl", blind_cache => 1);
		$formtemplate->param(cgiurl => $config{cgiurl});
		my $rootpage;
		if (exists $params{rootpage}) {
			$rootpage=bestlink($params{page}, $params{rootpage});
		}
		else {
			$rootpage=$params{page};
		}
		$formtemplate->param(rootpage => $rootpage);
		$formtemplate->param(rssurl => $rssurl) if $feeds && $rss;
		$formtemplate->param(atomurl => $atomurl) if $feeds && $atom;
		if (exists $params{postformtext}) {
			$formtemplate->param(postformtext =>
				$params{postformtext});
		}
		else {
			$formtemplate->param(postformtext =>
				gettext("Add a new post titled:"));
		}
		$ret.=$formtemplate->output;
	}
	elsif ($feeds && !$params{preview}) {
		# Add feed buttons.
		my $linktemplate=template("feedlink.tmpl", blind_cache => 1);
		$linktemplate->param(rssurl => $rssurl) if $rss;
		$linktemplate->param(atomurl => $atomurl) if $atom;
		$ret.=$linktemplate->output;
	}
	
	if (! $feedonly) {
		require HTML::Template;
		my @params=IkiWiki::template_params($params{template}.".tmpl", blind_cache => 1);
		if (! @params) {
			return sprintf(gettext("nonexistant template %s"), $params{template});
		}
		my $template;
		$template=HTML::Template->new(@params) unless $raw;
	
		foreach my $page (@list) {
			my $file = $pagesources{$page};
			my $type = pagetype($file);
			if (! $raw || ! defined $type) {
				# In raw mode no template was built up front, but
				# a file without a page type is still displayed
				# through the template, so build it on first need.
				$template=HTML::Template->new(@params)
					unless defined $template;

				unless ($archive && $quick) {
					# Get the content before populating the
					# template, since getting the content uses
					# the same template if inlines are nested.
					my $content=get_inline_content($page, $params{destpage});
					$template->param(content => $content);
				}
				$template->param(pageurl => urlto(bestlink($params{page}, $page), $params{destpage}));
				$template->param(title => pagetitle(basename($page)));
				$template->param(ctime => displaytime($pagectime{$page}, $params{timeformat}));
				$template->param(first => 1) if $page eq $list[0];
				$template->param(last => 1) if $page eq $list[$#list];
	
				if ($actions) {
					if ($config{discussion}) {
						my $discussionlink=gettext("discussion");
						if ($page !~ /.*\/\Q$discussionlink\E$/ &&
						    (length $config{cgiurl} ||
						     exists $links{$page."/".$discussionlink})) {
							$template->param(have_actions => 1);
							$template->param(discussionlink =>
								htmllink($page,
									$params{destpage},
									gettext("Discussion"),
									noimageinline => 1,
									forcesubpage => 1));
						}
					}
					if (length $config{cgiurl} && defined $type) {
						$template->param(have_actions => 1);
						$template->param(editurl => cgiurl(do => "edit", page => $page));
					}
				}
	
				run_hooks(pagetemplate => sub {
					shift->(page => $page, destpage => $params{destpage},
						template => $template,);
				});
	
				$ret.=$template->output;
				$template->clear_params;
			}
			else {
				# Raw mode with a known page type: the content is
				# processed but not converted to html here.
				$ret.="\n".
				      linkify($page, $params{destpage},
				      preprocess($page, $params{destpage},
				      filter($page, $params{destpage},
				      readfile(srcfile($file)))));
			}
		}
	}
	
	if ($feeds) {
		if (exists $params{feedpages}) {
			@feedlist=grep { pagespec_match($_, $params{feedpages}, location => $params{page}) } @feedlist;
		}
	
		# NOTE(review): both branches below assign the same
		# $feedlinks entry, so with rss and atom both on only the
		# atom link is kept - confirm whether that is intended.
		if ($rss) {
			my $rssp=rsspage($params{destpage}).$feednum;
			will_render($params{destpage}, $rssp);
			if (! $params{preview}) {
				writefile($rssp, $config{destdir},
					genfeed("rss",
						$config{url}."/".$rssp, $desc, $params{guid}, $params{destpage}, @feedlist));
				$toping{$params{destpage}}=1 unless $config{rebuild};
				$feedlinks{$params{destpage}}=qq{<link rel="alternate" type="application/rss+xml" title="RSS" href="$rssurl" />};
			}
		}
		if ($atom) {
			my $atomp=atompage($params{destpage}).$feednum;
			will_render($params{destpage}, $atomp);
			if (! $params{preview}) {
				writefile($atomp, $config{destdir},
					genfeed("atom", $config{url}."/".$atomp, $desc, $params{guid}, $params{destpage}, @feedlist));
				$toping{$params{destpage}}=1 unless $config{rebuild};
				$feedlinks{$params{destpage}}=qq{<link rel="alternate" type="application/atom+xml" title="Atom" href="$atomurl" />};
			}
		}
	}
	
	# Raw and nested output is returned directly; anything else is
	# stored and inserted by the format hook.
	return $ret if $raw || $nested;
	push @inline, $ret;
	return "<div class=\"inline\" id=\"$#inline\"></div>\n\n";
} #}}}
394
# Pagetemplate hook: hands the feed link recorded for the page to any
# template that has a "feedlinks" variable.
sub pagetemplate_inline (@) { #{{{
	my %params=@_;
	my ($page, $template)=@params{qw(page template)};

	if (exists $feedlinks{$page} && $template->query(name => "feedlinks")) {
		$template->param(feedlinks => $feedlinks{$page});
	}
} #}}}
403
# Returns the html for $page as it should appear when inlined into
# $destpage, or "" when the page's source file has no page type.
sub get_inline_content ($$) { #{{{
	my ($page, $destpage)=@_;
	
	my $file=$pagesources{$page};
	my $type=pagetype($file);
	return "" unless defined $type;

	# $nested is nonzero for the duration of the rendering, so that an
	# inline directive met while rendering returns its html directly
	# instead of a placeholder.
	$nested++;
	my $ret=htmlize($page, $destpage, $type,
	       linkify($page, $destpage,
	       preprocess($page, $destpage,
	       filter($page, $destpage,
	       readfile(srcfile($file))))));
	$nested--;
	return $ret;
} #}}}
424
# Formats an epoch time as "Thu, 01 Jan 1970 00:00:00 +0000", in local
# time with a numeric timezone offset.
sub date_822 ($) { #{{{
	my ($time)=@_;

	# Format under the C locale so that %a and %b are not localised,
	# then put the previous LC_TIME setting back.
	my $saved=POSIX::setlocale(POSIX::LC_TIME());
	POSIX::setlocale(POSIX::LC_TIME(), "C");
	my @when=localtime($time);
	my $stamp=POSIX::strftime("%a, %d %b %Y %H:%M:%S %z", @when);
	POSIX::setlocale(POSIX::LC_TIME(), $saved);
	return $stamp;
} #}}}
434
# Formats an epoch time as "1970-01-01T00:00:00Z", always in UTC.
sub date_3339 ($) { #{{{
	my ($time)=@_;

	# Format under the C locale, then put the previous LC_TIME
	# setting back.
	my $saved=POSIX::setlocale(POSIX::LC_TIME());
	POSIX::setlocale(POSIX::LC_TIME(), "C");
	my @when=gmtime($time);
	my $stamp=POSIX::strftime("%Y-%m-%dT%H:%M:%SZ", @when);
	POSIX::setlocale(POSIX::LC_TIME(), $saved);
	return $stamp;
} #}}}
444
# Rewrites relative link and image urls in $content into absolute ones,
# resolved against $baseurl. Returns the rewritten content.
sub absolute_urls ($$) { #{{{
	# sucky sub because rss sucks
	my ($content, $baseurl)=@_;

	# the base url with its last path component removed
	(my $url=$baseurl)=~s/[^\/]+$//;

	# what is the non path part of the url?
	my $top_uri = URI->new($url);
	$top_uri->path_query(""); # reset the path
	my $urltop = $top_uri->as_string;

	# Opening of an <a> or <img> tag, along with the few simple
	# attributes that are allowed to come before href/src.
	my $a_open=qr/<a(?:\s+(?:class|id)\s*="?\w+"?)?/i;
	my $img_open=qr/<img(?:\s+(?:class|id|width|height)\s*="?\w+"?)*/i;

	# fragment only links point into the base page itself
	$content=~s{($a_open)\s+href=\s*"(#[^"]+)"}{$1 href="$baseurl$2"}gim;
	# relative to another wiki page
	$content=~s{($a_open)\s+href=\s*"(?!\w+:)([^/][^"]*)"}{$1 href="$url$2"}gim;
	$content=~s{($img_open)\s+src=\s*"(?!\w+:)([^/][^"]*)"}{$1 src="$url$2"}gim;
	# relative to the top of the site
	$content=~s{($a_open)\s+href=\s*"(?!\w+:)(/[^"]*)"}{$1 href="$urltop$2"}gim;
	$content=~s{($img_open)\s+src=\s*"(?!\w+:)(/[^"]*)"}{$1 src="$urltop$2"}gim;
	return $content;
} #}}}
467
# Returns the target file of the rss feed belonging to a page.
sub rsspage ($) { #{{{
	my ($page)=@_;
	return targetpage($page, "rss");
} #}}}
471
# Returns the target file of the atom feed belonging to a page.
sub atompage ($) { #{{{
	my ($page)=@_;
	return targetpage($page, "atom");
} #}}}
475
# Generates a complete feed document and returns it as a string.
#
# Arguments: the feed type ("rss" or "atom", selecting the
# <type>item.tmpl and <type>page.tmpl templates), the url of the feed
# itself, the feed description, a guid for the feed, the page the feed
# belongs to, and then the list of pages to include as feed items.
sub genfeed ($$$$$@) { #{{{
	my ($feedtype, $feedurl, $feeddesc, $guid, $page, @pages)=@_;
	
	my $url=URI->new(encode_utf8(urlto($page,"",1)));
	
	my $itemtemplate=template($feedtype."item.tmpl", blind_cache => 1);
	my $items="";
	my $newest=0; # latest mtime of any item, used as the feed date
	foreach my $p (@pages) {
		my $u=URI->new(encode_utf8(urlto($p, "", 1)));
		my $pcontent = absolute_urls(get_inline_content($p, $page), $url);

		my ($ctime, $mtime)=($pagectime{$p}, $pagemtime{$p});
		$itemtemplate->param(
			title => pagetitle(basename($p)),
			url => $u,
			permalink => $u,
			cdate_822 => date_822($ctime),
			mdate_822 => date_822($mtime),
			cdate_3339 => date_3339($ctime),
			mdate_3339 => date_3339($mtime),
		);

		if (exists $pagestate{$p} &&
		    exists $pagestate{$p}{meta}{guid}) {
			$itemtemplate->param(guid => $pagestate{$p}{meta}{guid});
		}

		# A file without a page type becomes an enclosure, provided
		# the item template has a place for one. Everything else is
		# included as content.
		if (! $itemtemplate->query(name => "enclosure") ||
		    defined pagetype($pagesources{$p})) {
			$itemtemplate->param(content => $pcontent);
		}
		else {
			my $file=$pagesources{$p};
			my $size=(srcfile_stat($file))[8];
			my $mime="unknown";
			eval q{use File::MimeInfo};
			$mime = mimetype($file) unless $@;
			$itemtemplate->param(
				enclosure => $u,
				type => $mime,
				length => $size,
			);
		}

		run_hooks(pagetemplate => sub {
			shift->(page => $p, destpage => $page,
				template => $itemtemplate);
		});

		$items.=$itemtemplate->output;
		$itemtemplate->clear_params;

		if ($mtime > $newest) {
			$newest = $mtime;
		}
	}

	my $template=template($feedtype."page.tmpl", blind_cache => 1);
	$template->param(
		title => $page ne "index" ? pagetitle($page) : $config{wikiname},
		wikiname => $config{wikiname},
		pageurl => $url,
		content => $items,
		feeddesc => $feeddesc,
		guid => $guid,
		feeddate => date_3339($newest),
		feedurl => $feedurl,
		version => $IkiWiki::version,
	);
	run_hooks(pagetemplate => sub {
		shift->(page => $page, destpage => $page,
			template => $template);
	});
	
	return $template->output;
} #}}}
562
# Change hook: sends a weblogUpdates.ping XML-RPC request to every
# configured ping url for every page recorded in %toping. The pinging is
# done in a forked, detached child process; the parent returns at once.
sub pingurl (@) { #{{{
	return if ! @{$config{pingurl}} || ! %toping;

	eval q{require RPC::XML::Client};
	if ($@) {
		debug(gettext("RPC::XML::Client not found, not pinging"));
		return;
	}

	# daemonize here so slow pings don't slow down wiki updates
	my $pid=fork;
	error("Can't fork: $!") unless defined $pid;
	return if $pid;
	# Only the child gets here.
	chdir '/';
	POSIX::setsid() or error("Can't start a new session: $!");
	open STDIN, '/dev/null';
	open STDOUT, '>/dev/null';
	open STDERR, '>&STDOUT' or error("Can't dup stdout: $!");

	# Don't need to keep a lock on the wiki as a daemon.
	IkiWiki::unlockwiki();

	foreach my $page (keys %toping) {
		my $title=pagetitle(basename($page), 0);
		my $url=urlto($page, "", 1);
		foreach my $pingurl (@{$config{pingurl}}) {
			debug("Pinging $pingurl for $page");
			eval {
				my $client = RPC::XML::Client->new($pingurl);
				my $req = RPC::XML::request->new('weblogUpdates.ping',
					$title, $url);
				my $res = $client->send_request($req);
				ref $res or error("Did not receive response to ping");
				my $r=$res->value;
				# A missing flerror field counts as a failure too.
				if (! exists $r->{flerror} || $r->{flerror}) {
					my $why=exists $r->{message} ? $r->{message} : "[unknown reason]";
					error("Ping rejected: ".$why);
				}
			};
			error("Ping failed: $@") if $@;
		}
	}

	exit 0; # daemon done
} #}}}
610
611 1