new link change detection method and general code rework
[ikiwiki] / IkiWiki / Render.pm
1 #!/usr/bin/perl
2
3 package IkiWiki;
4
5 use warnings;
6 use strict;
7 use IkiWiki;
8 use Encode;
9
# Link lookup tables, filled in by calculate_links():
#   $backlinks{$target}{$linker}=1  when a link on page $linker is
#                                   resolved by bestlink() to $target
#   $brokenlinks{$text}             list of pages carrying a link $text
#                                   for which bestlink() found no target
my %backlinks;
our %brokenlinks;
# True once the two tables above have been computed.
my $links_calculated=0;

# Build %backlinks and %brokenlinks from %links. The work is done at most
# once; later calls return immediately.
sub calculate_links () {
	return if $links_calculated;

	%backlinks=();
	%brokenlinks=();
	foreach my $linker (keys %links) {
		foreach my $text (@{$links{$linker}}) {
			my $target=bestlink($linker, $text);
			if (! length $target) {
				# Nothing to point at: remember who uses the link.
				push @{$brokenlinks{$text}}, $linker;
			}
			elsif ($target ne $linker) {
				# Self-links are not recorded as backlinks.
				$backlinks{$target}{$linker}=1;
			}
		}
	}

	$links_calculated=1;
}
31
# Return the names of the pages that link to $page (in hash order).
# Makes sure the link tables have been calculated first.
sub backlink_pages ($) {
	my ($page)=@_;

	calculate_links();

	my @linkers=keys %{$backlinks{$page}};
	return @linkers;
}
39
# Return one { url => ..., page => ... } hash per page that links to
# $page. The url comes from urlto(); the page value is the linking page's
# title after directory components shared with $page have been dropped.
sub backlinks ($) {
	my ($page)=@_;

	my @links;
	foreach my $linker (backlink_pages($page)) {
		my $href=urlto($linker, $page);

		# Strip leading "dir/" components for as long as both names
		# start with the same one.
		my $linker_rest=$linker;
		my $page_rest=$page;
		while ($page_rest=~m!^([^/]+/)!) {
			my $dir=$1;
			last unless $linker_rest=~s/^\Q$dir\E//;
			$page_rest=~s/^\Q$dir\E//;
		}

		push @links, { url => $href, page => pagetitle($linker_rest) };
	}
	return @links;
}
60
# Wrap the already-htmlized $content of $page in the page template and
# return the finished output.
#
# Hooks run, in order: templatefile (choose the template), pagetemplate
# (fill in extra template parameters), postscan (inspect the output) and
# format (may replace the output).
sub genpage ($$) {
	my ($page, $content)=@_;

	# The first templatefile hook that names a template known to
	# template_file() wins; later hooks are not consulted.
	my $templatefile;
	run_hooks(templatefile => sub {
		return if defined $templatefile;
		my $hook=shift;
		my $candidate=$hook->(page => $page);
		if (defined $candidate && defined template_file($candidate)) {
			$templatefile=$candidate;
		}
	});
	my $tmplname='page.tmpl';
	$tmplname=$templatefile if defined $templatefile;
	my $template=template($tmplname, blind_cache => 1);

	# Set as soon as at least one action link goes into the template.
	my $have_actions=0;

	if (length $config{cgiurl}) {
		if (IkiWiki->can("cgi_editpage")) {
			$template->param(editurl => cgiurl(do => "edit", page => $page));
		}
		if (exists $hooks{auth}) {
			$template->param(prefsurl => cgiurl(do => "prefs"));
		}
		$have_actions=1;
	}

	if (defined $config{historyurl} && length $config{historyurl}) {
		# [[file]] in the configured url stands for the page's source file.
		my $historyurl=$config{historyurl};
		$historyurl=~s/\[\[file\]\]/$pagesources{$page}/g;
		$template->param(historyurl => $historyurl);
		$have_actions=1;
	}

	# A discussion link is added unless this page is itself a discussion
	# page; it needs either a cgiurl or an entry in %links for the
	# discussion page.
	if ($config{discussion} &&
	    $page !~ /.*\/\Q$config{discussionpage}\E$/ &&
	    (length $config{cgiurl} ||
	     exists $links{$page."/".$config{discussionpage}})) {
		$template->param(discussionlink => htmllink($page, $page, $config{discussionpage}, noimageinline => 1, forcesubpage => 1));
		$have_actions=1;
	}

	$template->param(have_actions => 1) if $have_actions;

	# Backlinks sorted by title; anything beyond numbacklinks (when that
	# is non-zero) is handed to the template as more_backlinks.
	my @sorted=sort { $a->{page} cmp $b->{page} } backlinks($page);
	my $shown=\@sorted;
	my $overflow=[];
	unless (@sorted <= $config{numbacklinks} || ! $config{numbacklinks}) {
		my $limit=$config{numbacklinks};
		$shown=[@sorted[0..$limit-1]];
		$overflow=[@sorted[$limit..$#sorted]];
	}

	$template->param(
		title => $page eq 'index'
			? $config{wikiname}
			: pagetitle(basename($page)),
		wikiname => $config{wikiname},
		content => $content,
		backlinks => $shown,
		more_backlinks => $overflow,
		mtime => displaytime($pagemtime{$page}),
		ctime => displaytime($pagectime{$page}),
		baseurl => baseurl($page),
	);

	run_hooks(pagetemplate => sub {
		my $hook=shift;
		$hook->(page => $page, destpage => $page, template => $template);
	});

	$content=$template->output;

	run_hooks(postscan => sub {
		my $hook=shift;
		$hook->(page => $page, content => $content);
	});

	# Each format hook receives the current output and returns its
	# replacement.
	run_hooks(format => sub {
		my $hook=shift;
		$content=$hook->(
			page => $page,
			content => $content,
		);
	});

	return $content;
}
146
# First pass over a source file: register what it will render to and,
# when pagetype() recognises it as a page, collect its links by running
# the scan hooks and preprocess() in scan-only mode.
sub scan ($) {
	my ($file)=@_;

	my $type=pagetype($file);
	if (defined $type) {
		my $srcfile=srcfile($file);
		my $content=readfile($srcfile);
		my $page=pagename($file);
		will_render($page, htmlpage($page), 1);

		# Start from a clean link list. Discussion links are a
		# special case since they're not in the text of the page,
		# but on its template, so they are seeded here.
		$links{$page}=$config{discussion}
			? [ $page."/".lc($config{discussionpage}) ]
			: [];

		run_hooks(scan => sub {
			my $hook=shift;
			$hook->(
				page => $page,
				content => $content,
			);
		});

		# Preprocess in scan-only mode.
		preprocess($page, $page, $content, 1);
	}
	else {
		# Not a page: the file is registered under its own name.
		will_render($file, $file, 1);
	}
}
180
# Copy everything readable from $srcfd to $destfd in 16k blocks, using
# unbuffered sysread/syswrite.
#
# Arguments: $srcfile and $destfile (names, used only in error messages),
# $srcfd and $destfd (open filehandles), $cleanup (passed through to
# error() on failure).
#
# A failed read or write is reported through error(); a read interrupted
# by a signal (EINTR) is retried.
sub fast_file_copy (@) {
	my ($srcfile, $destfile, $srcfd, $destfd, $cleanup)=@_;

	my $blksize = 16384;
	my $buf;
	while (1) {
		my $len = sysread $srcfd, $buf, $blksize;
		if (! defined $len) {
			# sysread returns undef on error. This has to be tested
			# before the end-of-file check, otherwise a failed read
			# is indistinguishable from EOF and the copy is silently
			# truncated. %! is used instead of matching the message
			# text so the test does not depend on the locale.
			next if $!{EINTR};
			error("failed to read $srcfile: $!", $cleanup);
		}
		# 0 means end of file.
		last unless $len;

		# syswrite may write less than requested; keep going until
		# the whole block is out.
		my $offset = 0;
		while ($len) {
			my $written = syswrite $destfd, $buf, $len, $offset;
			defined $written
				or error("failed to write $destfile: $!", $cleanup);
			$len -= $written;
			$offset += $written;
		}
	}
}
204
# Build the destination file for one source file.
#
# Pages (files with a page type) go through filter, preprocess, linkify
# and htmlize, are wrapped by genpage() and written below destdir. Page
# types starting with "_" are registered but not written. Any other file
# is hardlinked (when configured and possible) or copied into destdir.
# In both cases the file's recorded dependencies are dropped first.
sub render ($) {
	my ($file)=@_;

	my $type=pagetype($file);
	my $srcfile=srcfile($file);

	if (! defined $type) {
		delete $depends{$file};
		delete $depends_simple{$file};
		will_render($file, $file, 1);

		if ($config{hardlink}) {
			# only hardlink if owned by same user
			my $owner=(stat($srcfile))[4];
			if ($owner == $>) {
				my $dest=$config{destdir}."/".$file;
				prep_writefile($file, $config{destdir});
				unlink($dest);
				return if link($srcfile, $dest);
			}
			# if hardlink fails, fall back to copying
		}

		my $srcfd=readfile($srcfile, 1, 1);
		writefile($file, $config{destdir}, undef, 1, sub {
			fast_file_copy($srcfile, $file, $srcfd, @_);
		});
	}
	else {
		my $page=pagename($file);
		delete $depends{$page};
		delete $depends_simple{$page};
		will_render($page, htmlpage($page), 1);
		return if $type=~/^_/;

		# Innermost call runs first:
		# readfile -> filter -> preprocess -> linkify -> htmlize
		my $content=htmlize($page, $page, $type,
			linkify($page, $page,
				preprocess($page, $page,
					filter($page, $page,
						readfile($srcfile)))));

		my $output=htmlpage($page);
		writefile($output, $config{destdir}, genpage($page, $content));
	}
}
250
# Delete $file, then remove its parent directories one level at a time,
# stopping at the first one rmdir() refuses to remove.
sub prune ($) {
	my ($file)=@_;

	unlink($file);

	my $dir=dirname($file);
	$dir=dirname($dir) while rmdir($dir);
}
260
# Security check: avoid following symlinks in the srcdir path by default.
# Starting at srcdir, each successively shorter form of the path is
# tested; a symlink is reported through error() unless
# allow_symlinks_before_srcdir is set.
sub srcdir_check () {
	my $path=$config{srcdir};
	while (length $path) {
		if (! $config{allow_symlinks_before_srcdir} && -l $path) {
			error(sprintf(gettext("symlink found in srcdir path (%s) -- set allow_symlinks_before_srcdir to allow this"), $path));
		}

		# Drop trailing slashes first; only a path that had none is
		# replaced by its parent directory.
		next if $path=~s/\/+$//;
		$path=dirname($path);
	}

}
274
# Walk srcdir and then the underlay directories.
#
# Returns a list of all source files found (paths relative to the
# directory they were found in), and a hash of the corresponding page
# names, both as references. Underlay files are only used when srcdir has
# nothing at the same path and no earlier file provides the same page.
sub find_src_files () {
	my (@files, %pages);
	eval q{use File::Find};
	error($@) if $@;

	find({
		no_chdir => 1,
		wanted => sub {
			$_=decode_utf8($_);
			if (file_pruned($_, $config{srcdir})) {
				$File::Find::prune=1;
				return;
			}
			# Symlinks and directories are never source files.
			return if -l $_ || -d _;

			my ($f)=/$config{wiki_file_regexp}/; # untaint
			if (! defined $f) {
				warn(sprintf(gettext("skipping bad filename %s"), $_)."\n");
				return;
			}

			$f=~s/^\Q$config{srcdir}\E\/?//;
			push @files, $f;
			my $pagename = pagename($f);
			if ($pages{$pagename}) {
				debug(sprintf(gettext("%s has multiple possible source pages"), $pagename));
			}
			$pages{$pagename}=1;
		},
	}, $config{srcdir});

	foreach my $dir (@{$config{underlaydirs}}, $config{underlaydir}) {
		find({
			no_chdir => 1,
			wanted => sub {
				$_=decode_utf8($_);
				if (file_pruned($_, $dir)) {
					$File::Find::prune=1;
					return;
				}
				return if -l $_ || -d _;

				my ($f)=/$config{wiki_file_regexp}/; # untaint
				if (! defined $f) {
					warn(sprintf(gettext("skipping bad filename %s"), $_)."\n");
					return;
				}

				$f=~s/^\Q$dir\E\/?//;
				# avoid underlaydir override attacks; see
				# security.mdwn
				return if -l "$config{srcdir}/$f" || -e _;

				my $page=pagename($f);
				return if $pages{$page};
				push @files, $f;
				$pages{$page}=1;
			},
		}, $dir);
	}

	return \@files, \%pages;
}
339
# Bring the rendered wiki up to date with the source files.
#
# Steps, in order:
#  1. find all source files; work out which pages were added or removed
#  2. work out which files changed (newer mtime, or forced) and let the
#     needsbuild hooks adjust that list
#  3. remember where the old links of changed/removed pages pointed
#  4. scan, then render, the changed files
#  5. render pages that link to added or removed pages
#  6. work out which pages changed their links, and which pages had
#     their set of backlinks changed
#  7. render pages whose dependencies are satisfied by a change,
#     repeating until nothing new gets rendered
#  8. render pages whose backlinks changed
#  9. remove output files that are no longer produced, and run the
#     delete and change hooks
sub refresh () {
	srcdir_check();
	run_hooks(refresh => sub { shift->() });
	my ($files, $exists)=find_src_files();

	my (%rendered, @add, @del, @internal, @internal_change);

	# check for added or removed pages
	foreach my $file (@$files) {
		my $page=pagename($file);
		if (exists $pagesources{$page} && $pagesources{$page} ne $file) {
			# the page has changed its type
			$forcerebuild{$page}=1;
		}
		$pagesources{$page}=$file;
		if (! $pagemtime{$page}) {
			# No (or zero) recorded mtime: the page is new.
			if (isinternal($page)) {
				push @internal, $file;
			}
			else {
				push @add, $file;
				if ($config{getctime} && -e "$config{srcdir}/$file") {
					eval {
						my $time=rcs_getctime("$config{srcdir}/$file");
						$pagectime{$page}=$time;
					};
					if ($@) {
						print STDERR $@;
					}
				}
			}
			$pagecase{lc $page}=$page;
			if (! exists $pagectime{$page}) {
				$pagectime{$page}=(srcfile_stat($file))[10];
			}
		}
	}
	foreach my $page (keys %pagemtime) {
		if (! $exists->{$page}) {
			# Known from an earlier run, but its source is gone.
			if (isinternal($page)) {
				push @internal, $pagesources{$page};
			}
			else {
				debug(sprintf(gettext("removing old page %s"), $page));
				push @del, $pagesources{$page};
			}
			$links{$page}=[];
			$renderedfiles{$page}=[];
			$pagemtime{$page}=0;
			foreach my $old (@{$oldrenderedfiles{$page}}) {
				prune($config{destdir}."/".$old);
			}
			delete $pagesources{$page};
			foreach my $source (keys %destsources) {
				if ($destsources{$source} eq $page) {
					delete $destsources{$source};
				}
			}
		}
	}

	# find changed and new files
	my @needsbuild;
	foreach my $file (@$files) {
		my $page=pagename($file);
		my ($srcfile, @stat)=srcfile_stat($file);
		if (! exists $pagemtime{$page} ||
		    $stat[9] > $pagemtime{$page} ||
		    $forcerebuild{$page}) {
			$pagemtime{$page}=$stat[9];

			if (isinternal($page)) {
				# Preprocess internal page in scan-only mode.
				preprocess($page, $page, readfile($srcfile), 1);
				push @internal_change, $file;
			}
			else {
				push @needsbuild, $file;
			}
		}
	}
	run_hooks(needsbuild => sub { shift->(\@needsbuild) });

	# before scanning, make a note of where pages'
	# old links pointed
	my %oldlink_targets;
	foreach my $file (@needsbuild, @del) {
		my $page=pagename($file);
		if (exists $oldlinks{$page}) {
			foreach my $link (@{$oldlinks{$page}}) {
				$oldlink_targets{$page}{$link}=bestlink($page, $link);
			}
		}
	}

	# scan and render changed files
	foreach my $file (@needsbuild) {
		debug(sprintf(gettext("scanning %s"), $file));
		scan($file);
	}
	calculate_links();
	foreach my $file (@needsbuild) {
		debug(sprintf(gettext("building %s"), $file));
		render($file);
		$rendered{$file}=1;
	}
	foreach my $file (@internal, @internal_change) {
		# internal pages are not rendered
		my $page=pagename($file);
		delete $depends{$page};
		delete $depends_simple{$page};
		foreach my $old (@{$renderedfiles{$page}}) {
			delete $destsources{$old};
		}
		$renderedfiles{$page}=[];
	}

	# rebuild pages that link to added or removed pages
	if (@add || @del) {
		foreach my $f (@add, @del) {
			my $p=pagename($f);
			foreach my $page (keys %{$backlinks{$p}}) {
				my $file=$pagesources{$page};
				next if $rendered{$file};
				debug(sprintf(gettext("building %s, which links to %s"), $file, $p));
				render($file);
				$rendered{$file}=1;
			}
		}
	}

	# determine which links, on what pages, have changed
	#
	# %backlinkchanged is keyed by the page a link resolves to, since it
	# is looked up in %pagesources further down; the raw link text is
	# generally not a page name. %linkchangers is keyed by the lowercased
	# name of the page whose links changed.
	my %backlinkchanged;
	my %linkchangers;
	foreach my $file (@needsbuild, @del) {
		my $page=pagename($file);
		if (exists $links{$page}) {
			foreach my $link (@{$links{$page}}) {
				my $target=bestlink($page, $link);
				my $known=exists $oldlink_targets{$page}{$link};
				my $oldtarget=$known ? $oldlink_targets{$page}{$link} : undef;
				if (! $known || $target ne $oldtarget) {
					# New link, or one that now resolves elsewhere:
					# the new target gained a backlink and the
					# previous target (if any) lost one.
					$backlinkchanged{$target}=1;
					$backlinkchanged{$oldtarget}=1 if $known;
					$linkchangers{lc($page)}=1;
				}
				delete $oldlink_targets{$page}{$link};
			}
		}
		if (exists $oldlink_targets{$page} &&
		    %{$oldlink_targets{$page}}) {
			# Whatever is left are links the page no longer has;
			# the pages they used to resolve to lost a backlink.
			foreach my $target (values %{$oldlink_targets{$page}}) {
				$backlinkchanged{$target}=1;
			}
			$linkchangers{lc($page)}=1;
		}
	}
	%oldlink_targets=();

	# rebuild dependant pages, recursively
	#
	# Each pass renders at most one page and then starts over, so that
	# the page just rendered counts as changed for the next pass.
	my $deps;
	do {
		$deps=0;
		my @changed=(keys %rendered, @del);
		my @exists_changed=(@add, @del);

		my %lc_changed = map { lc(pagename($_)) => 1 } @changed;
		my %lc_exists_changed = map { lc(pagename($_)) => 1 } @exists_changed;

		foreach my $f (@$files) {
			next if $rendered{$f};
			my $p=pagename($f);
			my $reason = undef;

			# Simple dependencies name a single (lowercased) page
			# and can be checked with hash lookups.
			if (exists $depends_simple{$p}) {
				foreach my $d (keys %{$depends_simple{$p}}) {
					if (($depends_simple{$p}{$d} & $IkiWiki::DEPEND_CONTENT &&
					     $lc_changed{$d})
					    ||
					    ($depends_simple{$p}{$d} & $IkiWiki::DEPEND_PRESENCE &&
					     $lc_exists_changed{$d})
					    ||
					    ($depends_simple{$p}{$d} & $IkiWiki::DEPEND_LINKS &&
					     $linkchangers{$d})
					) {
						$reason = $d;
						last;
					}
				}
			}

			# Other dependencies are pagespecs that have to be
			# matched against every candidate file.
			if (exists $depends{$p} && ! defined $reason) {
				D: foreach my $d (keys %{$depends{$p}}) {
					my $sub=pagespec_translate($d);
					next if $@ || ! defined $sub;

					# only consider internal files
					# if the page explicitly depends
					# on such files
					my $internal_dep=$d =~ /internal\(/;

					my @candidates;
					if ($depends{$p}{$d} & $IkiWiki::DEPEND_PRESENCE) {
						@candidates=@exists_changed;
						push @candidates, @internal
							if $internal_dep;
					}
					if (($depends{$p}{$d} & ($IkiWiki::DEPEND_CONTENT | $IkiWiki::DEPEND_LINKS))) {
						@candidates=@changed;
						push @candidates, @internal, @internal_change
							if $internal_dep;
					}

					foreach my $file (@candidates) {
						next if $file eq $f;
						my $page=pagename($file);
						if ($sub->($page, location => $p)) {
							if ($depends{$p}{$d} & $IkiWiki::DEPEND_LINKS) {
								next unless $linkchangers{lc($page)};
							}
							$reason = $page;
							last D;
						}
					}
				}
			}

			if (defined $reason) {
				debug(sprintf(gettext("building %s, which depends on %s"), $f, $reason));
				render($f);
				$rendered{$f}=1;
				$deps=1;
				last;
			}
		}
	} while $deps;

	# update backlinks
	foreach my $target (keys %backlinkchanged) {
		# Targets without a source file (for example the empty
		# target of an unresolved link) are skipped.
		my $linkfile=$pagesources{$target};
		if (defined $linkfile) {
			next if $rendered{$linkfile};
			debug(sprintf(gettext("building %s, to update its backlinks"), $linkfile));
			render($linkfile);
			$rendered{$linkfile}=1;
		}
	}

	# remove no longer rendered files
	foreach my $src (keys %rendered) {
		my $page=pagename($src);
		foreach my $file (@{$oldrenderedfiles{$page}}) {
			if (! grep { $_ eq $file } @{$renderedfiles{$page}}) {
				debug(sprintf(gettext("removing %s, no longer built by %s"), $file, $page));
				prune($config{destdir}."/".$file);
			}
		}
	}

	if (@del) {
		run_hooks(delete => sub { shift->(@del) });
	}
	if (%rendered) {
		run_hooks(change => sub { shift->(keys %rendered) });
	}
}
605
# Render the single file named by $config{render}, print the resulting
# page to standard output, and exit. Dies if the file has no page type.
sub commandline_render () {
	# The index is loaded under the wiki lock, which is released
	# again right away.
	lockwiki();
	loadindex();
	unlockwiki();

	my $srcfile=possibly_foolish_untaint($config{render});
	# The page's file name is the path with the srcdir part removed.
	(my $file=$srcfile)=~s/\Q$config{srcdir}\E\/?//;

	my $type=pagetype($file);
	if (! defined $type) {
		die sprintf(gettext("ikiwiki: cannot build %s"), $srcfile)."\n";
	}

	my $content=readfile($srcfile);
	my $page=pagename($file);
	$pagesources{$page}=$file;

	# Same pipeline as a normal build: filter, preprocess, linkify,
	# then htmlize.
	foreach my $stage (\&filter, \&preprocess, \&linkify) {
		$content=$stage->($page, $page, $content);
	}
	$content=htmlize($page, $page, $type, $content);

	$pagemtime{$page}=(stat($srcfile))[9];
	unless (exists $pagectime{$page}) {
		$pagectime{$page}=$pagemtime{$page};
	}

	print genpage($page, $content);
	exit 0;
}
630
631 1