meta: Generate meta description tags even when the html scrubber is enabled.
[ikiwiki] / IkiWiki / Plugin / meta.pm
1 #!/usr/bin/perl
2 # Ikiwiki metadata plugin.
3 package IkiWiki::Plugin::meta;
4
5 use warnings;
6 use strict;
7 use IkiWiki 3.00;
8
9 my %metaheaders;
10
# Called when the plugin is loaded: registers this plugin's callbacks
# under the id "meta". The preprocess hook is registered with scan => 1,
# so it is also invoked during the scan pass.
sub import {
	# One row per hook: type, callback, then any extra hook() options.
	my @registrations=(
		[getsetup     => \&getsetup],
		[needsbuild   => \&needsbuild],
		[preprocess   => \&preprocess, scan => 1],
		[pagetemplate => \&pagetemplate],
	);

	foreach my $registration (@registrations) {
		my ($type, $callback, @options)=@$registration;
		hook(type => $type, id => "meta", call => $callback, @options);
	}
}
17
# getsetup hook: describes the plugin itself. Returns a key/value list
# with a single "plugin" entry whose value is a hash reference marking
# the plugin as safe and leaving "rebuild" undefined.
sub getsetup () {
	my %plugin_info=(
		safe => 1,
		rebuild => undef,
	);
	return (plugin => \%plugin_info);
}
25
# needsbuild hook: receives a reference to the list of source files that
# are about to be rebuilt. For every page that has stored meta state and
# whose source file is in that list, the stored state is dropped so that
# it only reappears if the directive is still present after the rebuild.
#
# Returns the list reference it was given, unmodified.
sub needsbuild (@) {
	my $needsbuild=shift;

	# Index the files once so each page costs a single hash lookup
	# instead of a scan over the whole list.
	my %rebuilding=map { $_ => 1 } @{$needsbuild || []};

	foreach my $page (keys %pagestate) {
		next unless exists $pagestate{$page}{meta};
		next unless exists $pagesources{$page} &&
		            exists $rebuilding{$pagesources{$page}};

		# remove state, it will be re-added
		# if the preprocessor directive is still
		# there during the rebuild
		delete $pagestate{$page}{meta};
	}

	# Hand the list back; callers that ignore the return value are
	# unaffected.
	return $needsbuild;
}
40
# Passes an HTML fragment through the htmlscrubber plugin's sanitize()
# function when that function is available; otherwise returns the
# fragment unchanged.
#
# Arguments: the HTML content, and the destination page name that is
# forwarded to sanitize() as "destpage".
sub scrub ($$) {
	my ($html, $destpage)=@_;

	# No scrubber loaded: nothing to filter with.
	return $html
		unless IkiWiki::Plugin::htmlscrubber->can("sanitize");

	return IkiWiki::Plugin::htmlscrubber::sanitize(
		content => $html,
		destpage => $destpage,
	);
}
50
# Checks a URL against the htmlscrubber plugin's safe_url_regexp.
#
# Returns 1 when that regexp is not available (every URL is then
# accepted); otherwise returns the result of matching the URL against it.
sub safeurl ($) {
	my ($candidate)=@_;

	# Without a defined regexp there is nothing to check against.
	return 1
		if (! exists $IkiWiki::Plugin::htmlscrubber::{safe_url_regexp})
		|| (! defined $IkiWiki::Plugin::htmlscrubber::safe_url_regexp);

	return $candidate=~/$IkiWiki::Plugin::htmlscrubber::safe_url_regexp/;
}
61
# Renders a snippet of wiki markup belonging to $page for display on
# $destpage: the content is run through IkiWiki::preprocess, then
# IkiWiki::linkify, then IkiWiki::htmlize using the page type derived
# from the page's source file.
#
# Arguments: page name, destination page name, content to render.
sub htmlize ($$$) {
	my ($page, $destpage, $content)=@_;

	# The calls stay nested on purpose so that argument evaluation
	# order and calling context are exactly those of a single
	# expression.
	return IkiWiki::htmlize(
		$page,
		$destpage,
		pagetype($pagesources{$page}),
		IkiWiki::linkify(
			$page,
			$destpage,
			IkiWiki::preprocess($page, $destpage, $content)
		)
	);
}
70
# Handler for the meta preprocessor directive.
#
# Arguments are a flat key/value list. The first key names the metadata
# field and its value is the field's value. The "page", "destpage" and
# "preview" entries are removed from the list, so whatever remains in
# %params are the field's own extra parameters (for example "rel" for a
# stylesheet or "delay" for a redir).
#
# The hook is registered with scan => 1, so it can be called in void
# context (scan pass) as well as in non-void context (preprocessing
# pass). The first if/elsif chain runs in both cases and records data in
# %pagestate (plus links, page creation time, and the license/copyright
# link tags). The second chain runs only in non-void context and queues
# the remaining <head> fragments in %metaheaders.
#
# Returns "" (or nothing when called in void context); the directive
# produces no inline output. Raises an error via error() when a
# referenced stylesheet or redir target cannot be found, when a redir
# cycle is detected, or when HTML::Entities cannot be loaded.
sub preprocess (@) {
	return "" unless @_;
	my %params=@_;
	my $key=shift;
	my $value=$params{$key};
	delete $params{$key};
	my $page=$params{page};
	delete $params{page};
	my $destpage=$params{destpage};
	delete $params{destpage};
	delete $params{preview};

	eval q{use HTML::Entities};
	# decode_entities is needed unconditionally below, so report a
	# failed load here instead of failing later on an undefined sub.
	error($@) if $@;
	# Always decode, even if encoding later, since it might not be
	# fully encoded.
	$value=decode_entities($value);

	# Metadata collection that needs to happen during the scan pass.
	if ($key eq 'title') {
		$pagestate{$page}{meta}{title}=HTML::Entities::encode_numeric($value);
		return "";
	}
	elsif ($key eq 'description') {
		$pagestate{$page}{meta}{description}=HTML::Entities::encode_numeric($value);
		# fallthrough
	}
	elsif ($key eq 'guid') {
		$pagestate{$page}{meta}{guid}=HTML::Entities::encode_numeric($value);
		# fallthrough
	}
	elsif ($key eq 'license') {
		push @{$metaheaders{$page}}, '<link rel="license" href="#page_license" />';
		$pagestate{$page}{meta}{license}=$value;
		return "";
	}
	elsif ($key eq 'copyright') {
		push @{$metaheaders{$page}}, '<link rel="copyright" href="#page_copyright" />';
		$pagestate{$page}{meta}{copyright}=$value;
		return "";
	}
	elsif ($key eq 'link' && ! %params) {
		# hidden WikiLink
		add_link($page, $value);
		return "";
	}
	elsif ($key eq 'author') {
		$pagestate{$page}{meta}{author}=$value;
		# fallthrough
	}
	elsif ($key eq 'authorurl') {
		$pagestate{$page}{meta}{authorurl}=$value if safeurl($value);
		# fallthrough
	}
	elsif ($key eq 'permalink') {
		$pagestate{$page}{meta}{permalink}=$value if safeurl($value);
		# fallthrough
	}
	elsif ($key eq 'date') {
		# Date parsing is best-effort: silently skipped when
		# Date::Parse is unavailable or the value does not parse.
		eval q{use Date::Parse};
		if (! $@) {
			my $time = str2time($value);
			$IkiWiki::pagectime{$page}=$time if defined $time;
		}
	}
	elsif ($key eq 'updated') {
		eval q{use Date::Parse};
		if (! $@) {
			my $time = str2time($value);
			$pagestate{$page}{meta}{updated}=$time if defined $time;
		}
	}

	if (! defined wantarray) {
		# avoid collecting duplicate data during scan pass
		return;
	}

	# Metadata handling that happens only during preprocessing pass.
	if ($key eq 'permalink') {
		if (safeurl($value)) {
			push @{$metaheaders{$page}}, scrub('<link rel="bookmark" href="'.encode_entities($value).'" />', $destpage);
		}
	}
	elsif ($key eq 'stylesheet') {
		my $rel=exists $params{rel} ? $params{rel} : "alternate stylesheet";
		my $title=exists $params{title} ? $params{title} : $value;
		# adding .css to the value prevents using any old web
		# editable page as a stylesheet
		my $stylesheet=bestlink($page, $value.".css");
		if (! length $stylesheet) {
			error(gettext("stylesheet not found"));
		}
		push @{$metaheaders{$page}}, '<link href="'.urlto($stylesheet, $page).
			'" rel="'.encode_entities($rel).
			'" title="'.encode_entities($title).
			"\" type=\"text/css\" />";
	}
	elsif ($key eq 'openid') {
		# 0: emit both OpenID 1 and 2 tags, 1: OpenID 1 only,
		# 2: OpenID 2 only.
		my $delegate=0; # both by default
		if (exists $params{delegate}) {
			$delegate = 1 if lc $params{delegate} eq 'openid';
			$delegate = 2 if lc $params{delegate} eq 'openid2';
		}
		if (exists $params{server} && safeurl($params{server})) {
			push @{$metaheaders{$page}}, '<link href="'.encode_entities($params{server}).
				'" rel="openid.server" />' if $delegate != 2;
			push @{$metaheaders{$page}}, '<link href="'.encode_entities($params{server}).
				'" rel="openid2.provider" />' if $delegate != 1;
		}
		if (safeurl($value)) {
			push @{$metaheaders{$page}}, '<link href="'.encode_entities($value).
				'" rel="openid.delegate" />' if $delegate != 2;
			push @{$metaheaders{$page}}, '<link href="'.encode_entities($value).
				'" rel="openid2.local_id" />' if $delegate != 1;
		}
		if (exists $params{"xrds-location"} && safeurl($params{"xrds-location"})) {
			# The space before content= is required: attributes
			# must be separated by whitespace.
			push @{$metaheaders{$page}}, '<meta http-equiv="X-XRDS-Location" '.
				'content="'.encode_entities($params{"xrds-location"}).'" />';
		}
	}
	elsif ($key eq 'redir') {
		# Only emit the redirect when the page is rendered as itself.
		return "" if $page ne $destpage;
		my $safe=0;
		if ($value !~ /^\w+:\/\//) {
			# Not an absolute URL: treat it as a wiki page link,
			# optionally followed by #anchor.
			my ($redir_page, $redir_anchor) = split /\#/, $value;

			my $link=bestlink($page, $redir_page);
			if (! length $link) {
				error(gettext("redir page not found"));
			}
			add_depends($page, $link, deptype("presence"));

			$value=urlto($link, $page);
			$value.='#'.$redir_anchor if defined $redir_anchor;
			$safe=1;

			# redir cycle detection
			$pagestate{$page}{meta}{redir}=$link;
			my $at=$page;
			my %seen;
			while (exists $pagestate{$at}{meta}{redir}) {
				if ($seen{$at}) {
					error(gettext("redir cycle is not allowed"));
				}
				$seen{$at}=1;
				$at=$pagestate{$at}{meta}{redir};
			}
		}
		else {
			$value=encode_entities($value);
		}
		my $delay=int(exists $params{delay} ? $params{delay} : 0);
		my $redir="<meta http-equiv=\"refresh\" content=\"$delay; URL=$value\" />";
		if (! $safe) {
			# External URLs are user-supplied, so the tag goes
			# through the scrubber; internal links do not.
			$redir=scrub($redir, $destpage);
		}
		push @{$metaheaders{$page}}, $redir;
	}
	elsif ($key eq 'link') {
		# A link with extra parameters becomes a <link> tag; the
		# parameter-less form was handled above as a hidden WikiLink.
		if (%params) {
			push @{$metaheaders{$page}}, scrub("<link href=\"".encode_entities($value)."\" ".
				join(" ", map {
					encode_entities($_)."=\"".encode_entities(decode_entities($params{$_}))."\""
				} keys %params).
				" />\n", $destpage);
		}
	}
	elsif ($key eq 'robots') {
		push @{$metaheaders{$page}}, '<meta name="robots"'.
			' content="'.encode_entities($value).'" />';
	}
	elsif ($key eq 'description') {
		# Not passed through scrub(), so the tag is generated even
		# when the html scrubber is enabled; the value is still
		# entity-encoded.
		push @{$metaheaders{$page}}, '<meta name="'.encode_entities($key).
			'" content="'.encode_entities($value).'" />';
	}
	else {
		# Any other field becomes a generic, scrubbed <meta> tag.
		push @{$metaheaders{$page}}, scrub('<meta name="'.encode_entities($key).
			'" content="'.encode_entities($value).'" />', $destpage);
	}

	return "";
}
253
# pagetemplate hook: fills template variables from the metadata gathered
# for a page. Reads the "page", "destpage" and "template" parameters.
#
# Sets, when the template has the corresponding variable:
#  * meta: the page's collected <head> fragments, de-duplicated and
#    joined with newlines;
#  * title and title_overridden: when a meta title is stored;
#  * author, authorurl, permalink: stored values, verbatim;
#  * license, copyright: rendered via htmlize(), unless the page is
#    shown on a different destination page carrying the same value.
sub pagetemplate (@) {
	my %args=@_;
	my ($page, $destpage, $template)=@args{qw(page destpage template)};

	if (exists $metaheaders{$page} && $template->query(name => "meta")) {
		# avoid duplicate meta lines, keeping first occurrences in order
		my (%emitted, @unique);
		foreach my $header (@{$metaheaders{$page}}) {
			next if $emitted{$header}++;
			push @unique, $header;
		}
		$template->param(meta => join("\n", @unique));
	}

	if (exists $pagestate{$page}{meta}{title} && $template->query(name => "title")) {
		my $title=$pagestate{$page}{meta}{title};
		$template->param(title => $title);
		$template->param(title_overridden => 1);
	}

	# Fields copied to the template as stored.
	foreach my $field (qw{author authorurl permalink}) {
		next unless exists $pagestate{$page}{meta}{$field};
		next unless $template->query(name => $field);
		$template->param($field => $pagestate{$page}{meta}{$field});
	}

	# Fields containing wiki markup that must be rendered first.
	foreach my $field (qw{license copyright}) {
		next unless exists $pagestate{$page}{meta}{$field};
		next unless $template->query(name => $field);

		my $own=$pagestate{$page}{meta}{$field};
		# Skip only when shown on another page that already
		# carries an identical value.
		my $distinct=$page eq $destpage
			|| ! exists $pagestate{$destpage}{meta}{$field}
			|| $own ne $pagestate{$destpage}{meta}{$field};
		next unless $distinct;

		$template->param($field => htmlize($page, $destpage, $own));
	}
}
283
# Shared implementation of the metadata PageSpec tests.
#
# Arguments: the metadata field name, the page being tested, and a glob
# to compare the field's value against (case-insensitively, anchored at
# both ends). The value comes from the page's stored meta state; for the
# "title" field only, pagetitle() is used when nothing is stored.
#
# Returns an IkiWiki::SuccessReason when the value matches, otherwise an
# IkiWiki::FailReason (also when the page has no such field).
sub match {
	my ($field, $page, $glob)=@_;

	# turn glob into a safe regexp
	my $re=IkiWiki::glob2re($glob);

	my $val=exists $pagestate{$page}{meta}{$field} ? $pagestate{$page}{meta}{$field}
	       : $field eq 'title'                     ? pagetitle($page)
	       :                                         undef;

	return IkiWiki::FailReason->new("$page does not have a $field", "" => 1)
		unless defined $val;

	return IkiWiki::FailReason->new("$re does not match $field of $page", "" => 1)
		unless $val=~/^$re$/i;

	return IkiWiki::SuccessReason->new("$re matches $field of $page", $page => $IkiWiki::DEPEND_CONTENT, "" => 1);
}
311
312 package IkiWiki::PageSpec;
313
# PageSpec test: forwards its arguments to the meta plugin's match()
# for the "title" field and returns its result.
sub match_title ($$;@) {
	return IkiWiki::Plugin::meta::match('title', @_);
}
317
# PageSpec test: forwards its arguments to the meta plugin's match()
# for the "author" field and returns its result.
sub match_author ($$;@) {
	return IkiWiki::Plugin::meta::match('author', @_);
}
321
# PageSpec test: forwards its arguments to the meta plugin's match()
# for the "authorurl" field and returns its result.
sub match_authorurl ($$;@) {
	return IkiWiki::Plugin::meta::match('authorurl', @_);
}
325
# PageSpec test: forwards its arguments to the meta plugin's match()
# for the "license" field and returns its result.
sub match_license ($$;@) {
	return IkiWiki::Plugin::meta::match('license', @_);
}
329
# PageSpec test: forwards its arguments to the meta plugin's match()
# for the "copyright" field and returns its result.
sub match_copyright ($$;@) {
	return IkiWiki::Plugin::meta::match('copyright', @_);
}
333
334 1