Merge branch 'master' of ssh://git.kitenet.net/srv/git/ikiwiki.info
[ikiwiki] / IkiWiki / Plugin / htmlscrubber.pm
1 #!/usr/bin/perl
2 package IkiWiki::Plugin::htmlscrubber;
3
4 use warnings;
5 use strict;
6 use IkiWiki 2.00;
7
# Called when the plugin is loaded: registers sanitize() as this plugin's
# "sanitize" hook under the id "htmlscrubber".
sub import { #{{{
        hook(
                type => "sanitize",
                id   => "htmlscrubber",
                call => \&sanitize,
        );
} # }}}
11
# Sanitize hook body. Takes named parameters; the "content" parameter is
# passed through the shared scrubber and the scrubbed result is returned.
sub sanitize (@) { #{{{
        my %args = @_;
        my $scrubber = scrubber();
        return $scrubber->scrub($args{content});
} # }}}
16
# Cached HTML::Scrubber instance; built on the first call to scrubber().
my $_scrubber;

# Returns the HTML::Scrubber object used to sanitize page content,
# constructing and caching it on first use.
#
# HTML::Scrubber is loaded lazily; if it cannot be loaded, error() is
# called with the load failure.
#
# Only the listed tags are allowed. Attributes are allowed by name, except
# that attributes holding a URL must additionally match a safe-URL pattern
# so that script-bearing URIs (javascript:, vbscript:, data:text/html, ...)
# are stripped instead of being passed through to the browser.
sub scrubber { #{{{
        return $_scrubber if defined $_scrubber;

        eval q{use HTML::Scrubber};
        error($@) if $@;

        # URI schemes that may appear in URL-valued attributes. Anything
        # with an explicit scheme that is not in this list is rejected.
        my $uri_schemes = join("|", map { quotemeta($_) } qw{
                http https ftp mailto file telnet gopher news nntp
                irc ircs sip sips tel sms xmpp webcal feed rsync
                ssh sftp git svn
        });
        # A URL value is accepted when it
        #  - starts with one of the known schemes, or
        #  - is a data: URI for a png/jpeg/gif image, or
        #  - is a relative reference, i.e. has no ":" before the first
        #    "/", "?" or "#" (this also covers "#fragment" and "/path").
        my $safe_url = qr{^(?:(?:$uri_schemes):|data:image/(?:png|jpeg|gif)[;,]|[^:/?#]*(?:[/?#]|\z))}i;

        # Lists based on http://feedparser.org/docs/html-sanitization.html
        # With html 5 video and audio tags added.
        $_scrubber = HTML::Scrubber->new(
                allow => [qw{
                        a abbr acronym address area b big blockquote br br/
                        button caption center cite code col colgroup dd del
                        dfn dir div dl dt em fieldset font form h1 h2 h3 h4
                        h5 h6 hr hr/ i img input ins kbd label legend li map
                        menu ol optgroup option p p/ pre q s samp select small
                        span strike strong sub sup table tbody td textarea
                        tfoot th thead tr tt u ul var
                        video audio
                }],
                default => [undef, { (
                        # Attributes whose value is passed through unchecked.
                        map { $_ => 1 } qw{
                                abbr accept accept-charset accesskey
                                align alt axis border cellpadding cellspacing
                                char charoff charset checked class
                                clear cols colspan color compact coords
                                datetime dir disabled enctype for frame
                                headers height hreflang hspace id ismap
                                label lang maxlength media method
                                multiple name nohref noshade nowrap prompt
                                readonly rel rev rows rowspan rules scope
                                selected shape size span start summary
                                tabindex target title type valign
                                value vspace width
                                autoplay loopstart loopend end
                                playcount controls
                        } ),
                        "/" => 1, # emit proper <hr /> XHTML
                        # URL-valued attributes: kept only when the value
                        # matches the safe-URL pattern above.
                        ( map { $_ => $safe_url } qw{
                                href src action poster cite longdesc usemap
                        } ),
                        }],
        );
        return $_scrubber;
} # }}}
58
59 1