review: I would suggest cherry-picking part of the branch
[ikiwiki] / doc / todo / xapian_omega_same_lang_when_indexing_and_searching.mdwn
1 Hi, by default xapian/omega uses the locale param from blog.setup to set the stemmer language when the wiki is indexed.
2
3 But when you search, the omega cgi is used and no language is set for it, so if you index in French but search in English, you get bad results.
4
5 I propose adding a new param omega_stemmer in blog.setup, so that the same language is used for both indexing and searching. If omega_stemmer is not set, we fall back to the LANG env param.
6
7 Below, you can find the patch.
8
9
10
11         diff --git a/IkiWiki/Plugin/search.pm b/IkiWiki/Plugin/search.pm
12         index 42d2e0d..08a0a01 100644
13         --- a/IkiWiki/Plugin/search.pm
14         +++ b/IkiWiki/Plugin/search.pm
15         @@ -33,6 +33,13 @@ sub getsetup () {
16                                 safe => 0, # external program
17                                 rebuild => 0,
18                         },
19         +               omega_stemmer => {
20         +                       type => "string",
21         +                       example => "en",
22         +                       description => "language used for indexing and searching",
23         +                       safe => 0, # external program
24         +                       rebuild => 0,
25         +               },
26          }
27          
28          sub checkconfig () {
29         @@ -136,7 +143,7 @@ sub indexhtml (@) {
30                 # Index document and add terms for other metadata.
31                 my $tg = Search::Xapian::TermGenerator->new();
32                 if (! $stemmer) {
33         -               my $langcode=$ENV{LANG} || "en";
34         +               my $langcode=$config{omega_stemmer} || $ENV{LANG} || "en";
35                         $langcode=~s/_.*//;
36          
37                         # This whitelist is here to work around a xapian bug (#486138)
38         @@ -183,6 +190,18 @@ sub cgi ($) {
39                         IkiWiki::loadindex();
40                         $ENV{HELPLINK}=htmllink("", "", "ikiwiki/searching",
41                                 noimageinline => 1, linktext => "Help");
42         +               my $langcode=$config{omega_stemmer} || $ENV{LANG} || "en";
43         +               $langcode=~s/_.*//;
44         +
45         +               # This whitelist is here to work around a xapian bug (#486138)
46         +               my @whitelist=qw{da de en es fi fr hu it no pt ru ro sv tr};
47         +
48         +               if (grep { $_ eq $langcode } @whitelist) {
49         +                        $ENV{STEMMER}=$langcode;
50         +               }
51         +               else {
52         +                        $ENV{STEMMER}="en";
53         +               }
54                         exec($config{omega_cgi}) || error("$config{omega_cgi} failed: $!");
55                 }
56          }
57         diff --git a/templates/searchquery.tmpl b/templates/searchquery.tmpl
58         index 15bc78e..4742460 100644
59         --- a/templates/searchquery.tmpl
60         +++ b/templates/searchquery.tmpl
61         @@ -1,6 +1,6 @@
62          $setmap{prefix,title,S}
63          $setmap{prefix,link,XLINK}
64         -$set{thousand,$.}$set{decimal,.}$setmap{BN,,Any Country,uk,England,fr,France}
65         +$set{thousand,$.}$set{decimal,.}$setmap{BN,,Any Country,uk,England,fr,France}$set{stemmer,$env{STEMMER}}
66          ${
67          $def{PREV,
68          $if{$ne{$topdoc,0},<INPUT TYPE=image NAME="&lt;" ALT="&lt;"
69
70 Regards,
71
72 [[!tag patch]]