forgot to sign
[ikiwiki] / doc / todo / Mercurial_backend_update.mdwn
1 I submitted some changes that added 5 "Yes"es and 2 "Fast"s to Mercurial at [[/rcs]], but some functionality is still missing compared to e.g. `git.pm`, which the Mercurial backend should be able to match.
2
3 To do this, a more basic rewrite would simplify things. I inline the complete file below with comments. I don't expect anyone to take the time to read it all at once, but I'd be glad if those interested in the Mercurial backend could do some beta testing.
4
5 I break out my comments from the code to make them more readable. I comment all the changes as compared to current upstream. --[[Daniel Andersson]]
6
7 ---
8
9                 #!/usr/bin/perl
10                 package IkiWiki::Plugin::mercurial;
11
12                 use warnings;
13                 use strict;
14                 use IkiWiki;
15                 use Encode;
16                 use open qw{:utf8 :std};
17
18
19 Pattern to validate hg sha1 sums. hg usually truncates the hash to 12
20 characters and prepends a local revision number for output, but internally
21 it keeps a 40 character hash. Will use the long version in this code.
22
23                 my $sha1_pattern = qr/[0-9a-fA-F]{40}/;
24
25 Message to skip in recent changes
26
27                 my $dummy_commit_msg = 'dummy commit';
28
29 *TODO:* `$hg_dir` is not really implemented yet, and will not be until a srcdir/repository distinction is
30 made as for e.g. Git. It is used in `rcs_receive`, and for attachments in `hg_parse_changes`. See the comments in those places, though.
31
32                 my $hg_dir=undef;
33
34                 sub import { # Registers the setup hooks and one "rcs" hook per rcs_* function via hook().
35                         hook(type => "checkconfig", id => "mercurial", call => \&checkconfig);
36                         hook(type => "getsetup", id => "mercurial", call => \&getsetup);
37                         hook(type => "rcs", id => "rcs_update", call => \&rcs_update);
38                         hook(type => "rcs", id => "rcs_prepedit", call => \&rcs_prepedit);
39                         hook(type => "rcs", id => "rcs_commit", call => \&rcs_commit);
40                         hook(type => "rcs", id => "rcs_commit_staged", call => \&rcs_commit_staged);
41                         hook(type => "rcs", id => "rcs_add", call => \&rcs_add);
42                         hook(type => "rcs", id => "rcs_remove", call => \&rcs_remove);
43                         hook(type => "rcs", id => "rcs_rename", call => \&rcs_rename);
44                         hook(type => "rcs", id => "rcs_recentchanges", call => \&rcs_recentchanges);
45                         hook(type => "rcs", id => "rcs_diff", call => \&rcs_diff);
46                         hook(type => "rcs", id => "rcs_getctime", call => \&rcs_getctime);
47                         hook(type => "rcs", id => "rcs_getmtime", call => \&rcs_getmtime);
48                         hook(type => "rcs", id => "rcs_preprevert", call => \&rcs_preprevert);
49                         hook(type => "rcs", id => "rcs_revert", call => \&rcs_revert);
50
51 This last hook is "unsanctioned" from [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve its function
52 another way later.
53
54                         hook(type => "rcs", id => "rcs_wrapper_postcall", call => \&rcs_wrapper_postcall);
55                 }
56
57                 sub checkconfig () { # If mercurial_wrapper is set and non-empty, queue a wrapper entry on $config{wrappers}.
58                         if (exists $config{mercurial_wrapper} && length $config{mercurial_wrapper}) {
59                                 push @{$config{wrappers}}, {
60                                         wrapper => $config{mercurial_wrapper},
61                                         wrappermode => (defined $config{mercurial_wrappermode} ? $config{mercurial_wrappermode} : "06755"),
62
63 Next line part of [[Auto-setup and maintain Mercurial wrapper hooks]].
64
65                                         wrapper_postcall => (defined $config{mercurial_wrapper_hgrc_update} ? $config{mercurial_wrapper_hgrc_update} : "1"),
66                                 };
67                         }
68                 }
69
70                 sub getsetup () { # Returns a list of "name => { ... }" pairs describing the plugin and its setup options.
71                         return
72                                 plugin => {
73                                         safe => 0, # rcs plugin
74                                         rebuild => undef,
75                                         section => "rcs",
76                                 },
77                                 mercurial_wrapper => {
78                                         type => "string",
79                                         #example => # FIXME add example
80                                         description => "mercurial post-commit hook to generate",
81                                         safe => 0, # file
82                                         rebuild => 0,
83                                 },
84                                 mercurial_wrappermode => {
85                                         type => "string",
86                                         example => '06755',
87                                         description => "mode for mercurial_wrapper (can safely be made suid)",
88                                         safe => 0,
89                                         rebuild => 0,
90                                 },
91                                 mercurial_wrapper_hgrc_update => {
92                                         type => "string",
93                                         example => "1",
94                                         description => "updates existing hgrc to reflect path changes for mercurial_wrapper",
95                                         safe => 0,
96                                         rebuild => 0,
97                                 },
98                                 historyurl => {
99                                         type => "string",
100                                         example => "http://example.com:8000/log/tip/\[[file]]",
101                                         description => "url to hg serve'd repository, to show file history (\[[file]] substituted)",
102                                         safe => 1,
103                                         rebuild => 1,
104                                 },
105                                 diffurl => {
106                                         type => "string",
107                                         example => "http://localhost:8000/?fd=\[[r2]];file=\[[file]]",
108                                         description => "url to hg serve'd repository, to show diff (\[[file]] and \[[r2]] substituted)",
109                                         safe => 1,
110                                         rebuild => 1,
111                                 },
112                 }
113
114                 sub safe_hg (&@) {
115                         # Start a child process safely without resorting to /bin/sh. Returns the
116                         # command's output lines (in list context) or its success state (in scalar
117                         # context); with a data handler, lines go to it instead of being collected.
118                         # Arguments: error handler (or undef), data handler (or undef), command list.
119                         my ($error_handler, $data_handler, @cmdline) = @_;
120
121                         my $pid = open my $OUT, "-|"; # Forks; the child's STDOUT is piped to $OUT.
122
123                         error("Cannot fork: $!") if !defined $pid;
124
125                         if (!$pid) {
126                                 # In child.
127                                 # hg commands want to be in wc.
128
129 This `$hg_dir` logic means nothing and could be stripped until srcdir/repdir distinction is made (it's stripped in upstream `mercurial.pm` right now).
130
131                                 if (! defined $hg_dir) {
132                                         chdir $config{srcdir}
133                                             or error("cannot chdir to $config{srcdir}: $!");
134                                 }
135                                 else {
136                                         chdir $hg_dir or error("cannot chdir to $hg_dir: $!");
137                                 }
138
139                                 exec @cmdline or error("Cannot exec '@cmdline': $!");
140                         }
141                         # In parent.
142
143                         my @lines;
144                         while (<$OUT>) {
145                                 chomp;
146
147                                 if (! defined $data_handler) {
148                                         push @lines, $_;
149                                 }
150                                 else {
151                                         last unless $data_handler->($_); # A false return stops the reading.
152                                 }
153                         }
154
155                         close $OUT;
156                         # After close() on the pipe, $? holds the child's exit status.
157                         $error_handler->("'@cmdline' failed: $!") if $? && $error_handler;
158
159                         return wantarray ? @lines : ($? == 0);
160                 }
161                 # Convenient wrappers around safe_hg: on failure call error(), warn, or do nothing.
162                 sub run_or_die ($@) { safe_hg(\&error, undef, @_) }
163                 sub run_or_cry ($@) { safe_hg(sub { warn @_ }, undef, @_) }
164                 sub run_or_non ($@) { safe_hg(undef, undef, @_) }
165
166
167 To handle uncommitted local changes ("ULC"s for short), I use logic similar to the (non-standard) "shelve" extension to Mercurial. By taking a diff before resetting to the last commit, making changes, and then applying the diff again, one can do things Mercurial otherwise refuses to do, which is necessary later.
168
169 This function creates this diff.
170
171                 sub hg_local_dirstate_shelve ($) {
172                         # Creates a diff snapshot of uncommitted changes existing in the srcdir.
173                         # Takes a string (preferably revision) as input to create a unique and
174                         # identifiable diff name. Returns the diff's path, or undef if hg diff is empty.
175                         my $tempdiffname = "diff_".shift;
176                         my $tempdiffpath;
177                         if (my @tempdiff = run_or_die('hg', 'diff', '-g')) {
178                                 # join() rather than temporarily reassigning the global $", which
179                                 # would stay clobbered if writefile() died.
180                                 writefile($tempdiffname, $config{srcdir},
181                                                 join("\n", @tempdiff));
182                                 $tempdiffpath = $config{srcdir}.'/'.$tempdiffname;
183                         }
184                         return $tempdiffpath;
185                 }
186
187 This function restores the diff.
188
189                 sub hg_local_dirstate_unshelve ($) {
190                         # Applies the diff snapshot at the given path (nothing is done for a false
191                         # path) to restore the initial dir state. If the import succeeds, the
192                         # diff is removed. Otherwise it stays, so as not to eradicate local changes
193                         # that may be important; this clutters the directory, though. Better ways
194                         # to handle this are welcome. A true way around this dance is to have a
195                         # separate repository for local changes and push ready commits to the srcdir.
196                         if (my $tempdiffpath = shift) {
197                                 if (run_or_cry('hg', 'import', '--no-commit', $tempdiffpath)) {
198                                         unlink($tempdiffpath);
199                                         return undef;
200                                 }
201                         }
202                 }
203
204 This makes online diffing possible. A similar approach as in `git.pm`, which is [discussed to some length in a comment there](http://source.ikiwiki.branchable.com/?p=source.git;a=blob;f=IkiWiki/Plugin/git.pm;h=cf7fbe9b7c43ee53180612d0411e6202074fb9e0;hb=refs/heads/master#l211), is taken.
205
206                 sub merge_past ($$$) {
207                         # Commit the edited $file on top of revision $sha1 (where the edit
208                         # started) and merge that new head with the current one. Returns
209                         # ($conflict, $tempdiffpath); $conflict is only set if the merge failed.
210                         my ($sha1, $file, $message) = @_;
211                         my @undo;         # Undo stack for cleanup in case of an error
212                         my $conflict;     # File content with conflict markers
213                         my $tempdiffpath; # Diff of shelved local changes, if there were any
214
215                         eval {
216                                 # Hide local changes from Mercurial by renaming the modified
217                                 # file.  Relative paths must be converted to absolute for
218                                 # renaming.
219                                 my ($target, $hidden) = (
220                                         "$config{srcdir}/${file}",
221                                         "$config{srcdir}/${file}.${sha1}"
222                                 );
223                                 rename($target, $hidden)
224                                         or error("rename '$target' to '$hidden' failed: $!");
225                                 # Ensure to restore the renamed file on error.
226                                 push @undo, sub {
227                                         return if ! -e "$hidden"; # already renamed
228                                         rename($hidden, $target)
229                                             or warn "rename '$hidden' to '$target' failed: $!";
230                                 };
231
232
233 Take a snapshot of srcdir to be able to restore uncommitted local changes ("ULCs") afterwards.
234
235 * This must happen _after_ the merging commit in Mercurial, there is no way around it. By design hg refuses to commit merges if there are other changes to tracked content present, no matter how much  you beg.
236
237 * ULCs to the file being edited are special: they can't be diffed here since `editpage.pm` already has overwritten the file. When the web edit session started though, the ULC version (not the committed
238 version) was read into the form, so in a way, the web user _has already merged_ with the ULC. It is not saved in commit history, but that is the exact consequence of "uncommitted" changes. If an ULC is done between the time the web edit started and was submitted, then it is lost, though.  All in all, one shouldn't be editing the srcdir directly when web edits of the same file are allowed. Clone the repo and push changes instead.
239
240 Much of these issues disappear, I believe, if one works with a master repo which only is pushed to.
241
242                                 $tempdiffpath = hg_local_dirstate_shelve($sha1); # No "my": the outer variable is the one returned.
243
244                                 # Ensure uniqueness of bookmarks.
245                                 my $bookmark_upstream_head = "current_head_$sha1";
246                                 my $bookmark_edit_base = "edit_base_$sha1";
247
248                                 # Git and Mercurial differ in the branch concept. Mercurial's
249                                 # "bookmarks" are closer in function in this regard.
250
251 Bookmarks aren't standard until Mercurial 1.8 ([2011--02--10](http://selenic.com/hg/rev/d4ab9486e514)), but they've been bundled with Mercurial since ~2008, so they can be enabled by writing a `hgrc`, which is also being worked on.
252
253                                 # Create a bookmark at current tip.
254                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
255                                                 $bookmark_upstream_head) };
256                                 run_or_die('hg', 'bookmark', $bookmark_upstream_head);
257
258                                 # Create a bookmark at the revision from which the edit was
259                                 # started and switch to it, discarding changes (they are stored
260                                 # in $tempdiff and the hidden file at the moment).
261                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
262                                                 $bookmark_edit_base) };
263                                 run_or_die('hg', 'bookmark', '-r', $sha1, $bookmark_edit_base);
264                                 run_or_die('hg', 'update', '-C', $bookmark_edit_base);
265
266                                 # Reveal the modified file.
267                                 rename($hidden, $target)
268                                     or error("rename '$hidden' to '$target' failed: $!");
269
270                                 # Commit at the bookmarked revision, creating a new head.
271                                 run_or_cry('hg', 'commit', '-m', $message);
272
273                                 # Attempt to merge the newly created head with upstream head.
274                                 # '--tool internal:merge' to avoid spawning a GUI merger.
275
276 (*Semi-TODO:* How do you make this command quiet? On failed merge, it
277 always writes to STDERR and clutters the web server log.)
278
279                                 if (!run_or_non('hg', 'merge', '--tool', 'internal:merge',
280                                                 $bookmark_upstream_head)) {
281                                         # ..., otherwise return file with conflict markers.
282                                         $conflict = readfile($target);
283
284                                         # The hardcore reset approach. Keep your hands inside
285                                         # the cart.
286                                         run_or_die('hg', 'rollback');
287                                         run_or_die('hg', 'update', '-C',
288                                                 $bookmark_upstream_head);
289                                         if ($tempdiffpath) {
290                                                 hg_local_dirstate_unshelve($tempdiffpath);
291                                         }
292
293 Other approaches tried here:
294
295 1. Clean up merge attempt,
296
297         run_or_die('hg', 'update', '-C', $bookmark_upstream_head);
298
299 2. Redo "merge", using only upstream head versions,
300
301         run_or_die('hg', 'merge', '--tool', 'internal:local', $bookmark_edit_base);
302
303 3. dummy commit to close head.
304
305         run_or_non('hg', 'commit', '-m', $message);
306
307 This creates a cluttered and erroneous history. We
308 tell Mercurial to merge, even though we in practice
309 discard. This creates problems when trying to revert
310 changes.
311
312 Other attempt:
313
314 1. Discard merge attempt and switch to temp head,
315
316         run_or_die('hg', 'update', '-C', $bookmark_edit_base);
317
318 2. close the temp head (why do they call the command that in practice closes heads "--close-branch"?),
319
320         run_or_non('hg', 'commit', '--close-branch', '-m', $message);
321
322 3. restore working directory to pre-fiddling status.
323
324         run_or_die('hg', 'update', $bookmark_upstream_head);
325
326 ...but this requires the same amount of forks as the
327 above method, and confuses other parts of ikiwiki
328 since the upstream head is now the third newest
329 revision. Maybe that particular problem is solvable
330 by setting a global default bookmark that follows the
331 main tip.  It will leave clutter in the revision
332 history, though. Two extra commits that in practice
333 don't hold relevant information will be recorded for
334 each failed merge attempt.
335
336 To only create one extra commit, one could imagine
337 adding `--close-branch` to the commit that initially
338 created the new head (since there is no problem
339 merging with closed heads), but it's not possible to
340 close and create a head at the same time, apparently.
341
342                                 }
343                         };
344                         my $failure = $@;
345
346                         # Process undo stack (in reverse order). By policy, cleanup actions
347                         # should normally print a warning on failure.
348                         while (my $handle = pop @undo) {
349                                 $handle->();
350                         }
351
352                         error("Mercurial merge failed!\n$failure\n") if $failure;
353
354                         return ($conflict, $tempdiffpath);
355                 }
356
357                 sub hg_commit_info ($;$;$) {
358                         # Return commit info hashes for at most $num commits, optionally limited
359                         # to $sha1 (one sha1sum to start from, or a "from:to" pair) and to $file.
360                         # List context gives all hashes, scalar context the first; nothing if hg prints nothing.
361 This could be optimized by using a lookup cache similar to
362 `findtimes()`. By adding `KeyAttr => ['node']` to `XMLin()` options, one
363 could use the revision ID as key and do a single massive history
364 lookup and later just check if the given revision already exists as a
365 key.  Right now I'm at the "don't optimize it yet" stage, though.
366
367 This uses Mercurial's built-in `--style xml` and parses it with `XML::Simple`. Mercurial's log output is otherwise somewhat cumbersome to get good stuff out of, so this XML solution is quite good, I think. It adds module dependency, but XML::Simple seems fairly standard (but what do I know, I've used 1 Perl installation in my life).
368
369                         use XML::Simple;
370                         use Date::Parse;
371
372                         my ($sha1, $num, $file) = @_;
373
374                         my @opts;
375                         if (defined $sha1) {
376                                 if ($sha1 =~ m/^($sha1_pattern)$/) {
377                                         push @opts, ('-r', $1.':0');
378                                 }
379                                 elsif ($sha1 =~ m/^($sha1_pattern):($sha1_pattern)$/) {
380                                         push @opts, ('-r', $1.':'.$2);
381                                 }
382                         }
383                         push @opts, ('--limit', $num) if defined $num;
384                         push @opts, ('--', $file) if defined $file;
385
386                         my @xml = do {
387                                 local $ENV{HGENCODING} = 'utf-8'; # Undone when the block is left, even on die.
388                                 run_or_cry('hg', 'log', '-v', '--style', 'xml', @opts);
389                         };
390
391                         # hg returns empty string if file is not in repository.
392                         return if !@xml; # Bare return: empty list in list context, undef in scalar context.
393
394 In some places it is clear that I'm coding ad-hoc Perl. I don't know if this is a reasonably efficient way to give input to `XMLin`, but it works.
395
396                         # Want to preserve linebreaks in multiline comments, so the output
397                         # lines are rejoined with "\n" (join() leaves the global $" untouched).
398                         my $xmllog = XMLin(
399                                 join("\n", @xml),
400                                 ForceArray => ['logentry', 'parent', 'copy', 'path']);
401
402                         my @c_infos;
403                         foreach my $rev (@{$xmllog->{logentry}}) {
404                                 my %c_info;
405                                 # In Mercurial, "rev" is technically the strictly local
406                                 # revision number.  What ikiwiki wants is what is called
407                                 # "node": a globally defined SHA1 checksum.
408                                 $c_info{rev} = $rev->{node};
409                                 foreach my $parent (@{$rev->{parent}}) {
410                                         push @{$c_info{parents}}, {rev => $parent->{node}};
411                                 }
412                                 $c_info{user} = $rev->{author}{content};
413                                 # Mercurial itself parses out and stores an email address if
414                                 # present in author name. If not, hg sets email to author name.
415                                 if ( $rev->{author}{content} ne $rev->{author}{email} &&
416                                         $rev->{author}{email} =~ m/^([^\@]+)\@(.*)$/ ) {
417                                         if ($2 eq "web") {
418                                                 $c_info{nickname} = $1;
419                                                 $c_info{web_commit} = "1";
420                                         }
421                                 }
422                                 # Mercurial gives date in ISO 8601, well handled by str2time().
423                                 $c_info{when} = str2time($rev->{date});
424                                 # Mercurial doesn't allow empty commit messages, so there
425                                 # should always be a single defined message.
426                                 $c_info{message} = $rev->{msg}{content};
427                                 # Inside "paths" sits a single array "path" that contains
428                                 # multiple paths. Crystal clear :-)
429                                 foreach my $path (@{$rev->{paths}{path}}) {
430                                         push @{$c_info{files}}, {
431                                                 # Mercurial doesn't track file permissions as
432                                                 # Git does, so that's missing here.
433                                                 'file' => $path->{content},
434                                                 'status' => $path->{action},
435                                         };
436                                 }
437                                 # There also exists an XML branch "copies"->"copy", containing
438                                 # source and dest of files that have been copied with "hg cp".
439                                 # The copy action is also registered in "paths" as a removal of
440                                 # source and addition of dest, so it's not needed here.
441                                 push @c_infos, {%c_info};
442                                 use Data::Dumper; # NOTE(review): looks like debugging residue; Dumper is not called in this sub.
443                         }
444
445                         return wantarray ? @c_infos : $c_infos[0];
446                 }
447
448                 sub hg_sha1 (;$) {
449                         # Return head sha1sum (of given file); empty string if there is none. No
450                         # file means whole repository ('--' would be taken as a file name by hg).
451                         my $file = shift;
452                         undef $file unless defined $file && length $file;
453                         my $f_info = hg_commit_info(undef, 1, $file);
454                         my $sha1;
455                         if ($f_info && $f_info->{rev}) {
456                                 ($sha1) = $f_info->{rev} =~ m/($sha1_pattern)/;
457                         }
458                         else {
459                                 debug("Empty sha1sum for '".(defined $file ? $file : q{--})."'.");
460                         }
461                         return defined $sha1 ? $sha1 : q{};
462                 }
463
464                 sub rcs_update () { # Runs a quiet "hg update"; a failure only produces a warning.
465                         run_or_cry('hg', '-q', 'update');
466                 }
467
468                 sub rcs_prepedit ($) {
469                         # Called when an edit starts: hand back the file's current commit
470                         # sha1sum, which rcs_commit later uses as the base if it must merge.
471                         my $edited = shift;
472                         my $token = hg_sha1($edited);
473                         return $token;
474                 }
475
476                 sub rcs_commit (@) {
477                         # Try to commit the page; returns undef on _success_ and
478                         # a version of the page with the rcs's conflict markers on
479                         # failure.
480                         my %params=@_;
481                         # Read here: $params{file} and $params{token} (presumably the rcs_prepedit value).
482                         # Check to see if the page has been changed by someone else since
483                         # rcs_prepedit was called.
484                         my $cur    = hg_sha1($params{file});
485                         my ($prev) = $params{token} =~ /^($sha1_pattern)$/; # untaint
486
487                         if (defined $cur && defined $prev && $cur ne $prev) {
488
489 If there was a conflict, the file with conflict markers is returned. Else, the path to the tempdiff, which is to be run to restore previous local state after `rcs_commit_staged`, is returned.
490
491                                 my ($conflict, $tempdiffpath) =
492                                         merge_past($prev, $params{file}, $dummy_commit_msg);
493                                 return defined $conflict
494                                         ? $conflict
495                                         : rcs_commit_helper(
496                                                 @_,
497                                                 merge => 1,
498                                                 tempdiffpath => $tempdiffpath);
499                         }
500                         # The file's head is still the edit's base (or unknown): plain commit.
501                         return rcs_commit_helper(@_);
502                 }
503
504                 sub rcs_commit_helper (@) {
505                         # Commit with "hg commit" as the session's user. Params: message, file,
506                         # session, merge, tempdiffpath (all optional). Always returns undef.
507                         my %params=@_;
508                         my %env=%ENV;
509                         $ENV{HGENCODING} = 'utf-8';
510                         my $user="Anonymous";
511                         my $nickname;
512                         if (defined $params{session}) {
513                                 if (defined $params{session}->param("name")) {
514                                         $user = $params{session}->param("name");
515                                 }
516                                 elsif (defined $params{session}->remote_addr()) {
517                                         $user = $params{session}->remote_addr();
518                                 }
519                                 $nickname = $user; # Fallback, so the address below is never built from undef.
520                                 if (defined $params{session}->param("nickname")) {
521                                         $nickname=encode_utf8($params{session}->param("nickname"));
522                                         $nickname=~s/\s+/_/g;
523                                         $nickname=~s/[^-_0-9[:alnum:]]+//g;
524                                 }
525                                 $ENV{HGUSER} = encode_utf8($user . ' <' . $nickname . '@web>');
526                         }
527
528                         if (! length $params{message}) {
529                                 $params{message} = "no message given";
530                         }
531
532                         $params{message} = IkiWiki::possibly_foolish_untaint($params{message});
533
534                         my @opts;
535
536 Mercurial rejects file arguments when performing a merging commit. It
537 only does "all or nothing" commits by design when merging, so given file arguments must be discarded. It should not pose a problem.
538
539                         if (exists $params{file} && ! defined $params{merge}) {
540                                 push @opts, '--', $params{file};
541                         }
542
543                         # hg commit returns non-zero if nothing really changed.
544                         # So we should ignore its exit status (hence run_or_non).
545                         run_or_non('hg', 'commit', '-m', $params{message}, '-q', @opts);
546
547 If there were uncommitted local changes in srcdir before a merge was done, they are restored here.
548
549                         if (defined $params{tempdiffpath}) {
550                                 hg_local_dirstate_unshelve($params{tempdiffpath});
551                         }
552
553                         %ENV=%env;
554                         return undef; # success
555                 }
556
557                 sub rcs_commit_staged (@) {
558                         # Commits all staged changes. Changes can be staged using rcs_add,
559                         # rcs_remove, and rcs_rename. The parameters are passed on unchanged.
560                         return rcs_commit_helper(@_);
561                 }
562
563                 sub rcs_add ($) {
564                         # Stage the given file for addition ("hg add"); failures only warn.
565                         my $new_file = shift;
566                         return run_or_cry('hg', 'add', $new_file);
567                 }
568
569                 sub rcs_remove ($) {
570                         # Remove $file from the archive; "hg remove" is run with -f so
571                         # that the removal is forced.
572                         my $file = shift;
573                         return run_or_cry('hg', 'remove', '-f', $file);
574                 }
575
576                 sub rcs_rename ($$) {
577                         # Record a forced move of the first argument to the second ("hg rename -f").
578                         my @pair = @_[0, 1];
579                         return run_or_cry('hg', 'rename', '-f', @pair);
580                 }
581
582                 sub rcs_recentchanges ($) {
583                         # Returns up to $num recent changesets, each a hashref with the keys
584                         # rev, user, nickname, committype, when, message and pages.
585                         my ($num) = @_;
586                         # The keywords \[[file]] and \[[r2]] (backward compatibility) in the
587                         # diffurl setting are exchanged with filename and revision below.
588                         my $diffurl_base = defined $config{diffurl} ? $config{diffurl} : '';
589
590                         my @c_infos;
591                         foreach my $c_info (hg_commit_info(undef, $num, undef)) {
592                                 my @pagenames;
593                                 foreach my $page (@{$c_info->{files}}) {
594                                         my $diffurl = $diffurl_base;
595                                         $diffurl =~ s/\[\[file\]\]/$page->{file}/g;
596                                         $diffurl =~ s/\[\[r2\]\]/$c_info->{rev}/g;
597                                         push @pagenames, {
598                                                 # pagename() strips suffixes and returns the path to the
599                                                 # file as it is to be represented in the build dir.
600                                                 page => pagename($page->{file}),
601                                                 diffurl => $diffurl,
602                                         };
603                                 }
604                                 # Changesets that list no files are left out of the result.
605                                 next unless @pagenames;
606
607                                 # ikiwiki expects every line of the commit message as a separate
608                                 # entry. split /^/ keeps each line's trailing newline, as reading
609                                 # from a filehandle would, but unlike an in-memory filehandle it
610                                 # cannot fail on characters above 0xFF and leaves $_ untouched.
611                                 my $message = defined $c_info->{message} ? $c_info->{message} : '';
612                                 my @messagelines = map { +{ line => $_ } } split /^/, $message;
613
614                                 push @c_infos, {
615                                         rev        => $c_info->{rev},
616                                         user       => $c_info->{user},
617                                         nickname   => defined $c_info->{nickname} ?
618                                                         $c_info->{nickname} : $c_info->{user},
619                                         committype => $c_info->{web_commit} ? "web" : "hg",
620                                         when       => $c_info->{when},
621                                         message    => [@messagelines],
622                                         pages      => [@pagenames],
623                                 };
624                         }
625                         return @c_infos;
626                 }
627
628                 sub rcs_diff ($;$) {
629                         # Returns the "hg diff -g" output for changeset $rev, starting at the
630                         # first "diff --git" line and holding at most $maxlines lines when that
631                         # is defined: a list of lines in list context, else one joined string.
632                         my ($rev, $maxlines) = @_;
633                         my @collected;
634                         my $collect = sub {
635                                 my ($text) = @_;
636                                 # Bare return once the limit is reached, 1 otherwise.
637                                 return if defined $maxlines && @collected == $maxlines;
638                                 # Nothing is kept until the first "diff --git" line shows up.
639                                 if (@collected || $text =~ /^diff --git/) {
640                                         push @collected, $text . "\n";
641                                 }
642                                 return 1;
643                         };
644                         safe_hg(undef, $collect, "hg", "diff", "-c", $rev, "-g");
645                         return wantarray ? @collected : join("", @collected);
646                 }
647
648                 {
649                 my %time_cache;
650
651 This is an upstream change I did a week ago or so. Perhaps it can be merged in some clever way with the updated `hg_commit_info` to make one shared lookup cache. Don't know how much would be gained.
652
653                 sub findtimes ($$) {
654                         # Returns the date of the first ($id == 0, mtime) or the last ($id == 1,
655                         # ctime) "hg log" entry listing $file, or 0 if none does. Log is read once.
656                         my ($file, $id) = @_;
657
658                         if (! keys %time_cache) {
659                                 my $date;
660                                 # It doesn't seem possible to specify the format wanted for the
661                                 # changelog (same format as is generated in git.pm:findtimes(),
662                                 # though the date differs slightly) without using a style
663                                 # _file_. There is a "hg log" switch "--template" to directly
664                                 # control simple output formatting, but in this case, the
665                                 # {file} directive must be redefined, which can only be done
666                                 # with "--style".
667                                 #
668                                 # If {file} is not redefined, all files are output on a single
669                                 # line separated with a space. It is not possible to conclude
670                                 # if the space is part of a filename or just a separator, and
671                                 # thus impossible to use in this case.
672                                 #
673                                 # Some output filters are available in hg, but they are not fit
674                                 # for this cause (and would slow down the process
675                                 # unnecessarily).
676                                 eval q{use File::Temp};
677                                 error $@ if $@;
678                                 my ($tmpl_fh, $tmpl_filename) = File::Temp::tempfile(UNLINK => 1);
679                                 print $tmpl_fh 'changeset = "{date}\\n{files}\\n"' . "\n";
680                                 print $tmpl_fh 'file = "{file}\\n"' . "\n";
681                                 # Close so the style file is fully written before hg reads it.
682                                 close($tmpl_fh) || error("failed writing $tmpl_filename: $!");
683
684                                 foreach my $line (run_or_die('hg', 'log', '--style', $tmpl_filename)) {
685                                         # {date} gives output on the form
686                                         # 1310694511.0-7200
687                                         # where the first number is UTC Unix timestamp with one
688                                         # decimal (decimal always 0, at least on my system)
689                                         # followed by local timezone offset from UTC in
690                                         # seconds.
691                                         if (! defined $date && $line =~ /^\d+\.\d[+-]\d*$/) {
692                                                 $line =~ s/^(\d+).*/$1/;
693                                                 $date=$line;
694                                         }
695                                         elsif (! length $line) {
696                                                 $date=undef;
697                                         }
698                                         else {
699                                                 my $f=$line;
700
701                                                 if (! $time_cache{$f}) {
702                                                         $time_cache{$f}[0]=$date; # mtime
703                                                 }
704                                                 $time_cache{$f}[1]=$date; # ctime
705                                         }
706                                 }
707                         }
708
709                         return exists $time_cache{$file} ? $time_cache{$file}[$id] : 0;
710                 }
711
712                 }
713
714                 sub rcs_getctime ($) {
715                         # Creation time of the given file: slot 1 of findtimes().
716                         my ($path) = @_;
717                         return findtimes($path, 1);
718                 }
719
720                 sub rcs_getmtime ($) {
721                         # Modification time of the given file: slot 0 of findtimes().
722                         my ($path) = @_;
723                         return findtimes($path, 0);
724                 }
725
726 The comment just below the function declaration below is taken from `git.pm`. Is it true? Should ikiwiki support sharing its repo with other things? Mercurial-wise that sounds like a world of pain.
727
728                 {
729                 my $ret;
730                 sub hg_find_root {
731                         # The wiki may not be the only thing in the hg repo. Walk up from
732                         # the srcdir until a directory holding .hg is found. Returns
733                         # ($subdir, $rootdir); the result is cached in $ret.
734                         return @$ret if defined $ret;
735
736                         my $subdir="";
737                         my $dir=$config{srcdir};
738                         while (! -d "$dir/.hg") {
739                                 my $parent=IkiWiki::dirname($dir);
740                                 # Give up when the path is used up or stops getting shorter.
741                                 if (! length $parent || $parent eq $dir) {
742                                         error("cannot determine root of hg repo");
743                                 }
744                                 $subdir=IkiWiki::basename($dir)."/".$subdir;
745                                 $dir=$parent;
746                         }
747                         $ret=[$subdir, $dir];
748                         return @$ret;
749                 }
750
751                 }
752
753                 sub hg_parse_changes (@) {
754                         # Takes commit info hashes (one from rcs_preprevert, possibly
755                         # several from rcs_receive) and returns a list of hashes with the
756                         # keys file, action and path, one per changed file.
757                         my @c_infos_raw = @_;
758                         my ($subdir, $rootdir) = hg_find_root();
759                         my @c_infos_ret;
760
761                         foreach my $c_info_raw (@c_infos_raw) {
762                                 foreach my $path (@{$c_info_raw->{files}}) {
763                                         my ($action, $temppath);
764                                         my $file=$path->{file};
765
766                                         # Check that all changed files are in the subdir; \Q..\E
767                                         # makes the directory name match literally.
768                                         if (length $subdir && ! ($file =~ s/^\Q$subdir\E//)) {
769                                                 error sprintf(gettext("you are not allowed to change %s"), $file);
770                                         }
771
772                                         if    ($path->{status} eq "M") { $action="change" }
773                                         elsif ($path->{status} eq "A") { $action="add" }
774                                         elsif ($path->{status} eq "R") { $action="remove" }
775                                         else  { error "unknown status ".$path->{status} }
776
777 I haven't tested the attachment code below. Is it run when there is an non-trusted file upload?
778
779                                         # extract attachment to temp file (path stays undef otherwise)
780                                         if (($action eq 'add' || $action eq 'change') &&
781                                                 ! pagetype($file)) {
782                                                 # NOTE(review): $hg_dir is undef unless rcs_receive set it,
783                                                 # and the shell string below is built from unquoted values.
784                                                 eval q{use File::Temp};
785                                                 die $@ if $@;
786                                                 my $fh;
787                                                 ($fh, $temppath)=File::Temp::tempfile(undef, UNLINK => 1);
788                                                 my $cmd = "cd $hg_dir && ".
789                                                         "hg diff -g -c $c_info_raw->{rev} > '$temppath'";
790                                                 if (system($cmd) != 0) {
791                                                         error("failed writing temp file '$temppath'.");
792                                                 }
793                                         }
794
795                                         push @c_infos_ret, {
796                                                 file => $file,
797                                                 action => $action,
798                                                 path => $temppath,
799                                         };
800                                 }
801                         }
802
803                         return @c_infos_ret;
804                 }
805
806 *TODO:* I don't know what's happening here. I've changed the code to adhere to this file's variables and functions, but it refers to a srcdir _and_ a default repo, which currently isn't available in the Mercurial setup.
807
808 `rcs_receive` is optional and only runs when running a pre-receive hook. Where `$_` comes from and its format are mysteries to me.
809
810 Also, a comment in `git.pm` mentions that we don't want to chdir to a subdir "and only see changes in it" - but this isn't true for either Git or Mercurial to my knowledge. It only seems to happen in `git.pm` since the `git log` command in `git_commit_info` ends with "`-- .`" - if it didn't do that, one wouldn't have to chdir for this reason, I believe.
811
812 In this case we need to stay in default repo instead of srcdir though, so `hg_dir="."` _is_ needed, but not for the abovementioned reason :-) (maybe there's more to it, though).
813
814                 sub rcs_receive () {
815                         # Turns each "oldrev newrev refname" line read from <> into parsed changes.
816                         my @c_infos_ret;
817                         while (<>) {
818                                 chomp;
819                                 my ($oldrev, $newrev, $refname) = split(' ', $_, 3);
820                                 # only allow changes to hg_default_branch
821
822 *TODO:* What happens here? Some Git voodoo. _If_ `$_` has the exact same format for Mercurial, then the below should work just as well here, I think.
823
824                                 unless ($refname =~ m|^refs/heads/$config{hg_default_branch}$|) {
825                                         error sprintf(gettext("you are not allowed to change %s"), $refname);
826                                 }
827
828 Comment from `git.pm`:
829
830                                 # Avoid chdir when running git here, because the changes are in
831                                 # the default git repo, not the srcdir repo.  (Also, if a subdir
832                                 # is involved, we don't want to chdir to it and only see
833                                 # changes in it.) The pre-receive hook already puts us in the
834                                 # right place.
835                                 $hg_dir=".";
836                                 my $range="${newrev}:${oldrev}";
837                                 my @parsed=hg_parse_changes(hg_commit_info($range, undef, undef));
838                                 $hg_dir=undef;
839                                 push @c_infos_ret, @parsed;
840                         }
841
842                         return @c_infos_ret;
843                 }
844
845                 sub rcs_preprevert ($) {
846                         # Returns hg_parse_changes() for commit $rev; unknown revs and merges are errors.
847                         my ($sha1) = $_[0] =~ /^($sha1_pattern)$/; # untaint
848
849 The below 4 lines of code are from `git.pm`, but I can't see what they actually do there. Neither Git nor Mercurial only lists changes in working directory when given a command - they always traverse to repository root by themselves. I keep it here for comments, in case I'm missing something.
850
851 *UPDATE:* See earlier note about `git log` ending in "`-- .`".
852
853                         ## Examine changes from root of git repo, not from any subdir,
854                         ## in order to see all changes.
855                         #my ($subdir, $rootdir) = git_find_root();
856                         #$git_dir=$rootdir;
857                         # A rev that fails validation must not reach hg_commit_info as undef:
858                         # rcs_recentchanges passes undef there to ask for the latest commits.
859                         error "unknown commit" if ! defined $sha1;
860                         my $c_info=hg_commit_info($sha1, 1, undef) or error "unknown commit";
861                         # hg revert will fail on merge commits. Add a nice message.
862                         if (exists $c_info->{parents} && $c_info->{parents} > 1) {
863                                 error gettext("you are not allowed to revert a merge");
864                         }
865                         my @c_info_ret=hg_parse_changes($c_info);
866
867                         ### Probably not needed, if earlier comment is correct.
868                         #$hg_dir=undef;
869                         return @c_info_ret;
870                 }
871
872                 sub rcs_revert ($) {
873                         # Try to revert the given rev; returns undef on _success_ and an
874                         # error message otherwise. An invalid rev fails before anything is touched.
875                         my ($sha1) = $_[0] =~ /^($sha1_pattern)$/; # untaint
876                         return sprintf(gettext("Failed to revert commit %s"), '(invalid revision)') if ! defined $sha1;
877                         # Save uncommitted local changes to diff file. Attempt to restore later.
878                         my $tempdiffpath = hg_local_dirstate_shelve($sha1);
879
880                         # Clean dir to latest commit.
881                         run_or_die('hg', 'update', '-C');
882
883 Some voodoo is needed here. `hg backout --tool internal:local -r $sha1` is *almost* good, but if the reversion is done to the directly previous revision, hg automatically commits, which is bad in this case. Instead I generate a reverse diff and pipe it to `import --no-commit`.
884
885                         my $reverted=run_or_non("hg diff -c $sha1 --reverse | hg import --no-commit -");
886                         # Restore the shelved local changes whether or not the revert worked.
887                         if ($tempdiffpath) {
888                                 hg_local_dirstate_unshelve($tempdiffpath);
889                         }
890                         # undef signals success to the caller.
891                         return undef if $reverted;
892                         return sprintf(gettext("Failed to revert commit %s"), $sha1);
893                 }
894
895 Below follows code regarding [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve it in another place later, but the code in itself is working.
896
897 Should perhaps add initiation of the bookmark extension here, to support older Mercurial versions.
898
899                 sub rcs_wrapper_postcall($) {
900                         # Update hgrc if it exists: point the post-commit/incoming hooks with the
901                         # .ikiwiki suffix to the wrapper path given in the setup file. The edit is
902                         # done in place via $^I so hgrc is not lost if the loop is interrupted
903                         # midway; without a configured mercurial_wrapper nothing is changed.
904                         # There may be a better way than new hooks and callbacks (see ikiwiki.info).
905                         my $hgrc=$config{srcdir}.'/.hg/hgrc';
906                         my $backup_suffix='.ikiwiki.bak';
907                         if (-e $hgrc && defined $config{mercurial_wrapper} && length $config{mercurial_wrapper}) {
908                                 use File::Spec;
909                                 my $mercurial_wrapper_abspath=File::Spec->rel2abs($config{mercurial_wrapper}, $config{srcdir});
910                                 local $_; # while (<>) below assigns to the global $_
911                                 local ($^I, @ARGV)=($backup_suffix, $hgrc);
912                                 while (<>) {
913                                         s/^(post-commit|incoming)(\.ikiwiki[ \t]*=[ \t]*).*$/$1$2$mercurial_wrapper_abspath/;
914                                         print;
915                                 }
916                                 unlink($hgrc.$backup_suffix);
917                         }
918                 }
919
920                 1