update links
[ikiwiki] / doc / todo / Mercurial_backend_update.mdwn
1 I submitted some changes that added 5 "Yes"es and 2 "Fast"s to Mercurial at [[/rcs]], but some functionality is still missing compared to e.g. `git.pm`, which the Mercurial backend should be able to match.
2
3 To do this, a more basic rewrite would simplify things. I inline the complete file below with comments. I don't expect anyone to take the time to read it all at once, but I'd be glad if those interested in the Mercurial backend could do some beta testing.
4
5 * [This specific revision at my hg repo](http://46.239.104.5:81/hg/program/ikiwiki/file/4994ba5e36fa/Plugin/mercurial.pm) ([raw version](http://46.239.104.5:81/hg/program/ikiwiki/raw-file/4994ba5e36fa/Plugin/mercurial.pm)).
6
7 * [My default branch](http://510x.se/hg/program/ikiwiki/file/default/Plugin/mercurial.pm) (where updates will be made, will mention here if anything happens) ([raw version](http://510x.se/hg/program/ikiwiki/raw-file/default/Plugin/mercurial.pm)).
8
9 (I've stripped the `hgrc`-generation from the linked versions, so it should work to just drop them on top of the old `mercurial.pm`).
10
11 I break out my comments from the code to make them more readable. I comment all the changes as compared to current upstream. --[[Daniel Andersson]]
12
13 ---
14
15                 #!/usr/bin/perl
16                 package IkiWiki::Plugin::mercurial;
17
18                 use warnings;
19                 use strict;
20                 use IkiWiki;
21                 use Encode;
22                 use open qw{:utf8 :std};
23
24
25 Pattern to validate hg sha1 sums. hg usually truncates the hash to 12
26 characters and prepends a local revision number for output, but internally
27 it keeps a 40 character hash. Will use the long version in this code.
28
29                 my $sha1_pattern = qr/[0-9a-fA-F]{40}/; # exactly 40 hex digits; unanchored, so call sites add ^...$ when the whole string must match
30
31 Message to skip in recent changes
32
33                 my $dummy_commit_msg = 'dummy commit'; # rcs_commit passes this to merge_past as the message of the temporary pre-merge commit
34
35 *TODO:* `$hg_dir` not really implemented yet, until a srcdir/repository distinction is
36 made as for e.g. Git. Used in `rcs_receive`, and for attachments in `hg_parse_changes`. See comments in those places, though.
37
38                 my $hg_dir=undef; # while undef, safe_hg runs hg from $config{srcdir} instead
39
40                 sub import { # Registers this plugin's callbacks through hook().
41                         hook(type => "checkconfig", id => "mercurial", call => \&checkconfig); # wrapper configuration, see checkconfig
42                         hook(type => "getsetup", id => "mercurial", call => \&getsetup); # setup option descriptions, see getsetup
43                         hook(type => "rcs", id => "rcs_update", call => \&rcs_update); # from here on: one "rcs" hook per rcs_* function
44                         hook(type => "rcs", id => "rcs_prepedit", call => \&rcs_prepedit);
45                         hook(type => "rcs", id => "rcs_commit", call => \&rcs_commit);
46                         hook(type => "rcs", id => "rcs_commit_staged", call => \&rcs_commit_staged);
47                         hook(type => "rcs", id => "rcs_add", call => \&rcs_add);
48                         hook(type => "rcs", id => "rcs_remove", call => \&rcs_remove);
49                         hook(type => "rcs", id => "rcs_rename", call => \&rcs_rename);
50                         hook(type => "rcs", id => "rcs_recentchanges", call => \&rcs_recentchanges);
51                         hook(type => "rcs", id => "rcs_diff", call => \&rcs_diff);
52                         hook(type => "rcs", id => "rcs_getctime", call => \&rcs_getctime);
53                         hook(type => "rcs", id => "rcs_getmtime", call => \&rcs_getmtime);
54                         hook(type => "rcs", id => "rcs_preprevert", call => \&rcs_preprevert);
55                         hook(type => "rcs", id => "rcs_revert", call => \&rcs_revert);
56
57 This last hook is "unsanctioned" from [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve its function
58 another way later.
59
60                         hook(type => "rcs", id => "rcs_wrapper_postcall", call => \&rcs_wrapper_postcall); # NOTE(review): the callback is not defined in the visible part of the file - confirm it exists
61                 }
62
63                 sub checkconfig () { # Appends a wrapper description to $config{wrappers} when mercurial_wrapper is set.
64                         if (exists $config{mercurial_wrapper} && length $config{mercurial_wrapper}) { # skipped for a missing or empty setting
65                                 push @{$config{wrappers}}, {
66                                         wrapper => $config{mercurial_wrapper},
67                                         wrappermode => (defined $config{mercurial_wrappermode} ? $config{mercurial_wrappermode} : "06755"), # falls back to "06755"
68
69 Next line part of [[Auto-setup and maintain Mercurial wrapper hooks]].
70
71                                         wrapper_postcall => (defined $config{mercurial_wrapper_hgrc_update} ? $config{mercurial_wrapper_hgrc_update} : "1"), # falls back to "1"
72                                 };
73                         }
74                 }
75
76                 sub getsetup () { # Returns the list of setup options this plugin understands, as key => description-hash pairs.
77                         return
78                                 plugin => {
79                                         safe => 0, # rcs plugin
80                                         rebuild => undef,
81                                         section => "rcs",
82                                 },
83                                 mercurial_wrapper => { # read by checkconfig to decide whether a wrapper is generated
84                                         type => "string",
85                                         #example => # FIXME add example
86                                         description => "mercurial post-commit hook to generate",
87                                         safe => 0, # file
88                                         rebuild => 0,
89                                 },
90                                 mercurial_wrappermode => { # checkconfig falls back to "06755" when this is unset
91                                         type => "string",
92                                         example => '06755',
93                                         description => "mode for mercurial_wrapper (can safely be made suid)",
94                                         safe => 0,
95                                         rebuild => 0,
96                                 },
97                                 mercurial_wrapper_hgrc_update => { # checkconfig falls back to "1" when this is unset
98                                         type => "string",
99                                         example => "1",
100                                         description => "updates existing hgrc to reflect path changes for mercurial_wrapper",
101                                         safe => 0,
102                                         rebuild => 0,
103                                 },
104                                 historyurl => {
105                                         type => "string",
106                                         example => "http://example.com:8000/log/tip/\[[file]]",
107                                         description => "url to hg serve'd repository, to show file history (\[[file]] substituted)",
108                                         safe => 1,
109                                         rebuild => 1,
110                                 },
111                                 diffurl => { # read by rcs_recentchanges
112                                         type => "string",
113                                         example => "http://localhost:8000/?fd=\[[r2]];file=\[[file]]",
114                                         description => "url to hg serve'd repository, to show diff (\[[file]] and \[[r2]] substituted)",
115                                         safe => 1,
116                                         rebuild => 1,
117                                 },
118                 }
119
120                 sub safe_hg (&@) {
121                         # Start a child process safely without resorting to /bin/sh.
122                         # Returns command output (in list context) or success state
123                         # (in scalar context), or runs the specified data handler.
124
125                         my ($error_handler, $data_handler, @cmdline) = @_; # both handlers may be undef (see the run_or_* wrappers)
126
127                         my $pid = open my $OUT, "-|"; # forking open: the parent reads the child's output from $OUT
128
129                         error("Cannot fork: $!") if !defined $pid;
130
131                         if (!$pid) {
132                                 # In child.
133                                 # hg commands want to be in wc.
134
135 This `$hg_dir` logic means nothing and could be stripped until srcdir/repdir distinction is made (it's stripped in upstream `mercurial.pm` right now).
136
137                                 if (! defined $hg_dir) {
138                                         chdir $config{srcdir}
139                                             or error("cannot chdir to $config{srcdir}: $!");
140                                 }
141                                 else {
142                                         chdir $hg_dir or error("cannot chdir to $hg_dir: $!");
143                                 }
144
145                                 exec @cmdline or error("Cannot exec '@cmdline': $!"); # list-form exec, so no shell is involved
146                         }
147                         # In parent.
148
149                         my @lines;
150                         while (<$OUT>) {
151                                 chomp; # output lines are handed on without their trailing newline
152
153                                 if (! defined $data_handler) {
154                                         push @lines, $_;
155                                 }
156                                 else {
157                                         last unless $data_handler->($_); # a false return from the handler stops reading
158                                 }
159                         }
160
161                         close $OUT;
162
163                         $error_handler->("'@cmdline' failed: $!") if $? && $error_handler; # NOTE(review): after closing a pipe the child's status is in $?; $! may not describe the failure - confirm
164
165                         return wantarray ? @lines : ($? == 0);
166                 }
167                 # Shorthands for safe_hg without a data handler; they differ only in the error handler: error(), warn, or none.
168                 sub run_or_die ($@) { return safe_hg(\&error, undef, @_); }
169                 sub run_or_cry ($@) { return safe_hg(sub { warn @_ }, undef, @_); }
170                 sub run_or_non ($@) { return safe_hg(undef, undef, @_); }
171
172
173 To handle uncommitted local changes ("ULC"s for short), I use logic similar to the (non-standard) "shelve" extension to Mercurial. By taking a diff before resetting to the last commit, making changes and then applying the diff again, one can do things Mercurial otherwise refuses, which is necessary later.
174
175 This function creates this diff.
176
177                 sub hg_local_dirstate_shelve ($) {
178                         # Writes a git-style diff ("hg diff -g") of the uncommitted changes in
179                         # the srcdir to the file "diff_<arg>" inside $config{srcdir}. The argument
180                         # is a string (preferably a revision) that makes the name unique and
181                         # identifiable. Returns the diff's path, or undef if hg printed nothing.
182                         my $tempdiffname = "diff_".shift;
183                         my @tempdiff = run_or_die('hg', 'diff', '-g');
184                         return undef if !@tempdiff;
185
186                         # join() puts back the line breaks that safe_hg chomped, without touching
187                         # the global list separator $" (which would leak if writefile() died).
188                         writefile($tempdiffname, $config{srcdir}, join("\n", @tempdiff));
189
190                         return $config{srcdir}.'/'.$tempdiffname;
191                 }
192
193 This function restores the diff.
194
195                 sub hg_local_dirstate_unshelve ($) {
196                         # Re-applies a diff snapshot with "hg import --no-commit" to get the
197                         # working directory back to its initial state. The diff file is
198                         # removed only if the import succeeds; otherwise it is kept so that
199                         # possibly important local changes are not eradicated, at the cost
200                         # of cluttering the directory. Given no path, nothing is done.
201                         my $patch = shift;
202                         return $patch if !$patch;
203
204                         my $applied = run_or_cry('hg', 'import', '--no-commit', $patch);
205                         return $applied if !$applied;
206                         unlink($patch);
207                         return undef;
208                 }
209
210 This makes online diffing possible. A similar approach to the one in `git.pm`, which is [discussed at some length in a comment there](http://source.ikiwiki.branchable.com/?p=source.git;a=blob;f=IkiWiki/Plugin/git.pm;h=cf7fbe9b7c43ee53180612d0411e6202074fb9e0;hb=refs/heads/master#l211), is taken.
211
212                 sub merge_past ($$$) {
213                         my ($sha1, $file, $message) = @_;
214                         # Commits the edited $file (with $message) on top of revision $sha1, then
215                         # tries to merge that new head with the current one. Returns
216                         # ($conflict, $tempdiffpath); dies via error() if a step fails.
217                         my @undo;         # undo stack for cleanup in case of an error
218                         my $conflict;     # file content with conflict markers, set only if the merge fails
219                         my $tempdiffpath; # shelved diff of uncommitted local changes, undef if there were none
220
221                         eval {
222                                 # Hide local changes from Mercurial by renaming the modified
223                                 # file.  Relative paths must be converted to absolute for
224                                 # renaming.
225                                 my ($target, $hidden) = (
226                                         "$config{srcdir}/${file}",
227                                         "$config{srcdir}/${file}.${sha1}"
228                                 );
229                                 rename($target, $hidden)
230                                         or error("rename '$target' to '$hidden' failed: $!");
231                                 # Ensure to restore the renamed file on error.
232                                 push @undo, sub {
233                                         return if ! -e "$hidden"; # already renamed
234                                         rename($hidden, $target)
235                                             or warn "rename '$hidden' to '$target' failed: $!";
236                                 };
237
238
239 Take a snapshot of srcdir to be able to restore uncommited local changes ("ULCs") afterwards.
240
241 * This must happen _after_ the merging commit in Mercurial, there is no way around it. By design hg refuses to commit merges if there are other changes to tracked content present, no matter how much  you beg.
242
243 * ULCs to the file being edited are special: they can't be diffed here since `editpage.pm` already has overwritten the file. When the web edit session started though, the ULC version (not the commited
244 version) was read into the form, so in a way, the web user _has already merged_ with the ULC. It is not saved in commit history, but that is the exact consequence of "uncommited" changes. If an ULC is done between the time the web edit started and was submitted, then it is lost, though.  All in all, one shouldn't be editing the srcdir directly when web edits of the same file are allowed. Clone the repo and push changes instead.
245
246 Much of these issues disappear, I believe, if one works with a master repo which only is pushed to.
247
248                                 $tempdiffpath = hg_local_dirstate_shelve($sha1); # no "my": must fill the outer variable, which is what gets returned
249
250                                 # Ensure uniqueness of bookmarks.
251                                 my $bookmark_upstream_head = "current_head_$sha1";
252                                 my $bookmark_edit_base = "edit_base_$sha1";
253
254                                 # Git and Mercurial differ in the branch concept. Mercurial's
255                                 # "bookmarks" are closer in function in this regard.
256
257 Bookmarks aren't standard until Mercurial 1.8 ([2011--02--10](http://selenic.com/hg/rev/d4ab9486e514)), but they've been bundled with Mercurial since ~2008, so they can be enabled by writing a `hgrc`, which is also being worked on.
258
259                                 # Create a bookmark at current tip.
260                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
261                                                 $bookmark_upstream_head) };
262                                 run_or_die('hg', 'bookmark', $bookmark_upstream_head);
263
264                                 # Create a bookmark at the revision from which the edit was
265                                 # started and switch to it, discarding changes (they are stored
266                                 # in $tempdiffpath and the hidden file at the moment).
267                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
268                                                 $bookmark_edit_base) };
269                                 run_or_die('hg', 'bookmark', '-r', $sha1, $bookmark_edit_base);
270                                 run_or_die('hg', 'update', '-C', $bookmark_edit_base);
271
272                                 # Reveal the modified file.
273                                 rename($hidden, $target)
274                                     or error("rename '$hidden' to '$target' failed: $!");
275
276                                 # Commit at the bookmarked revision, creating a new head.
277                                 run_or_cry('hg', 'commit', '-m', $message);
278
279                                 # Attempt to merge the newly created head with upstream head.
280                                 # '--tool internal:merge' to avoid spawning a GUI merger.
281
282 (*Semi-TODO:* How do you make this command quiet? On failed merge, it
283 always writes to STDERR and clutters the web server log.)
284
285                                 if (!run_or_non('hg', 'merge', '--tool', 'internal:merge',
286                                                 $bookmark_upstream_head)) {
287                                         # ..., otherwise return file with conflict markers.
288                                         $conflict = readfile($target);
289
290                                         # The hardcore reset approach. Keep your hands inside
291                                         # the cart.
292                                         run_or_die('hg', 'rollback');
293                                         run_or_die('hg', 'update', '-C',
294                                                 $bookmark_upstream_head);
295                                         if ($tempdiffpath) {
296                                                 hg_local_dirstate_unshelve($tempdiffpath);
297                                         }
298
299 Other approaches tried here:
300
301 1. Clean up merge attempt,
302
303         run_or_die('hg', 'update', '-C', $bookmark_upstream_head);
304
305 2. Redo "merge", using only upstream head versions,
306
307         run_or_die('hg', 'merge', '--tool', 'internal:local', $bookmark_edit_base);
308
309 3. dummy commit to close head.
310
311         run_or_non('hg', 'commit', '-m', $message);
312
313 This creates a cluttered and erroneous history. We
314 tell Mercurial to merge, even though we in practice
315 discard. This creates problems when trying to revert
316 changes.
317
318 Other attempt:
319
320 1. Discard merge attempt and switch to temp head,
321
322         run_or_die('hg', 'update', '-C', $bookmark_edit_base);
323
324 2. close the temp head (why do they call the command that in practice closes heads "--close-branch"?),
325
326         run_or_non('hg', 'commit', '--close-branch', '-m', $message);
327
328 3. restore working directory to pre-fiddling status.
329
330         run_or_die('hg', 'update', $bookmark_upstream_head);
331
332 ...but this requires the same amount of forks as the
333 above method, and confuses other parts of ikiwiki
334 since the upstream head is now the third newest
335 revision. Maybe that particular problem is solvable
336 by setting a global default bookmark that follows the
337 main tip.  It will leave clutter in the revision
338 history, though. Two extra commits that in practice
339 don't hold relevant information will be recorded for
340 each failed merge attempt.
341
342 To only create one extra commit, one could imagine
343 adding `--close-branch` to the commit that initially
344 created the new head (since there is no problem
345 merging with closed heads), but it's not possible to
346 close and create a head at the same time, apparently.
347
348                                 }
349                         };
350                         my $failure = $@;
351
352                         # Process undo stack (in reverse order). By policy, cleanup actions
353                         # should normally print a warning on failure.
354                         while (my $handle = pop @undo) {
355                                 $handle->();
356                         }
357
358                         error("Mercurial merge failed!\n$failure\n") if $failure;
359
360                         return ($conflict, $tempdiffpath); # rcs_commit hands $tempdiffpath on to rcs_commit_helper for unshelving
361                 }
362
363                 sub hg_commit_info ($;$;$) {
364                         # Return commit info hashes (rev, parents, user, when, message, files) for
365                         # at most $num commits from $sha1 (or a "sha1:sha1" range), limited to $file
366                         # if given: the whole list in list context, only the first in scalar context.
367 This could be optimized by using a lookup cache similar to
368 `findtimes()`. By adding `KeyAttr => ['node']` to `XMLin()` options, one
369 could use the revision ID as key and do a single massive history
370 lookup and later just check if the given revision already exists as a
371 key.  Right now I'm at the "don't optimize it yet" stage, though.
372
373 This uses Mercurial's built-in `--style xml` and parses it with `XML::Simple`. Mercurial's log output is otherwise somewhat cumbersome to get good stuff out of, so this XML solution is quite good, I think. It adds module dependency, but XML::Simple seems fairly standard (but what do I know, I've used 1 Perl installation in my life).
374
375                         use XML::Simple;
376                         use Date::Parse;
377
378                         my ($sha1, $num, $file) = @_;
379
380                         my @opts;
381                         if (defined $sha1) {
382                                 if ($sha1 =~ m/^($sha1_pattern)$/) {
383                                         push @opts, ('-r', $1.':0'); # from the given revision back to revision 0
384                                 }
385                                 elsif ($sha1 =~ m/^($sha1_pattern):($sha1_pattern)$/) {
386                                         push @opts, ('-r', $1.':'.$2);
387                                 }
388                         }
389                         push @opts, ('--limit', $num) if defined $num;
390                         push @opts, ('--', $file) if defined $file;
391
392                         my @xml = do {
393                                 local $ENV{HGENCODING} = 'utf-8'; # undone at scope exit, even if the call dies
394                                 run_or_cry('hg', 'log', '-v', '--style', 'xml', @opts);
395                         };
396
397                         # hg returns empty string if file is not in repository.
398                         return if !@xml; # undef in scalar context, empty list in list context
399
400 Some places it is clear that I'm coding ad-hoc Perl. I don't know if this is a reasonably efficient way to give input to `XMLin`, but it works.
401
402                         # Want to preserve linebreaks in multiline comments, so the lines are
403                         # joined with "\n" explicitly instead of changing the global $".
404                         my $xmllog = XMLin(join("\n", @xml),
405                                 ForceArray => ['logentry', 'parent', 'copy', 'path']);
406
407
408                         my @c_infos;
409                         foreach my $rev (@{$xmllog->{logentry}}) {
410                                 my %c_info;
411                                 # In Mercurial, "rev" is technically the strictly local
412                                 # revision number.  What ikiwiki wants is what is called
413                                 # "node": a globally defined SHA1 checksum.
414                                 $c_info{rev} = $rev->{node};
415                                 foreach my $parent (@{$rev->{parent}}) {
416                                         push @{$c_info{parents}}, {rev => $parent->{node}};
417                                 }
418                                 $c_info{user} = $rev->{author}{content};
419                                 # Mercurial itself parses out and stores an email address if
420                                 # present in author name. If not, hg sets email to author name.
421                                 if ( $rev->{author}{content} ne $rev->{author}{email} &&
422                                         $rev->{author}{email} =~ m/^([^\@]+)\@(.*)$/ ) {
423                                         if ($2 eq "web") {
424                                                 $c_info{nickname} = $1;
425                                                 $c_info{web_commit} = "1";
426                                         }
427                                 }
428                                 # Mercurial gives date in ISO 8601, well handled by str2time().
429                                 $c_info{when} = str2time($rev->{date});
430                                 # Mercurial doesn't allow empty commit messages, so there
431                                 # should always be a single defined message.
432                                 $c_info{message} = $rev->{msg}{content};
433                                 # Inside "paths" sits a single array "path" that contains
434                                 # multiple paths. Crystal clear :-)
435                                 foreach my $path (@{$rev->{paths}{path}}) {
436                                         push @{$c_info{files}}, {
437                                                 # Mercurial doesn't track file permissions as
438                                                 # Git do, so that's missing here.
439                                                 'file' => $path->{content},
440                                                 'status' => $path->{action},
441                                         };
442                                 }
443                                 # There also exists an XML branch "copies"->"copy", containing
444                                 # source and dest of files that have been copied with "hg cp".
445                                 # The copy action is also registered in "paths" as a removal of
446                                 # source and addition of dest, so it's not needed here.
447                                 push @c_infos, {%c_info};
448                                 use Data::Dumper; # NOTE(review): nothing in this sub uses it; looks like a debugging leftover - confirm before removing
449                         }
450
451                         return wantarray ? @c_infos : $c_infos[0];
452                 }
453
454                 sub hg_sha1 (;$) {
455                         # Return head sha1sum (of given file), or an empty string if none is found.
456                         my $file = shift || undef;
457
458                         # Without a file, no file argument is handed on: a literal '--' here would
459                         # make hg look up a file named '--'. Non-existing file gives empty string.
460                         my $f_info = hg_commit_info(undef, 1, $file);
461                         my $sha1;
462                         if ($f_info->{rev}) {
463                                 ($sha1) = $f_info->{rev} =~ m/($sha1_pattern)/;
464                         }
465                         else {
466                                 debug("Empty sha1sum for '".(defined $file ? $file : '--')."'.");
467                         }
468                         return defined $sha1 ? $sha1 : q{};
469                 }
469
470                 sub rcs_update () {
471                         return run_or_cry('hg', '-q', 'update'); # quiet "hg update"; a failure only produces a warning
472                 }
473
474                 sub rcs_prepedit ($) {
475                         # Called when editing begins: hands back the file's current commit
476                         # sha1sum, which rcs_commit later uses if a merge is required.
477                         my $edited = shift;
478
479                         return hg_sha1($edited);
480                 }
481
482                 sub rcs_commit (@) {
483                         # Try to commit the page; returns undef on _success_ and
484                         # a version of the page with the rcs's conflict markers on
485                         # failure. Reads the "file" and "token" parameters itself and
486                         my %params=@_; # passes the complete list on to rcs_commit_helper.
487
488                         # Check to see if the page has been changed by someone else since
489                         # rcs_prepedit was called.
490                         my $cur    = hg_sha1($params{file});
491                         my ($prev) = $params{token} =~ /^($sha1_pattern)$/; # untaint
492
493                         if (defined $cur && defined $prev && $cur ne $prev) { # $prev stays undef if the token is not a full sha1
494
495 If there was a conflict, the file with conflict markers is returned. Else, the path to the tempdiff, which is to be run to restore previous local state after `rcs_commit_staged`, is returned.
496
497                                 my ($conflict, $tempdiffpath) =
498                                         merge_past($prev, $params{file}, $dummy_commit_msg);
499                                 return defined $conflict
500                                         ? $conflict
501                                         : rcs_commit_helper(
502                                                 @_,
503                                                 merge => 1,
504                                                 tempdiffpath => $tempdiffpath);
505                         }
506
507                         return rcs_commit_helper(@_); # no concurrent change detected: plain commit
508                 }
509
510                 sub rcs_commit_helper (@) {
511                         my %params=@_;
512                         # Runs "hg commit" for rcs_commit and rcs_commit_staged. Parameters used: session, message, file, merge, tempdiffpath. Always returns undef.
513                         my %env=%ENV;
514                         $ENV{HGENCODING} = 'utf-8';
515
516                         my $user="Anonymous";
517                         my $nickname;
518                         if (defined $params{session}) {
519                                 if (defined $params{session}->param("name")) {
520                                         $user = $params{session}->param("name");
521                                 }
522                                 elsif (defined $params{session}->remote_addr()) {
523                                         $user = $params{session}->remote_addr();
524                                 }
525
526                                 # Without a nickname, fall back to the user name, so the address never degenerates to "<@web>".
527                                 $nickname=encode_utf8(defined $params{session}->param("nickname")
528                                         ? $params{session}->param("nickname") : $user);
529                                 $nickname=~s/\s+/_/g;
530                                 $nickname=~s/[^-_0-9[:alnum:]]+//g;
531                                 $ENV{HGUSER} = encode_utf8($user . ' <' . $nickname . '@web>');
532                         }
533
534                         if (! defined $params{message} || ! length $params{message}) {
535                                 $params{message} = "no message given";
536                         }
537
538                         $params{message} = IkiWiki::possibly_foolish_untaint($params{message});
539
540                         my @opts;
541
542 Mercurial rejects file arguments when performing a merging commit. It
543 only does "all or nothing" commits by design when merging, so given file arguments must be discarded. It should not pose a problem.
544
545                         if (exists $params{file} && ! defined $params{merge}) {
546                                 push @opts, '--', $params{file};
547                         }
548
549                         # hg commit returns non-zero if nothing really changed.
550                         # So we should ignore its exit status (hence run_or_non).
551                         run_or_non('hg', 'commit', '-m', $params{message}, '-q', @opts);
552
553 If there were uncommited local changes in srcdir before a merge was done, they are restored here.
554
555                         if (defined $params{tempdiffpath}) {
556                                 hg_local_dirstate_unshelve($params{tempdiffpath});
557                         }
558
559                         %ENV=%env; # undo the HGENCODING/HGUSER changes made above
560                         return undef; # success
561                 }
562
563                 sub rcs_commit_staged (@) {
564                         # Commits all staged changes (staging is done with rcs_add,
565                         # rcs_remove and rcs_rename) by delegating to rcs_commit_helper.
566                         return rcs_commit_helper(@_);
567                 }
568
569                 sub rcs_add ($) {
570                         # Runs "hg add" on the given file through run_or_cry.
571                         my $path = shift;
572                         run_or_cry("hg", "add", $path);
573                 }
574
575                 sub rcs_remove ($) {
576                         # Remove file from archive: runs "hg remove -f" on it through
577                         # run_or_cry.
578                         my $path = shift;
579                         run_or_cry("hg", "remove", "-f", $path);
580                 }
581
582                 sub rcs_rename ($$) {
583                         # Runs "hg rename -f" from the first argument to the second.
584                         my ($from, $to) = (shift, shift);
585                         run_or_cry("hg", "rename", "-f", $from, $to);
586                 }
587
588                 sub rcs_recentchanges ($) {
589                         # Returns one hash per commit reported by hg_commit_info (keys: rev,
590                         # user, nickname, committype, when, message, pages). $num is handed
591                         # on to hg_commit_info. Commits without any files are left out.
592                         my ($num) = @_;
593                         my @c_infos;
594                         foreach my $c_info (hg_commit_info(undef, $num, undef)) {
595                                 my @pagenames;
596                                 for my $page (@{$c_info->{files}}) {
597                                         my $diffurl=defined $config{diffurl} ? $config{diffurl} : '';
598                                         # These substitutions enable defining keywords \[[file]]
599                                         # and \[[r2]] (backward compatibility) in the setup file
600                                         # that will be exchanged with filename and revision
601                                         # respectively.
602                                         $diffurl =~ s/\[\[file\]\]/$page->{file}/g;
603                                         $diffurl =~ s/\[\[r2\]\]/$c_info->{rev}/g;
604                                         push @pagenames, {
605                                                 # pagename() strips suffixes and returns the
606                                                 # path to the file as it is to be represented
607                                                 # in the build dir.
608                                                 page => pagename($page->{file}),
609                                                 diffurl => $diffurl,
610                                         };
611                                 }
612
613                                 # ikiwiki expects each message line as a separate entry. split /^/
614                                 # keeps the line endings like line-wise reading would, but does not
615                                 # fail on wide characters the way an in-memory filehandle does.
616                                 my $message = defined $c_info->{message} ? $c_info->{message} : '';
617                                 my @messagelines = map +{ line => $_ }, split(/^/, $message);
618
619                                 push @c_infos, {
620                                         rev        => $c_info->{rev},
621                                         user       => $c_info->{user},
622                                         nickname   => defined $c_info->{nickname} ?
623                                                         $c_info->{nickname} : $c_info->{user},
624                                         committype => $c_info->{web_commit} ? "web" : "hg",
625                                         when       => $c_info->{when},
626                                         message    => [@messagelines],
627                                         pages      => [@pagenames],
628                                 } if @pagenames;
629                         }
630
631                         return @c_infos;
632                 }
633
634                 sub rcs_diff ($;$) {
635                         # Runs "hg diff -c $rev -g" via safe_hg, handing it a callback that
636                         # stores each line it is given (newline appended), from the first
637                         # "diff --git" line on, until $maxlines lines (if defined) are kept.
638                         my ($rev, $maxlines) = @_;
639                         my @difflines;
640                         my $collect=sub {
641                                 my $text=shift;
642                                 return if defined $maxlines && @difflines == $maxlines;
643                                 if (@difflines || $text=~/^diff --git/) {
644                                         push @difflines, $text."\n";
645                                 }
646                                 return 1;
647                         };
648                         safe_hg(undef, $collect, "hg", "diff", "-c", $rev, "-g");
649
650                         # List context gets the lines, scalar context one joined string.
651                         return wantarray ? @difflines : join("", @difflines);
652                 }
653
654                 {
655                 my %time_cache;
656
657 This is an upstream change I did a week ago or so. Perhaps it can be merged in some clever way with the updated `hg_commit_info` to make one shared lookup cache. Don't know how much would be gained.
658
659                 sub findtimes ($$) {
660                         my $file=shift;
661                         my $id=shift; # 0 = mtime ; 1 = ctime
662                         # Look up $file in the cache built from "hg log"; unknown files yield 0.
663                         if (! keys %time_cache) {
664                                 my $date;
665
666                                 # It doesn't seem possible to specify the format wanted for the
667                                 # changelog (same format as is generated in git.pm:findtimes(),
668                                 # though the date differs slightly) without using a style
669                                 # _file_. There is a "hg log" switch "--template" to directly
670                                 # control simple output formatting, but in this case, the
671                                 # {file} directive must be redefined, which can only be done
672                                 # with "--style".
673                                 #
674                                 # If {file} is not redefined, all files are output on a single
675                                 # line separated with a space. It is not possible to conclude
676                                 # if the space is part of a filename or just a separator, and
677                                 # thus impossible to use in this case.
678                                 #
679                                 # Some output filters are available in hg, but they are not fit
680                                 # for this cause (and would slow down the process
681                                 # unnecessarily).
682
683                                 eval q{use File::Temp};
684                                 error $@ if $@;
685                                 my ($tmpl_fh, $tmpl_filename) = File::Temp::tempfile(UNLINK => 1);
686                                 # The style file must be complete on disk before hg reads it, so close it after writing.
687                                 print $tmpl_fh 'changeset = "{date}\\n{files}\\n"' . "\n";
688                                 print $tmpl_fh 'file = "{file}\\n"' . "\n";
689                                 close($tmpl_fh) or error("failed writing temp file '$tmpl_filename'.");
690                                 foreach my $line (run_or_die('hg', 'log', '--style', $tmpl_filename)) {
691                                         # {date} gives output on the form
692                                         # 1310694511.0-7200
693                                         # where the first number is UTC Unix timestamp with one
694                                         # decimal (decimal always 0, at least on my system)
695                                         # followed by local timezone offset from UTC in
696                                         # seconds.
697                                         if (! defined $date && $line =~ /^\d+\.\d[+-]\d*$/) {
698                                                 $line =~ s/^(\d+).*/$1/;
699                                                 $date=$line;
700                                         }
701                                         elsif (! length $line) {
702                                                 $date=undef;
703                                         }
704                                         else {
705                                                 my $f=$line;
706                                                 # First sighting sets mtime, the last one seen ends up as ctime (hg log presumably lists newest first - confirm).
707                                                 if (! $time_cache{$f}) {
708                                                         $time_cache{$f}[0]=$date; # mtime
709                                                 }
710                                                 $time_cache{$f}[1]=$date; # ctime
711                                         }
712                                 }
713                         }
714
715                         return exists $time_cache{$file} ? $time_cache{$file}[$id] : 0;
716                 }
717
718                 }
719
720                 sub rcs_getctime ($) {
721                         # Asks findtimes for entry 1 (ctime) of the given file.
722                         my ($path) = @_;
723                         return findtimes($path, 1);
724                 }
725
726                 sub rcs_getmtime ($) {
727                         # Asks findtimes for entry 0 (mtime) of the given file.
728                         my ($path) = @_;
729                         return findtimes($path, 0);
730                 }
731
732 The comment just below the function declaration below is taken from `git.pm`. Is it true? Should ikiwiki support sharing its repo with other things? Mercurial-wise that sounds like a world of pain.
733
734                 {
735                 my $ret;
736                 sub hg_find_root {
737                         # The wiki may not be the only thing in the hg repo. Walk upwards
738                         # from the srcdir until a directory holding .hg is found. Returns
739                         # (subdir, rootdir), where subdir is the path walked, "/"-terminated.
740
741                         if (! defined $ret) {
742                                 my $prefix="";
743                                 my $candidate=$config{srcdir};
744                                 until (-d "$candidate/.hg") {
745                                         $prefix=IkiWiki::basename($candidate)."/".$prefix;
746                                         $candidate=IkiWiki::dirname($candidate);
747                                         error("cannot determine root of hg repo")
748                                                 if ! length $candidate;
749                                 }
750                                 # Remember the answer; later calls skip the directory walk.
751                                 $ret=[$prefix, $candidate];
752                         }
753
754                         return @$ret;
755                 }
756
757                 }
758
759                 sub hg_parse_changes (@) {
760                         # Takes one or more commit info hashes (one in rcs_preprevert, possibly
761                         # several in rcs_receive) and returns a hash per changed file with the
762                         # keys file, action (change/add/remove) and path (temp file or undef).
763                         my @c_infos_raw = @_;
764                         my ($subdir, $rootdir) = hg_find_root();
765                         my @c_infos_ret;
766
767                         foreach my $c_info_raw (@c_infos_raw) {
768                                 foreach my $path (@{$c_info_raw->{files}}) {
769                                         my ($file, $action, $temppath);
770
771                                         $file=$path->{file};
772
773                                         # check that all changed files are in the subdir
774                                         if (length $subdir && ! ($file =~ s/^\Q$subdir\E//)) {
775                                                 error sprintf(gettext("you are not allowed to change %s"), $file);
776                                         }
777
778                                         if    ($path->{status} eq "M") { $action="change" }
779                                         elsif ($path->{status} eq "A") { $action="add" }
780                                         elsif ($path->{status} eq "R") { $action="remove" }
781                                         else  { error "unknown status ".$path->{status} }
782
783 I haven't tested the attachment code below. Is it run when there is a non-trusted file upload?
784
785                                         # extract attachment to temp file
786                                         if (($action eq 'add' || $action eq 'change') &&
787                                                 ! pagetype($file)) {
788                                                 # NOTE(review): "hg diff -c" writes the changeset diff, not the file content - confirm this is intended.
789                                                 eval q{use File::Temp};
790                                                 die $@ if $@;
791
792                                                 my $fh;
793                                                 ($fh, $temppath)=File::Temp::tempfile(undef, UNLINK => 1);
794                                                 my $cmd = "cd $hg_dir && ".
795                                                         "hg diff -g -c $c_info_raw->{rev} > '$temppath'";
796                                                 if (system($cmd) != 0) {
797                                                         error("failed writing temp file '$temppath'.");
798                                                 }
799                                         }
800
801                                         push @c_infos_ret, {
802                                                 file => $file,
803                                                 action => $action,
804                                                 path => $temppath,
805                                         };
806                                 }
807                         }
808
809                         return @c_infos_ret;
810                 }
811
812 *TODO:* I don't know what's happening here. I've changed the code to adhere to this file's variables and functions, but it refers to a srcdir _and_ a default repo, which currently isn't available in the Mercurial setup.
813
814 `rcs_receive` is optional and only runs when running a pre-receive hook. Where `$_` comes from and its format are mysteries to me.
815
816 Also, a comment in `git.pm` mentions that we don't want to chdir to a subdir "and only see changes in it" - but this isn't true for either Git or Mercurial to my knowledge. It only seems to happen in `git.pm` since the `git log` command in `git_commit_info` ends with "`-- .`" - if it didn't do that, one wouldn't have to chdir for this reason, I believe.
817
818 In this case we need to stay in default repo instead of srcdir though, so `hg_dir="."` _is_ needed, but not for the abovementioned reason :-) (maybe there's more to it, though).
819
820                 sub rcs_receive () {
821                         my @c_infos_ret; # gathers what hg_parse_changes returns for each input line
822                         while (<>) { # each input line is split below into three whitespace-separated fields
823                                 chomp;
824                                 my ($oldrev, $newrev, $refname) = split(' ', $_, 3);
825                                 # NOTE(review): this record layout is inherited from git.pm - confirm for hg.
826                                 # only allow changes to hg_default_branch
827
828 *TODO:* What happens here? Some Git voodoo. _If_ `$_` has the exact same format for Mercurial, then the below should work just as well here, I think.
829
830                                 if ($refname !~ m|^refs/heads/$config{hg_default_branch}$|) {
831                                         error sprintf(gettext("you are not allowed to change %s"), $refname);
832                                 }
833
834 Comment from `git.pm`:
835
836                                 # Avoid chdir when running git here, because the changes are in
837                                 # the default git repo, not the srcdir repo.  (Also, if a subdir
838                                 # is involved, we don't want to chdir to it and only see
839                                 # changes in it.) The pre-receive hook already puts us in the
840                                 # right place.
841                                 $hg_dir=".";
842                                 push @c_infos_ret,
843                                         hg_parse_changes(hg_commit_info($newrev.":".$oldrev,
844                                                         undef, undef));
845                                 $hg_dir=undef;
846                         }
847
848                         return @c_infos_ret;
849                 }
850
851                 sub rcs_preprevert ($) {
852                         # Returns what hg_parse_changes reports for the given commit; errors out on bad revs and merges.
853                         my ($sha1) = shift(@_) =~ /^($sha1_pattern)$/; # untaint
854
855 The below 4 lines of code are from `git.pm`, but I can't see what they actually do there. Neither Git nor Mercurial only lists changes in working directory when given a command - they always traverse to repository root by themselves. I keep it here for comments, in case I'm missing something.
856
857 *UPDATE:* See earlier note about `git log` ending in "`-- .`".
858
859                         ## Examine changes from root of git repo, not from any subdir,
860                         ## in order to see all changes.
861                         #my ($subdir, $rootdir) = git_find_root();
862                         #$git_dir=$rootdir;
863                         error "unknown commit" if ! defined $sha1; # an undef rev must not reach hg_commit_info
864                         my $c_info=hg_commit_info($sha1, 1, undef) or error "unknown commit";
865                         # NOTE(review): the test below assumes {parents} holds a count, not an array ref - confirm.
866                         # hg revert will fail on merge commits. Add a nice message.
867                         if (exists $c_info->{parents} && $c_info->{parents} > 1) {
868                                 error gettext("you are not allowed to revert a merge");
869                         }
870
871                         my @c_info_ret=hg_parse_changes($c_info);
872
873                         ### Probably not needed, if earlier comment is correct.
874                         #$hg_dir=undef;
875                         return @c_info_ret;
876                 }
877
878                 sub rcs_revert ($) {
879                         # Try to revert the given rev; returns undef on _success_, an error string otherwise.
880                         my $rev = shift;
881                         my ($sha1) = $rev =~ /^($sha1_pattern)$/; # untaint
882                         return "unknown commit" if ! defined $sha1; # never touch the working dir for a bad rev
883                         # Save uncommitted local changes to diff file. Attempt to restore later.
884                         my $tempdiffpath = hg_local_dirstate_shelve($sha1);
885
886                         # Clean dir to latest commit.
887                         run_or_die('hg', 'update', '-C');
888
889 Some voodoo is needed here. `hg backout --tool internal:local -r $sha1` is *almost* good, but if the reversion is done to the directly previous revision, hg automatically commits, which is bad in this case. Instead I generate a reverse diff and pipe it to `import --no-commit`.
890
891                         my $reverted=run_or_non("hg diff -c $sha1 --reverse | hg import --no-commit -");
892
893                         # Restore the shelved local changes whether or not the revert applied.
894                         if ($tempdiffpath) { hg_local_dirstate_unshelve($tempdiffpath) }
895
896                         # undef signals success; anything else is the error message.
897                         return undef if $reverted;
898                         return sprintf(gettext("Failed to revert commit %s"), $sha1);
899                 }
900
901 Below follows code regarding [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve it in another place later, but the code in itself is working.
902
903 Should perhaps add initiation of the bookmark extension here, to support older Mercurial versions.
904
905                 sub rcs_wrapper_postcall($) {
906                         # Update hgrc if it exists: post-commit/incoming hooks carrying the
907                         # .ikiwiki suffix are pointed at the wrapper path given in the setup
908                         # file. The edit is done in place with a backup copy, so hgrc is not
909                         # lost if the loop is interrupted midway. The argument is not used.
910                         # I believe there is a better way to solve this than creating new hooks
911                         # and callbacks. Will await discussion on ikiwiki.info.
912                         my $hgrc=$config{srcdir}.'/.hg/hgrc';
913                         my $backup_suffix='.ikiwiki.bak';
914                         if (-e $hgrc) {
915                                 use File::Spec;
916                                 my $mercurial_wrapper_abspath=File::Spec->rel2abs($config{mercurial_wrapper}, $config{srcdir});
917                                 local ($^I, @ARGV)=($backup_suffix, $hgrc);
918                                 while (defined(my $line = <>)) { # lexical, so the caller's $_ is left alone
919                                         $line =~ s/^(post-commit|incoming)(\.ikiwiki[ \t]*=[ \t]*).*$/$1$2$mercurial_wrapper_abspath/;
920                                         print $line;
921                                 }
922                                 unlink($hgrc.$backup_suffix);
923                         }
924                 }
925
926                 1