added links to my repo
[ikiwiki] / doc / todo / Mercurial_backend_update.mdwn
1 I submitted some changes that added 5 "Yes"es and 2 "Fast"s to Mercurial at [[/rcs]], but some functionality is still missing compared to e.g. `git.pm`, which the Mercurial backend should be able to match feature for feature.
2
3 To do this, a more basic rewrite would simplify things. I inline the complete file below with comments. I don't expect anyone to take the time to read it all at once, but I'd be glad if those interested in the Mercurial backend could do some beta testing.
4
5 * [This specific revision at my hg repo](http://46.239.104.5:81/hg/program/ikiwiki/file/4994ba5e36fa/Plugin/mercurial.pm) ([raw version](http://46.239.104.5:81/hg/program/ikiwiki/raw-file/4994ba5e36fa/Plugin/mercurial.pm)).
6
7 * [My default branch](http://510x.se/hg/program/ikiwiki/file/default/Plugin/mercurial.pm) (where updates will be made, will mention here if anything happens) ([raw version](http://510x.se/hg/program/ikiwiki/raw-file/default/Plugin/mercurial.pm)).
8
9 I break out my comments from the code to make them more readable. I comment all the changes as compared to current upstream. --[[Daniel Andersson]]
10
11 ---
12
13                 #!/usr/bin/perl
14                 package IkiWiki::Plugin::mercurial;
15
16                 use warnings;
17                 use strict;
18                 use IkiWiki;
19                 use Encode;
20                 use open qw{:utf8 :std};
21
22
23 Pattern to validate hg sha1 sums. hg usually truncates the hash to 12
24 characters and prepends a local revision number for output, but internally
25 it keeps a 40 character hash. Will use the long version in this code.
26
27                 my $sha1_pattern = qr/[0-9a-fA-F]{40}/;
28
29 Message to skip in recent changes
30
31                 my $dummy_commit_msg = 'dummy commit';
32
33 *TODO:* `$hg_dir` is not really implemented yet, and will not be until a srcdir/repository distinction is
34 made, as for e.g. Git. It is used in `rcs_receive`, and for attachments in `hg_parse_changes`. See comments in those places, though.
35
36                 my $hg_dir=undef;
37
38                 sub import { # Registers this plugin's checkconfig, getsetup and rcs hooks.
39                         hook(type => "checkconfig", id => "mercurial", call => \&checkconfig);
40                         hook(type => "getsetup", id => "mercurial", call => \&getsetup);
41                         hook(type => "rcs", id => "rcs_update", call => \&rcs_update);
42                         hook(type => "rcs", id => "rcs_prepedit", call => \&rcs_prepedit);
43                         hook(type => "rcs", id => "rcs_commit", call => \&rcs_commit);
44                         hook(type => "rcs", id => "rcs_commit_staged", call => \&rcs_commit_staged);
45                         hook(type => "rcs", id => "rcs_add", call => \&rcs_add);
46                         hook(type => "rcs", id => "rcs_remove", call => \&rcs_remove);
47                         hook(type => "rcs", id => "rcs_rename", call => \&rcs_rename);
48                         hook(type => "rcs", id => "rcs_recentchanges", call => \&rcs_recentchanges);
49                         hook(type => "rcs", id => "rcs_diff", call => \&rcs_diff);
50                         hook(type => "rcs", id => "rcs_getctime", call => \&rcs_getctime);
51                         hook(type => "rcs", id => "rcs_getmtime", call => \&rcs_getmtime);
52                         hook(type => "rcs", id => "rcs_preprevert", call => \&rcs_preprevert);
53                         hook(type => "rcs", id => "rcs_revert", call => \&rcs_revert);
54
55 This last hook is "unsanctioned" from [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve its function
56 another way later.
57
58                         hook(type => "rcs", id => "rcs_wrapper_postcall", call => \&rcs_wrapper_postcall);
59                 }
60
61                 sub checkconfig () { # Queues the configured wrapper (if any) in $config{wrappers}.
62                         if (exists $config{mercurial_wrapper} && length $config{mercurial_wrapper}) {
63                                 push @{$config{wrappers}}, {
64                                         wrapper => $config{mercurial_wrapper},
65                                         wrappermode => (defined $config{mercurial_wrappermode} ? $config{mercurial_wrappermode} : "06755"), # "06755" unless configured
66
67 Next line part of [[Auto-setup and maintain Mercurial wrapper hooks]].
68
69                                         wrapper_postcall => (defined $config{mercurial_wrapper_hgrc_update} ? $config{mercurial_wrapper_hgrc_update} : "1"), # "1" unless configured
70                                 };
71                         }
72                 }
73
74                 sub getsetup () { # Returns the plugin description and its setup options.
75                         return
76                                 plugin => {
77                                         safe => 0, # rcs plugin
78                                         rebuild => undef,
79                                         section => "rcs",
80                                 },
81                                 mercurial_wrapper => {
82                                         type => "string",
83                                         #example => # FIXME add example
84                                         description => "mercurial post-commit hook to generate",
85                                         safe => 0, # file
86                                         rebuild => 0,
87                                 },
88                                 mercurial_wrappermode => {
89                                         type => "string",
90                                         example => '06755',
91                                         description => "mode for mercurial_wrapper (can safely be made suid)",
92                                         safe => 0,
93                                         rebuild => 0,
94                                 },
95                                 mercurial_wrapper_hgrc_update => {
96                                         type => "string",
97                                         example => "1",
98                                         description => "updates existing hgrc to reflect path changes for mercurial_wrapper",
99                                         safe => 0,
100                                         rebuild => 0,
101                                 },
102                                 historyurl => {
103                                         type => "string",
104                                         example => "http://example.com:8000/log/tip/\[[file]]",
105                                         description => "url to hg serve'd repository, to show file history (\[[file]] substituted)",
106                                         safe => 1,
107                                         rebuild => 1,
108                                 },
109                                 diffurl => {
110                                         type => "string",
111                                         example => "http://localhost:8000/?fd=\[[r2]];file=\[[file]]",
112                                         description => "url to hg serve'd repository, to show diff (\[[file]] and \[[r2]] substituted)",
113                                         safe => 1,
114                                         rebuild => 1,
115                                 },
116                 }
117
118                 sub safe_hg (&@) {
119                         # Start a child process safely without resorting to /bin/sh.
120                         # Returns command output (in list context) or success state
121                         # (in scalar context), or runs the specified data handler.
122                         # Arguments: error handler, data handler (both may be undef), command.
123                         my ($error_handler, $data_handler, @cmdline) = @_;
124                         # Implicit fork: the child's STDOUT is readable from $OUT.
125                         my $pid = open my $OUT, "-|";
126
127                         error("Cannot fork: $!") if !defined $pid;
128
129                         if (!$pid) {
130                                 # In child.
131                                 # hg commands want to be in wc.
132
133 This `$hg_dir` logic means nothing and could be stripped until srcdir/repdir distinction is made (it's stripped in upstream `mercurial.pm` right now).
134
135                                 if (! defined $hg_dir) {
136                                         chdir $config{srcdir}
137                                             or error("cannot chdir to $config{srcdir}: $!");
138                                 }
139                                 else {
140                                         chdir $hg_dir or error("cannot chdir to $hg_dir: $!");
141                                 }
142
143                                 exec @cmdline or error("Cannot exec '@cmdline': $!");
144                         }
145                         # In parent.
146
147                         my @lines;
148                         while (<$OUT>) {
149                                 chomp;
150                                 # No data handler: collect the line. Otherwise stop reading once the handler returns false.
151                                 if (! defined $data_handler) {
152                                         push @lines, $_;
153                                 }
154                                 else {
155                                         last unless $data_handler->($_);
156                                 }
157                         }
158
159                         close $OUT;
160                         # close() on a piped handle waits for the child and sets $?.
161                         $error_handler->("'@cmdline' failed: $!") if $? && $error_handler;
162
163                         return wantarray ? @lines : ($? == 0);
164                 }
165                 # Shorthands for safe_hg: call error(), warn, or stay silent when hg fails.
166                 sub run_or_die ($@) { return safe_hg(\&error, undef, @_); }
167                 sub run_or_cry ($@) { return safe_hg(sub { warn @_; }, undef, @_); }
168                 sub run_or_non ($@) { return safe_hg(undef, undef, @_); }
169
170
171 To handle uncommitted local changes ("ULC"s for short), I use logic similar to the (non-standard) "shelve" extension to Mercurial. By taking a diff before resetting to the last commit, making changes and then applying the diff again, one can do things Mercurial otherwise refuses, which is necessary later.
172
173 This function creates this diff.
174
175                 sub hg_local_dirstate_shelve ($) {
176                         # Creates a diff snapshot of uncommitted changes existing in the srcdir.
177                         # Takes a string (preferably a revision) used to build a unique and
178                         # identifiable diff name. Returns the diff's path, or undef if no diff.
179                         my $tempdiffname = "diff_".shift;
180                         my $tempdiffpath;
181                         if (my @tempdiff = run_or_die('hg', 'diff', '-g')) {
182                                 # join() instead of assigning to the global $": an error() raised
183                                 # by writefile() would otherwise leave $" set to "\n" for good.
184                                 writefile($tempdiffname, $config{srcdir},
185                                                 join("\n", @tempdiff));
186                                 $tempdiffpath = $config{srcdir}.'/'.$tempdiffname;
187                         }
188                         return $tempdiffpath;
189                 }
190
191 This function restores the diff.
192
193                 sub hg_local_dirstate_unshelve ($) {
194                         # Re-applies a diff snapshot (path given as argument; a false value is
195                         # a no-op) to bring back the initial dir state. If the import succeeds,
196                         # the diff file is removed and undef is returned. Otherwise it is kept,
197                         # so that possibly important local changes are not lost; this clutters
198                         # the directory, though. Better ways to handle this are welcome. A true
199                         # way around this dance is a separate repository, pushed to the srcdir.
200                         if (my $tempdiffpath = shift) {
201                                 if (run_or_cry('hg', 'import', '--no-commit', $tempdiffpath)) {
202                                         unlink($tempdiffpath);
203                                         return undef;
204                                 }
205                         }
206                 }
207
208 This makes online diffing possible. The approach is similar to the one in `git.pm`, which is [discussed at some length in a comment there](http://source.ikiwiki.branchable.com/?p=source.git;a=blob;f=IkiWiki/Plugin/git.pm;h=cf7fbe9b7c43ee53180612d0411e6202074fb9e0;hb=refs/heads/master#l211).
209
210                 sub merge_past ($$$) {
211                         # Commit $file at revision $sha1 (where the edit started) and merge it
212                         # with the current head. Returns (conflict text or undef, shelved diff).
213                         my ($sha1, $file, $message) = @_;
214                         my @undo;         # Undo stack for cleanup in case of an error
215                         my $conflict;     # File content with conflict markers
216                         my $tempdiffpath; # Path to diff of uncommitted local changes, if any
217
218
219                         eval {
220                                 # Hide local changes from Mercurial by renaming the modified
221                                 # file.  Relative paths must be converted to absolute for
222                                 # renaming.
223                                 my ($target, $hidden) = (
224                                         "$config{srcdir}/${file}",
225                                         "$config{srcdir}/${file}.${sha1}"
226                                 );
227                                 rename($target, $hidden)
228                                         or error("rename '$target' to '$hidden' failed: $!");
229                                 # Ensure to restore the renamed file on error.
230                                 push @undo, sub {
231                                         return if ! -e "$hidden"; # already renamed
232                                         rename($hidden, $target)
233                                             or warn "rename '$hidden' to '$target' failed: $!";
234                                 };
235
236
237 Take a snapshot of srcdir to be able to restore uncommitted local changes ("ULCs") afterwards.
238
239 * This must happen _after_ the merging commit in Mercurial, there is no way around it. By design hg refuses to commit merges if there are other changes to tracked content present, no matter how much you beg.
240
241 * ULCs to the file being edited are special: they can't be diffed here since `editpage.pm` already has overwritten the file. When the web edit session started though, the ULC version (not the committed
242 version) was read into the form, so in a way, the web user _has already merged_ with the ULC. It is not saved in commit history, but that is the exact consequence of "uncommitted" changes. If an ULC is done between the time the web edit started and was submitted, then it is lost, though.  All in all, one shouldn't be editing the srcdir directly when web edits of the same file are allowed. Clone the repo and push changes instead.
243
244 Much of these issues disappear, I believe, if one works with a master repo which only is pushed to.
245
246                                 $tempdiffpath = hg_local_dirstate_shelve($sha1); # no "my" here: this must set the variable returned at the end
247
248                                 # Ensure uniqueness of bookmarks.
249                                 my $bookmark_upstream_head = "current_head_$sha1";
250                                 my $bookmark_edit_base = "edit_base_$sha1";
251
252                                 # Git and Mercurial differ in the branch concept. Mercurial's
253                                 # "bookmarks" are closer in function in this regard.
254
255 Bookmarks aren't standard until Mercurial 1.8 ([2011--02--10](http://selenic.com/hg/rev/d4ab9486e514)), but they've been bundled with Mercurial since ~2008, so they can be enabled by writing a `hgrc`, which is also being worked on.
256
257                                 # Create a bookmark at current tip.
258                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
259                                                 $bookmark_upstream_head) };
260                                 run_or_die('hg', 'bookmark', $bookmark_upstream_head);
261
262                                 # Create a bookmark at the revision from which the edit was
263                                 # started and switch to it, discarding changes (they are stored
264                                 # in $tempdiff and the hidden file at the moment).
265                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
266                                                 $bookmark_edit_base) };
267                                 run_or_die('hg', 'bookmark', '-r', $sha1, $bookmark_edit_base);
268                                 run_or_die('hg', 'update', '-C', $bookmark_edit_base);
269
270                                 # Reveal the modified file.
271                                 rename($hidden, $target)
272                                     or error("rename '$hidden' to '$target' failed: $!");
273
274                                 # Commit at the bookmarked revision, creating a new head.
275                                 run_or_cry('hg', 'commit', '-m', $message);
276
277                                 # Attempt to merge the newly created head with upstream head.
278                                 # '--tool internal:merge' to avoid spawning a GUI merger.
279
280 (*Semi-TODO:* How do you make this command quiet? On failed merge, it
281 always writes to STDERR and clutters the web server log.)
282
283                                 if (!run_or_non('hg', 'merge', '--tool', 'internal:merge',
284                                                 $bookmark_upstream_head)) {
285                                         # ..., otherwise return file with conflict markers.
286                                         $conflict = readfile($target);
287
288                                         # The hardcore reset approach. Keep your hands inside
289                                         # the cart.
290                                         run_or_die('hg', 'rollback');
291                                         run_or_die('hg', 'update', '-C',
292                                                 $bookmark_upstream_head);
293                                         if ($tempdiffpath) {
294                                                 hg_local_dirstate_unshelve($tempdiffpath);
295                                         }
296
297 Other approaches tried here:
298
299 1. Clean up merge attempt,
300
301         run_or_die('hg', 'update', '-C', $bookmark_upstream_head);
302
303 2. Redo "merge", using only upstream head versions,
304
305         run_or_die('hg', 'merge', '--tool', 'internal:local', $bookmark_edit_base);
306
307 3. dummy commit to close head.
308
309         run_or_non('hg', 'commit', '-m', $message);
310
311 This creates a cluttered and erroneous history. We
312 tell Mercurial to merge, even though we in practice
313 discard. This creates problems when trying to revert
314 changes.
315
316 Other attempt:
317
318 1. Discard merge attempt and switch to temp head,
319
320         run_or_die('hg', 'update', '-C', $bookmark_edit_base);
321
322 2. close the temp head (why do they call the command that in practice closes heads "--close-branch"?),
323
324         run_or_non('hg', 'commit', '--close-branch', '-m', $message);
325
326 3. restore working directory to pre-fiddling status.
327
328         run_or_die('hg', 'update', $bookmark_upstream_head);
329
330 ...but this requires the same amount of forks as the
331 above method, and confuses other parts of ikiwiki
332 since the upstream head is now the third newest
333 revision. Maybe that particular problem is solvable
334 by setting a global default bookmark that follows the
335 main tip.  It will leave clutter in the revision
336 history, though. Two extra commits that in practice
337 don't hold relevant information will be recorded for
338 each failed merge attempt.
339
340 To only create one extra commit, one could imagine
341 adding `--close-branch` to the commit that initially
342 created the new head (since there is no problem
343 merging with closed heads), but it's not possible to
344 close and create a head at the same time, apparently.
345
346                                 }
347                         };
348                         my $failure = $@;
349
350                         # Process undo stack (in reverse order). By policy, cleanup actions
351                         # should normally print a warning on failure.
352                         while (my $handle = pop @undo) {
353                                 $handle->();
354                         }
355
356                         error("Mercurial merge failed!\n$failure\n") if $failure;
357
358                         return ($conflict, $tempdiffpath);
359                 }
360
361                 sub hg_commit_info ($;$;$) {
362                         # Return a list of commit info hashes (in scalar context: the first
363                         # one) for at most num commits starting from the given sha1sum, or
364                         # nothing if hg gave no output. Optionally restricted to one file.
365 This could be optimized by using a lookup cache similar to
366 `findtimes()`. By adding `KeyAttr => ['node']` to `XMLin()` options, one
367 could use the revision ID as key and do a single massive history
368 lookup and later just check if the given revision already exists as a
369 key.  Right now I'm at the "don't optimize it yet" stage, though.
370
371 This uses Mercurial's built-in `--style xml` and parses it with `XML::Simple`. Mercurial's log output is otherwise somewhat cumbersome to get good stuff out of, so this XML solution is quite good, I think. It adds module dependency, but XML::Simple seems fairly standard (but what do I know, I've used 1 Perl installation in my life).
372
373                         use XML::Simple;
374                         use Date::Parse;
375
376                         my ($sha1, $num, $file) = @_;
377
378                         my @opts;
379                         if (defined $sha1) {
380                                 if ($sha1 =~ m/^($sha1_pattern)$/) {
381                                         push @opts, ('-r', $1.':0');
382                                 }
383                                 elsif ($sha1 =~ m/^($sha1_pattern):($sha1_pattern)$/) {
384                                         push @opts, ('-r', $1.':'.$2);
385                                 }
386                         }
387                         push @opts, ('--limit', $num) if defined $num;
388                         push @opts, ('--', $file) if defined $file;
389
390                         my %env=%ENV;
391                         $ENV{HGENCODING} = 'utf-8';
392                         my @xml = run_or_cry('hg', 'log', '-v', '--style', 'xml', @opts);
393                         %ENV=%env;
394
395                         # No output at all: bare return, so list-context callers get an empty list (not a list holding undef).
396                         return if !@xml;
397
398 In some places it is clear that I'm coding ad-hoc Perl. I don't know if this is a reasonably efficient way to give input to `XMLin`, but it works.
399
400                         # Want to preserve linebreaks in multiline comments, so the lines
401                         # are joined with "\n". join() is used instead of assigning to the
402                         # global $", which a die inside XMLin() would leave modified.
403                         my $xmllog = XMLin(join("\n", @xml),
404                                 ForceArray => ['logentry', 'parent', 'copy', 'path']);
405
406                         my @c_infos;
407                         foreach my $rev (@{$xmllog->{logentry}}) {
408                                 my %c_info;
409                                 # In Mercurial, "rev" is technically the strictly local
410                                 # revision number.  What ikiwiki wants is what is called
411                                 # "node": a globally defined SHA1 checksum.
412                                 $c_info{rev} = $rev->{node};
413                                 foreach my $parent (@{$rev->{parent}}) {
414                                         push @{$c_info{parents}}, {rev => $parent->{node}};
415                                 }
416                                 $c_info{user} = $rev->{author}{content};
417                                 # Mercurial itself parses out and stores an email address if
418                                 # present in author name. If not, hg sets email to author name.
419                                 if ( $rev->{author}{content} ne $rev->{author}{email} &&
420                                         $rev->{author}{email} =~ m/^([^\@]+)\@(.*)$/ ) {
421                                         if ($2 eq "web") {
422                                                 $c_info{nickname} = $1;
423                                                 $c_info{web_commit} = "1";
424                                         }
425                                 }
426                                 # Mercurial gives date in ISO 8601, well handled by str2time().
427                                 $c_info{when} = str2time($rev->{date});
428                                 # Mercurial doesn't allow empty commit messages, so there
429                                 # should always be a single defined message.
430                                 $c_info{message} = $rev->{msg}{content};
431                                 # Inside "paths" sits a single array "path" that contains
432                                 # multiple paths. Crystal clear :-)
433                                 foreach my $path (@{$rev->{paths}{path}}) {
434                                         push @{$c_info{files}}, {
435                                                 # Mercurial doesn't track file permissions as
436                                                 # Git do, so that's missing here.
437                                                 'file' => $path->{content},
438                                                 'status' => $path->{action},
439                                         };
440                                 }
441                                 # There also exists an XML branch "copies"->"copy", containing
442                                 # source and dest of files that have been copied with "hg cp".
443                                 # The copy action is also registered in "paths" as a removal of
444                                 # source and addition of dest, so it's not needed here.
445                                 push @c_infos, {%c_info};
446                                 use Data::Dumper; # NOTE(review): looks like a debugging leftover; Dumper is not used in this sub
447                         }
448
449                         return wantarray ? @c_infos : $c_infos[0];
450                 }
451
452                 sub hg_sha1 (;$) {
453                         # Look up the head sha1sum (of the given file, if one is passed).
454                         my $path = shift || q{--};
455
456                         # A file unknown to hg is not an error; there is just no revision.
457                         my $newest = hg_commit_info(undef, 1, $path);
458                         my $node = $newest->{rev};
459                         if (! $node) {
460                                 debug("Empty sha1sum for '$path'.");
461                                 return q{};
462                         }
463                         # Keep only the part that looks like a full 40 character hash.
464                         my ($sha1) = $node =~ m/($sha1_pattern)/;
465                         return defined $sha1 ? $sha1 : q{};
466                 }
467
468                 sub rcs_update () {
469                         return run_or_cry('hg', '-q', 'update'); # a failure only warns
470                 }
471
472                 sub rcs_prepedit ($) {
473                         # Called when editing begins: hands back the file's current commit
474                         # sha1sum, which rcs_commit later uses if a merge is required.
475                         my $edited_file = shift;
476
477                         return hg_sha1($edited_file);
478                 }
479
480                 sub rcs_commit (@) {
481                         # Attempts to commit the page. Gives back undef when that
482                         # _succeeds_; on a merge conflict, gives back the page text
483                         # including the rcs's conflict markers.
484                         my %params=@_;
485
486                         # Compare the file's head revision now with the one recorded by
487                         # rcs_prepedit, to see whether someone else changed the page.
488                         my $head_now    = hg_sha1($params{file});
489                         my ($edit_base) = $params{token} =~ /^($sha1_pattern)$/; # untaint
490
491                         if (defined $head_now && defined $edit_base && $head_now ne $edit_base) {
492
493 If there was a conflict, the file with conflict markers is returned. Else, the path to the tempdiff, which is to be run to restore previous local state after `rcs_commit_staged`, is returned.
494
495                                 my ($marked_up, $shelved_diff) =
496                                         merge_past($edit_base, $params{file}, $dummy_commit_msg);
497                                 return $marked_up if defined $marked_up;
498
499                                 # Clean merge: finish it with a merging commit.
500                                 return rcs_commit_helper(
501                                         @_, merge => 1,
502                                         tempdiffpath => $shelved_diff);
503                         }
504
505                         return rcs_commit_helper(@_);
506                 }
507
508                 sub rcs_commit_helper (@) {
509                         # Runs "hg commit" with the given message (and file, unless merging),
510                         # attributed to the session's user if any. Always returns undef.
511                         my %params=@_;
512                         my %env=%ENV; # restored before returning
513                         $ENV{HGENCODING} = 'utf-8';
514                         my $user="Anonymous";
515                         my $nickname;
516                         if (defined $params{session}) {
517                                 if (defined $params{session}->param("name")) {
518                                         $user = $params{session}->param("name");
519                                 }
520                                 elsif (defined $params{session}->remote_addr()) {
521                                         $user = $params{session}->remote_addr();
522                                 }
523                                 $nickname = $user; # fallback when no nickname is set, so the address is never "<@web>"
524                                 if (defined $params{session}->param("nickname")) {
525                                         $nickname=encode_utf8($params{session}->param("nickname"));
526                                         $nickname=~s/\s+/_/g;
527                                         $nickname=~s/[^-_0-9[:alnum:]]+//g;
528                                 }
529                                 $ENV{HGUSER} = encode_utf8($user . ' <' . $nickname . '@web>');
530                         }
531
532                         if (! length $params{message}) {
533                                 $params{message} = "no message given";
534                         }
535
536                         $params{message} = IkiWiki::possibly_foolish_untaint($params{message});
537
538                         my @opts;
539
540 Mercurial rejects file arguments when performing a merging commit. It
541 only does "all or nothing" commits by design when merging, so given file arguments must be discarded. It should not pose a problem.
542
543                         if (exists $params{file} && ! defined $params{merge}) {
544                                 push @opts, '--', $params{file};
545                         }
546
547                         # hg commit returns non-zero if nothing really changed.
548                         # So we should ignore its exit status (hence run_or_non).
549                         run_or_non('hg', 'commit', '-m', $params{message}, '-q', @opts);
550
551 If there were uncommitted local changes in srcdir before a merge was done, they are restored here.
552
553                         if (defined $params{tempdiffpath}) {
554                                 hg_local_dirstate_unshelve($params{tempdiffpath});
555                         }
556
557                         %ENV=%env;
558                         return undef; # success
559                 }
560
561                 sub rcs_commit_staged (@) {
562                         # Commits all staged changes (those staged using rcs_add, rcs_remove
563                         # and rcs_rename) by passing every argument on to rcs_commit_helper.
564                         return rcs_commit_helper(@_);
565                 }
566
567                 sub rcs_add ($) {
568                         # Runs "hg add" on the given file.
569                         my $file = shift;
570                         run_or_cry(qw(hg add), $file);
571                 }
572
573                 sub rcs_remove ($) {
574                         # Runs "hg remove -f" on the given file.
575                         my $file = shift;
576
577                         run_or_cry(qw(hg remove -f), $file);
578                 }
579
580                 sub rcs_rename ($$) {
581                         # Runs "hg rename -f" from the first path to the second.
582                         my ($from, $to) = @_;
583                         run_or_cry(qw(hg rename -f), $from, $to);
584                 }
585
586                 sub rcs_recentchanges ($) {
587                         my ($num) = @_; # handed to hg_commit_info as its second argument
588                         # Returns a list with one hash per commit that lists any files.
589                         my @c_infos;
590
591                         foreach my $c_info (hg_commit_info(undef, $num, undef)) {
592                                 my @pagenames;
593                                 for my $page (@{$c_info->{files}}) {
594                                         my $diffurl=defined $config{diffurl} ?
595                                                 $config{diffurl} : '';
596                                         # These substitutions enable defining keywords \[[file]]
597                                         # and \[[r2]] (backward compatibility) in the setup file
598                                         # that will be exchanged with filename and revision
599                                         # respectively.
600                                         $diffurl =~ s/\[\[file\]\]/$page->{file}/go;
601                                         $diffurl =~ s/\[\[r2\]\]/$c_info->{rev}/go;
602                                         push @pagenames, {
603                                                 # pagename() strips suffixes and returns the
604                                                 # path to the file as it is to be represented
605                                                 # in the build dir.
606                                                 page => pagename($page->{file}),
607                                                 diffurl => $diffurl,
608                                         };
609                                 }
610
611                                 # ikiwiki expects each line of the commit message as a separate
612                                 # entry; lines are read as-is (not chomped) from the message text.
613                                 my @messagelines;
614                                 open my $message, '<', \$c_info->{message}; # NOTE(review): result of open is not checked
615                                 while (<$message>) { push @messagelines, { line => $_ } };
616
617                                 push @c_infos, {
618                                         rev        => $c_info->{rev},
619                                         user       => $c_info->{user},
620                                         nickname   => defined $c_info->{nickname} ?
621                                                         $c_info->{nickname} : $c_info->{user},
622                                         committype => $c_info->{web_commit} ? "web" : "hg",
623                                         when       => $c_info->{when},
624                                         message    => [@messagelines],
625                                         pages      => [@pagenames],
626                                 } if @pagenames; # commits without any listed files are left out
627                         }
628
629                         return @c_infos;
630                 }
631
632                 sub rcs_diff ($;$) {
633                         # Collects the output of "hg diff -c $rev -g". Returns the lines as
634                         # a list in list context and as one joined string otherwise; when
635                         # $maxlines is defined, no more than that many lines are kept.
636                         my ($rev, $maxlines) = @_;
637                         my @diff;
638
639                         my $collect = sub {
640                                 my ($text) = @_;
641                                 # Limit reached: return false (presumably makes safe_hg stop).
642                                 return if defined $maxlines && @diff == $maxlines;
643                                 # Nothing is kept until the first "diff --git" header line.
644                                 push @diff, "$text\n" if @diff || $text =~ /^diff --git/;
645                                 return 1;
646                         };
647
648                         safe_hg(undef, $collect, "hg", "diff", "-c", $rev, "-g");
649                         return wantarray ? @diff : join("", @diff);
650                 }
651
652                 {
653                 my %time_cache; # file name => [mtime, ctime], filled on first findtimes call
654
655 This is an upstream change I did a week ago or so. Perhaps it can be merged in some clever way with the updated `hg_commit_info` to make one shared lookup cache. Don't know how much would be gained.
656
657                 sub findtimes ($$) {
658                         my $file=shift;
659                         my $id=shift; # 0 = mtime ; 1 = ctime
660                         # Returns the cached time for $file, or 0 if "hg log" never listed it.
661                         if (! keys %time_cache) {
662                                 my $date;
663
664                                 # It doesn't seem possible to specify the format wanted for the
665                                 # changelog (same format as is generated in git.pm:findtimes(),
666                                 # though the date differs slightly) without using a style
667                                 # _file_. There is a "hg log" switch "--template" to directly
668                                 # control simple output formatting, but in this case, the
669                                 # {file} directive must be redefined, which can only be done
670                                 # with "--style".
671                                 #
672                                 # If {file} is not redefined, all files are output on a single
673                                 # line separated with a space. It is not possible to conclude
674                                 # if the space is part of a filename or just a separator, and
675                                 # thus impossible to use in this case.
676                                 #
677                                 # Some output filters are available in hg, but they are not fit
678                                 # for this cause (and would slow down the process
679                                 # unnecessarily).
680
681                                 eval q{use File::Temp};
682                                 error $@ if $@;
683                                 my ($tmpl_fh, $tmpl_filename) = File::Temp::tempfile(UNLINK => 1);
684
685                                 print $tmpl_fh 'changeset = "{date}\\n{files}\\n"' . "\n";
686                                 print $tmpl_fh 'file = "{file}\\n"' . "\n";
687                                 close $tmpl_fh; # flush the style file to disk before hg reads it
688                                 foreach my $line (run_or_die('hg', 'log', '--style', $tmpl_filename)) {
689                                         # {date} gives output on the form
690                                         # 1310694511.0-7200
691                                         # where the first number is UTC Unix timestamp with one
692                                         # decimal (decimal always 0, at least on my system)
693                                         # followed by local timezone offset from UTC in
694                                         # seconds.
695                                         if (! defined $date && $line =~ /^\d+\.\d[+-]\d*$/) {
696                                                 $line =~ s/^(\d+).*/$1/;
697                                                 $date=$line;
698                                         }
699                                         elsif (! length $line) {
700                                                 $date=undef;
701                                         }
702                                         else {
703                                                 my $f=$line;
704                                                 # The first date seen for a file is kept as mtime, the last as ctime.
705                                                 if (! $time_cache{$f}) {
706                                                         $time_cache{$f}[0]=$date; # mtime
707                                                 }
708                                                 $time_cache{$f}[1]=$date; # ctime
709                                         }
710                                 }
711                         }
712
713                         return exists $time_cache{$file} ? $time_cache{$file}[$id] : 0;
714                 }
715
716                 }
717
718                 sub rcs_getctime ($) {
719                         # ctime lookup: findtimes() is asked for index 1.
720                         my ($file) = @_;
721                         return findtimes($file, 1);
722                 }
723
724                 sub rcs_getmtime ($) {
725                         # mtime lookup: findtimes() is asked for index 0.
726                         my ($file) = @_;
727                         return findtimes($file, 0);
728                 }
729
730 The comment just below the function declaration below is taken from `git.pm`. Is it true? Should ikiwiki support sharing its repo with other things? Mercurial-wise that sounds like a world of pain.
731
732                 {
733                 my $ret; # cached [$subdir, $dir] pair from the first successful lookup
734                 sub hg_find_root {
735                         # The wiki may not be the only thing in the hg repo.
736                         # Determine if it is in a subdirectory by examining the srcdir,
737                         # and its parents, looking for the .hg directory.
738                         # Returns ($subdir, $dir): the path below the root and the root itself.
739                         return @$ret if defined $ret;
740
741                         my $subdir="";
742                         my $dir=$config{srcdir};
743                         while (! -d "$dir/.hg") {
744                                 $subdir=IkiWiki::basename($dir)."/".$subdir; # prepend the component being left
745                                 $dir=IkiWiki::dirname($dir);
746                                 if (! length $dir) {
747                                         error("cannot determine root of hg repo");
748                                 }
749                         }
750
751                         $ret=[$subdir, $dir];
752                         return @$ret;
753                 }
754
755                 }
756
757                 sub hg_parse_changes (@) {
758                         # Takes one commit info hash (rcs_preprevert) or several of them
759                         # (rcs_receive); returns a list of {file, action, path} hashes.
760                         my @c_infos_raw = @_;
761
762                         my ($subdir, $rootdir) = hg_find_root();
763                         my @c_infos_ret;
764
765                         foreach my $c_info_raw (@c_infos_raw) {
766                                 foreach my $path (@{$c_info_raw->{files}}) {
767                                         my ($file, $action, $temppath);
768
769                                         $file=$path->{file};
770
771                                         # check that all changed files are in the subdir (matched literally)
772                                         if (length $subdir && ! ($file =~ s/^\Q$subdir\E//)) {
773                                                 error sprintf(gettext("you are not allowed to change %s"), $file);
774                                         }
775
776                                         if    ($path->{status} eq "M") { $action="change" }
777                                         elsif ($path->{status} eq "A") { $action="add" }
778                                         elsif ($path->{status} eq "R") { $action="remove" }
779                                         else  { error "unknown status ".$path->{status} }
780
781 I haven't tested the attachment code below. Is it run when there is an non-trusted file upload?
782
783                                         # extract attachment (a file without a page type) to temp file
784                                         if (($action eq 'add' || $action eq 'change') &&
785                                                 ! pagetype($file)) {
786
787                                                 eval q{use File::Temp};
788                                                 die $@ if $@;
789
790                                                 my $fh;
791                                                 ($fh, $temppath)=File::Temp::tempfile(undef, UNLINK => 1);
792                                                 my $cmd = "cd $hg_dir && ".
793                                                         "hg diff -g -c $c_info_raw->{rev} > '$temppath'";
794                                                 if (system($cmd) != 0) {
795                                                         error("failed writing temp file '$temppath'.");
796                                                 }
797                                         }
798
799                                         push @c_infos_ret, {
800                                                 file => $file,
801                                                 action => $action,
802                                                 path => $temppath, # undef unless an attachment was extracted
803                                         };
804                                 }
805                         }
806
807                         return @c_infos_ret;
808                 }
809
810 *TODO:* I don't know what's happening here. I've changed the code to adhere to this file's variables and functions, but it refers to a srcdir _and_ a default repo, which currently isn't available in the Mercurial setup.
811
812 `rcs_receive` is optional and only runs when running a pre-receive hook. Where `$_` comes from and its format are mysteries to me.
813
814 Also, a comment in `git.pm` mentions that we don't want to chdir to a subdir "and only see changes in it" - but this isn't true for either Git or Mercurial to my knowledge. It only seems to happen in `git.pm` since the `git log` command in `git_commit_info` ends with "`-- .`" - if it didn't do that, one wouldn't have to chdir for this reason, I believe.
815
816 In this case we need to stay in default repo instead of srcdir though, so `hg_dir="."` _is_ needed, but not for the abovementioned reason :-) (maybe there's more to it, though).
817
818                 sub rcs_receive () {
819                         my @c_infos_ret; # parsed changes of every input line, returned at the end
820                         while (<>) { # one input line per iteration (files named in @ARGV, else STDIN)
821                                 chomp;
822                                 my ($oldrev, $newrev, $refname) = split(' ', $_, 3);
823
824                                 # only allow changes to hg_default_branch
825
826 *TODO:* What happens here? Some Git voodoo. _If_ `$_` has the exact same format for Mercurial, then the below should work just as well here, I think.
827
828                                 if ($refname !~ m|^refs/heads/$config{hg_default_branch}$|) {
829                                         error sprintf(gettext("you are not allowed to change %s"), $refname);
830                                 }
831
832 Comment from `git.pm`:
833
834                                 # Avoid chdir when running git here, because the changes are in
835                                 # the default git repo, not the srcdir repo.  (Also, if a subdir
836                                 # is involved, we don't want to chdir to it and only see
837                                 # changes in it.) The pre-receive hook already puts us in the
838                                 # right place.
839                                 $hg_dir=".";
840                                 push @c_infos_ret,
841                                         hg_parse_changes(hg_commit_info($newrev.":".$oldrev,
842                                                         undef, undef));
843                                 $hg_dir=undef; # reset once this line's changes have been parsed
844                         }
845
846                         return @c_infos_ret;
847                 }
848
849                 sub rcs_preprevert ($) {
850                         my $rev=shift; # revision to examine; must be a full 40-character hash
851                         my ($sha1) = $rev =~ /^($sha1_pattern)$/ or error "unknown commit"; # untaint; reject anything else
852
853 The below 4 lines of code are from `git.pm`, but I can't see what they actually do there. Neither Git nor Mercurial only lists changes in working directory when given a command - they always traverse to repository root by themselves. I keep it here for comments, in case I'm missing something.
854
855 *UPDATE:* See earlier note about `git log` ending in "`-- .`".
856
857                         ## Examine changes from root of git repo, not from any subdir,
858                         ## in order to see all changes.
859                         #my ($subdir, $rootdir) = git_find_root();
860                         #$git_dir=$rootdir;
861
862                         my $c_info=hg_commit_info($sha1, 1, undef) or error "unknown commit";
863
864                         # hg revert will fail on merge commits. Add a nice message.
865                         if (exists $c_info->{parents} && $c_info->{parents} > 1) {
866                                 error gettext("you are not allowed to revert a merge");
867                         }
868
869                         my @c_info_ret=hg_parse_changes($c_info);
870
871                         ### Probably not needed, if earlier comment is correct.
872                         #$hg_dir=undef;
873                         return @c_info_ret; # the commit's changes as parsed by hg_parse_changes
874                 }
875
876                 sub rcs_revert ($) {
877                         # Try to revert the given rev; returns undef on _success_, else an error message.
878                         my $rev = shift;
879                         my ($sha1) = $rev =~ /^($sha1_pattern)$/; # untaint
880                         return "unknown commit" unless defined $sha1; # not a full hash: leave the working dir untouched
881                         # Save uncommitted local changes to diff file. Attempt to restore later.
882                         my $tempdiffpath = hg_local_dirstate_shelve($sha1);
883
884                         # Clean dir to latest commit.
885                         run_or_die('hg', 'update', '-C');
886
887 Some voodoo is needed here. `hg backout --tool internal:local -r $sha1` is *almost* good, but if the reversion is done to the directly previous revision, hg automatically commits, which is bad in this case. Instead I generate a reverse diff and pipe it to `import --no-commit`.
888
889                         if (run_or_non("hg diff -c $sha1 --reverse | hg import --no-commit -")) {
890                                 if ($tempdiffpath) { hg_local_dirstate_unshelve($tempdiffpath) }
891                                 return undef;
892                         }
893                         else {
894                                 if ($tempdiffpath) { hg_local_dirstate_unshelve($tempdiffpath) }
895                                 return sprintf(gettext("Failed to revert commit %s"), $sha1);
896                         }
897                 }
898
899 Below follows code regarding [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve it in another place later, but the code in itself is working.
900
901 Should perhaps add initialization of the bookmark extension here, to support older Mercurial versions.
902
903                 sub rcs_wrapper_postcall($) {
904                         # Update hgrc if it exists. Change post-commit/incoming hooks with the
905                         # .ikiwiki suffix to point to the wrapper path given in the setup file.
906                         # The file is edited in place ($^I) with a backup copy kept until the
907                         # loop has finished, so hgrc is not lost if the loop is interrupted.
908                         # I believe there is a better way to solve this than creating new hooks
909                         # and callbacks. Will await discussion on ikiwiki.info.
910                         my $hgrc=$config{srcdir}.'/.hg/hgrc';
911                         my $backup_suffix='.ikiwiki.bak';
912                         if (-e $hgrc) {
913                                 use File::Spec;
914                                 my $mercurial_wrapper_abspath=File::Spec->rel2abs($config{mercurial_wrapper}, $config{srcdir});
915                                 local ($^I, @ARGV)=($backup_suffix, $hgrc); # make <> rewrite $hgrc in place
916                                 while (<>) {
917                                         s/^(post-commit|incoming)(\.ikiwiki[ \t]*=[ \t]*).*$/$1$2$mercurial_wrapper_abspath/;
918                                         print;
919                                 }
920                                 unlink($hgrc.$backup_suffix); # drop the backup left by the in-place edit
921                         }
922                 }
923
924                 1