yes please
[ikiwiki] / doc / todo / Mercurial_backend_update.mdwn
1 I submitted some changes that added 5 "Yes"es and 2 "Fast"s to Mercurial at [[/rcs]], but some functionality is still missing as compared to e.g. `git.pm`, with which it should be able to be equivalent.
2
3 To do this, a more basic rewrite would simplify things. I inline the complete file below with comments. I don't expect anyone to take the time to read it all at once, but I'd be glad if those interested in the Mercurial backend could do some beta testing.
4
5 * [This specific revision at my hg repo](http://46.239.104.5:81/hg/program/ikiwiki/file/4994ba5e36fa/Plugin/mercurial.pm) ([raw version](http://46.239.104.5:81/hg/program/ikiwiki/raw-file/4994ba5e36fa/Plugin/mercurial.pm)).
6
7 * [My default branch](http://510x.se/hg/program/ikiwiki/file/default/Plugin/mercurial.pm) (where updates will be made, will mention here if anything happens) ([raw version](http://510x.se/hg/program/ikiwiki/raw-file/default/Plugin/mercurial.pm)).
8
9 (I've stripped the `hgrc`-generation from the linked versions, so it should work to just drop them on top of the old `mercurial.pm`).
10
11 I break out my comments from the code to make them more readable. I comment all the changes as compared to current upstream. --[[Daniel Andersson]]
12
13 > So, sorry it took me so long (summer vacation), but I've finally
14 > gotten around to looking at this. Based mostly just on the comments,
15 > it does not seem mergeable as-is, yet. Red flags for me include:
16
17 > * This is a big rewrite, and the main idea seems to be to copy git.pm
18 >   and hack on it until it works, which I think is unlikely to be ideal
19 >   as git and mercurial are not really similar at the level used here.
20 > * There have been no changes in your hg repo to the code since you
21 >   originally committed it. Either it's perfect, or it's not been tested..
22 > * `hg_local_dirstate_shelve` writes to a temp file in the srcdir,
23 >   which is hardly clean or ideal.
24 > * Relies on mercurial bookmarks extension that seems to need to be
25 >   turned on (how?)
26 > * There are some places where code was taken from git.pm and the
27 >   comment asks if it even makes sense for mercurial, which obviously
28 >   would need to be cleaned up.
29 > * The `rcs_receive` support especially is very ambitious to try to add to
30 >   the mercurial code. Does mercurial support anonymous pushes at all? How
31 >   would ikiwiki be run to handle such a push? How would it tell 
32 >   mercurial not to accept a push if it made prohibited changes?
33 >
34 > I'm glad we already got so many standalone improvements into
35 > mercurial.pm. That's a better approach than rewriting the world, unless
36 > the world is badly broken. 
37
38 > --[[Joey]] 
39
40 ---
41
42                 #!/usr/bin/perl
43                 package IkiWiki::Plugin::mercurial;
44
45                 use warnings;
46                 use strict;
47                 use IkiWiki;
48                 use Encode;
49                 use open qw{:utf8 :std};
50
51
52 Pattern to validate hg sha1 sums. hg usually truncates the hash to 12
53 characters and prepends a local revision number for output, but internally
54 it keeps a 40 character hash. Will use the long version in this code.
55
56                 my $sha1_pattern = qr/[0-9a-fA-F]{40}/; # exactly 40 hex digits (the long hash form)
57
58 Message to skip in recent changes
59
60                 my $dummy_commit_msg = 'dummy commit'; # rcs_commit passes this to merge_past() as the commit message
61
62 *TODO:* `$hg_dir` is not really implemented yet, and will not be until a srcdir/repository distinction is
63 made, as for e.g. Git. It is used in `rcs_receive`, and for attachments in `hg_parse_changes`. See the comments in those places, though.
64
65                 my $hg_dir=undef; # when defined, safe_hg() chdirs here instead of to $config{srcdir}
66
67                 sub import {
                           # Registers this plugin's callbacks through hook(): the
                           # checkconfig and getsetup hooks, followed by one hook of
                           # type "rcs" for each rcs_* operation listed below.
68                         hook(type => "checkconfig", id => "mercurial", call => \&checkconfig);
69                         hook(type => "getsetup", id => "mercurial", call => \&getsetup);
70                         hook(type => "rcs", id => "rcs_update", call => \&rcs_update);
71                         hook(type => "rcs", id => "rcs_prepedit", call => \&rcs_prepedit);
72                         hook(type => "rcs", id => "rcs_commit", call => \&rcs_commit);
73                         hook(type => "rcs", id => "rcs_commit_staged", call => \&rcs_commit_staged);
74                         hook(type => "rcs", id => "rcs_add", call => \&rcs_add);
75                         hook(type => "rcs", id => "rcs_remove", call => \&rcs_remove);
76                         hook(type => "rcs", id => "rcs_rename", call => \&rcs_rename);
77                         hook(type => "rcs", id => "rcs_recentchanges", call => \&rcs_recentchanges);
78                         hook(type => "rcs", id => "rcs_diff", call => \&rcs_diff);
79                         hook(type => "rcs", id => "rcs_getctime", call => \&rcs_getctime);
80                         hook(type => "rcs", id => "rcs_getmtime", call => \&rcs_getmtime);
81                         hook(type => "rcs", id => "rcs_preprevert", call => \&rcs_preprevert);
82                         hook(type => "rcs", id => "rcs_revert", call => \&rcs_revert);
83
84 This last hook is "unsanctioned" from [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve its function
85 another way later.
86
87                         hook(type => "rcs", id => "rcs_wrapper_postcall", call => \&rcs_wrapper_postcall);
88                 }
89
90                 sub checkconfig () {
                           # If a non-empty mercurial_wrapper is configured, append a
                           # wrapper description to $config{wrappers}. The mode falls
                           # back to "06755" and wrapper_postcall to "1" when the
                           # corresponding settings are not defined.
91                         if (exists $config{mercurial_wrapper} && length $config{mercurial_wrapper}) {
92                                 push @{$config{wrappers}}, {
93                                         wrapper => $config{mercurial_wrapper},
94                                         wrappermode => (defined $config{mercurial_wrappermode} ? $config{mercurial_wrappermode} : "06755"),
95
96 Next line part of [[Auto-setup and maintain Mercurial wrapper hooks]].
97
98                                         wrapper_postcall => (defined $config{mercurial_wrapper_hgrc_update} ? $config{mercurial_wrapper_hgrc_update} : "1"),
99                                 };
100                         }
101                 }
102
103                 sub getsetup () {
104                         # Describes the plugin and each of its settings. The result
105                         # is a flat list of (name => spec hash) pairs, in the order
106                         # they are declared here.
107
108                         # Field groups shared by several of the specs below.
109                         my %string = (type => "string");
110                         my %unsafe = (safe => 0, rebuild => 0);
111                         my %websafe = (safe => 1, rebuild => 1);
112
113                         return (
114                                 plugin => {
115                                         section => "rcs",
116                                         rebuild => undef,
117                                         safe => 0, # rcs plugin
118                                 },
119                                 mercurial_wrapper => {
120                                         %string, %unsafe, # safe => 0: file
121                                         # FIXME add example
122                                         description => "mercurial post-commit hook to generate",
123                                 },
124                                 mercurial_wrappermode => {
125                                         %string, %unsafe,
126                                         example => '06755',
127                                         description => "mode for mercurial_wrapper (can safely be made suid)",
128                                 },
129                                 mercurial_wrapper_hgrc_update => {
130                                         %string, %unsafe,
131                                         example => "1",
132                                         description => "updates existing hgrc to reflect path changes for mercurial_wrapper",
133                                 },
134                                 historyurl => {
135                                         %string, %websafe,
136                                         example => "http://example.com:8000/log/tip/\[[file]]",
137                                         description => "url to hg serve'd repository, to show file history (\[[file]] substituted)",
138                                 },
139                                 diffurl => {
140                                         %string, %websafe,
141                                         example => "http://localhost:8000/?fd=\[[r2]];file=\[[file]]",
142                                         description => "url to hg serve'd repository, to show diff (\[[file]] and \[[r2]] substituted)",
143                                 },
144                         );
145                 }
146
147                 sub safe_hg (&@) {
148                         # Start a child process safely without resorting to /bin/sh.
149                         # Returns command output (in list context) or success state
150                         # (in scalar context), or runs the specified data handler.
                            #
                            # Arguments: an error handler (code ref or undef) that is
                            # called with a message when the command fails, a data
                            # handler (code ref or undef) that is called once per chomped
                            # output line and stops the reading by returning false, and
                            # the command line as a list.
151
152                         my ($error_handler, $data_handler, @cmdline) = @_;
153
154                         my $pid = open my $OUT, "-|";
155
156                         error("Cannot fork: $!") if !defined $pid;
157
158                         if (!$pid) {
159                                 # In child.
160                                 # hg commands want to be in wc.
161
162 This `$hg_dir` logic means nothing and could be stripped until srcdir/repdir distinction is made (it's stripped in upstream `mercurial.pm` right now).
163
164                                 if (! defined $hg_dir) {
165                                         chdir $config{srcdir}
166                                             or error("cannot chdir to $config{srcdir}: $!");
167                                 }
168                                 else {
169                                         chdir $hg_dir or error("cannot chdir to $hg_dir: $!");
170                                 }
171
172                                 exec @cmdline or error("Cannot exec '@cmdline': $!");
173                         }
174                         # In parent.
175
176                         my @lines;
177                         while (<$OUT>) {
178                                 chomp;
179
180                                 if (! defined $data_handler) {
181                                         push @lines, $_;
182                                 }
183                                 else {
184                                         last unless $data_handler->($_);
185                                 }
186                         }
187
188                         close $OUT;
189
                            # After closing a pipe, $! is 0 when the only problem was a
                            # non-zero exit status, so fall back to describing $? to
                            # avoid reporting an empty reason.
190                         if ($? && $error_handler) {
                                    my $reason = $! ? "$!"
                                            : ($? & 127) ? "signal ".($? & 127)
                                            : "exit code ".($? >> 8);
                                    $error_handler->("'@cmdline' failed: $reason");
                            }
191
192                         return wantarray ? @lines : ($? == 0);
193                 }
194                 # Convenient wrappers.
195                 sub run_or_die ($@) { return safe_hg(\&error, undef, @_); }          # failure goes to error()
196                 sub run_or_cry ($@) { return safe_hg(sub { warn @_ }, undef, @_); }  # failure only warns
197                 sub run_or_non ($@) { return safe_hg(undef, undef, @_); }            # failure is not reported
198
199
200 To handle uncommitted local changes ("ULC"s for short), I use logic similar to the (non-standard) "shelve" extension to Mercurial. By taking a diff before resetting to the last commit, making changes, and then applying the diff again, one can do things Mercurial otherwise refuses, which is necessary later.
201
202 This function creates this diff.
203
204                 sub hg_local_dirstate_shelve ($) {
205                         # Creates a diff snapshot of uncommitted changes existing in the
206                         # srcdir. Takes a string (preferably revision) as input to create a
207                         # unique and identifiable diff name. Returns the path of the diff
208                         # file written, or undef when "hg diff -g" printed nothing.
209                         my $tempdiffname = "diff_".shift;
210                         my @tempdiff = run_or_die('hg', 'diff', '-g');
211                         return undef if !@tempdiff;
212
213                         # The lines arrive chomped; join() restores every newline,
214                         # including the last, without overwriting the global $".
215                         writefile($tempdiffname, $config{srcdir},
216                                         join("\n", @tempdiff)."\n");
217                         return $config{srcdir}.'/'.$tempdiffname;
218                 }
219
220 This function restores the diff.
221
222                 sub hg_local_dirstate_unshelve ($) {
223                         # Re-applies the diff file at the given path with
224                         # "hg import --no-commit". When the import succeeds the diff
225                         # file is deleted and undef is returned. When it fails the
226                         # file is left in place, so the local changes it holds are
227                         # not lost (at the price of cluttering the directory).
228                         # Nothing is done when no path is given.
229                         my $snapshot = shift;
230                         my $restored = $snapshot
231                                 && run_or_cry('hg', 'import', '--no-commit', $snapshot);
232                         if ($restored) {
233                                 unlink($snapshot);
234                                 return undef;
235                         }
                    }
236
237 This makes online diffing possible. It takes an approach similar to the one in `git.pm`, which is [discussed at some length in a comment there](http://source.ikiwiki.branchable.com/?p=source.git;a=blob;f=IkiWiki/Plugin/git.pm;h=cf7fbe9b7c43ee53180612d0411e6202074fb9e0;hb=refs/heads/master#l211).
238
239                 sub merge_past ($$$) {
                            # Commits the edited $file on top of revision $sha1 (the
                            # revision the edit started from) with $message, then tries
                            # to merge that new head with the head that was current when
                            # this sub was entered.
                            #
                            # Returns ($conflict, $tempdiffpath): $conflict is the file
                            # content with conflict markers if the merge failed and undef
                            # otherwise; $tempdiffpath is the path of the diff holding
                            # shelved local changes, or undef if there were none.
                            # Calls error() if any step inside the eval died.
240                         my ($sha1, $file, $message) = @_;
241
242                         # Undo stack for cleanup in case of an error
243                         my @undo;
244                         # File content with conflict markers
245                         my $conflict;
246                         my $tempdiffpath; # path of the shelved diff; assigned inside the eval
247
248                         eval {
249                                 # Hide local changes from Mercurial by renaming the modified
250                                 # file.  Relative paths must be converted to absolute for
251                                 # renaming.
252                                 my ($target, $hidden) = (
253                                         "$config{srcdir}/${file}",
254                                         "$config{srcdir}/${file}.${sha1}"
255                                 );
256                                 rename($target, $hidden)
257                                         or error("rename '$target' to '$hidden' failed: $!");
258                                 # Ensure to restore the renamed file on error.
259                                 push @undo, sub {
260                                         return if ! -e "$hidden"; # already renamed
261                                         rename($hidden, $target)
262                                             or warn "rename '$hidden' to '$target' failed: $!";
263                                 };
264
265
266 Take a snapshot of srcdir to be able to restore uncommited local changes ("ULCs") afterwards.
267
268 * This must happen _after_ the merging commit in Mercurial, there is no way around it. By design hg refuses to commit merges if there are other changes to tracked content present, no matter how much  you beg.
269
270 * ULCs to the file being edited are special: they can't be diffed here since `editpage.pm` already has overwritten the file. When the web edit session started though, the ULC version (not the commited
271 version) was read into the form, so in a way, the web user _has already merged_ with the ULC. It is not saved in commit history, but that is the exact consequence of "uncommited" changes. If an ULC is done between the time the web edit started and was submitted, then it is lost, though.  All in all, one shouldn't be editing the srcdir directly when web edits of the same file are allowed. Clone the repo and push changes instead.
272
273 Much of these issues disappear, I believe, if one works with a master repo which only is pushed to.
274
                                    # Assign the variable declared before the eval (no "my"
                                    # here), so that the path is part of the return value.
275                                 $tempdiffpath = hg_local_dirstate_shelve($sha1);
276
277                                 # Ensure uniqueness of bookmarks.
278                                 my $bookmark_upstream_head = "current_head_$sha1";
279                                 my $bookmark_edit_base = "edit_base_$sha1";
280
281                                 # Git and Mercurial differ in the branch concept. Mercurial's
282                                 # "bookmarks" are closer in function in this regard.
283
284 Bookmarks aren't standard until Mercurial 1.8 ([2011--02--10](http://selenic.com/hg/rev/d4ab9486e514)), but they've been bundled with Mercurial since ~2008, so they can be enabled by writing a `hgrc`, which is also being worked on.
285
286                                 # Create a bookmark at current tip.
287                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
288                                                 $bookmark_upstream_head) };
289                                 run_or_die('hg', 'bookmark', $bookmark_upstream_head);
290
291                                 # Create a bookmark at the revision from which the edit was
292                                 # started and switch to it, discarding changes (they are stored
293                                 # in $tempdiff and the hidden file at the moment).
294                                 push @undo, sub { run_or_cry('hg', 'bookmark', '--delete',
295                                                 $bookmark_edit_base) };
296                                 run_or_die('hg', 'bookmark', '-r', $sha1, $bookmark_edit_base);
297                                 run_or_die('hg', 'update', '-C', $bookmark_edit_base);
298
299                                 # Reveal the modified file.
300                                 rename($hidden, $target)
301                                     or error("rename '$hidden' to '$target' failed: $!");
302
303                                 # Commit at the bookmarked revision, creating a new head.
304                                 run_or_cry('hg', 'commit', '-m', $message);
305
306                                 # Attempt to merge the newly created head with upstream head.
307                                 # '--tool internal:merge' to avoid spawning a GUI merger.
308
309 (*Semi-TODO:* How do you make this command quiet? On failed merge, it
310 always writes to STDERR and clutters the web server log.)
311
312                                 if (!run_or_non('hg', 'merge', '--tool', 'internal:merge',
313                                                 $bookmark_upstream_head)) {
314                                         # ..., otherwise return file with conflict markers.
315                                         $conflict = readfile($target);
316
317                                         # The hardcore reset approach. Keep your hands inside
318                                         # the cart.
319                                         run_or_die('hg', 'rollback');
320                                         run_or_die('hg', 'update', '-C',
321                                                 $bookmark_upstream_head);
322                                         if ($tempdiffpath) {
323                                                 hg_local_dirstate_unshelve($tempdiffpath);
324                                         }
325
326 Other approaches tried here:
327
328 1. Clean up merge attempt,
329
330         run_or_die('hg', 'update', '-C', $bookmark_upstream_head);
331
332 2. Redo "merge", using only upstream head versions,
333
334         run_or_die('hg', 'merge', '--tool', 'internal:local', $bookmark_edit_base);
335
336 3. dummy commit to close head.
337
338         run_or_non('hg', 'commit', '-m', $message);
339
340 This creates a cluttered and erroneous history. We
341 tell Mercurial to merge, even though we in practice
342 discard. This creates problems when trying to revert
343 changes.
344
345 Other attempt:
346
347 1. Discard merge attempt and switch to temp head,
348
349         run_or_die('hg', 'update', '-C', $bookmark_edit_base);
350
351 2. close the temp head (why do they call the command that in practice closes heads "--close-branch"?),
352
353         run_or_non('hg', 'commit', '--close-branch', '-m', $message);
354
355 3. restore working directory to pre-fiddling status.
356
357         run_or_die('hg', 'update', $bookmark_upstream_head);
358
359 ...but this requires the same amount of forks as the
360 above method, and confuses other parts of ikiwiki
361 since the upstream head is now the third newest
362 revision. Maybe that particular problem is solvable
363 by setting a global default bookmark that follows the
364 main tip.  It will leave clutter in the revision
365 history, though. Two extra commits that in practice
366 don't hold relevant information will be recorded for
367 each failed merge attempt.
368
369 To only create one extra commit, one could imagine
370 adding `--close-branch` to the commit that initially
371 created the new head (since there is no problem
372 merging with closed heads), but it's not possible to
373 close and create a head at the same time, apparently.
374
375                                 }
376                         };
377                         my $failure = $@;
378
379                         # Process undo stack (in reverse order). By policy, cleanup actions
380                         # should normally print a warning on failure.
381                         while (my $handle = pop @undo) {
382                                 $handle->();
383                         }
384
385                         error("Mercurial merge failed!\n$failure\n") if $failure;
386
387                         return ($conflict, $tempdiffpath);
388                 }
389
390                 sub hg_commit_info ($;$;$) {
391                         # Return an array of commit info hashes of num commits starting from
392                         # the given sha1sum.
                            #
                            # Arguments (all optional, undef to skip): a sha1sum or a
                            # "sha1:sha1" range, the maximum number of commits, and a
                            # file to restrict the log to.
                            # In list context all commit hashes are returned, in scalar
                            # context only the first. If hg printed nothing, the result
                            # is an empty list (undef in scalar context).
393                         #
394 This could be optimized by using a lookup cache similar to
395 `findtimes()`. By adding `KeyAttr => ['node']` to `XMLin()` options, one
396 could use the revision ID as key and do a single massive history
397 lookup and later just check if the given revision already exists as a
398 key.  Right now I'm at the "don't optimize it yet" stage, though.
399
400 This uses Mercurial's built-in `--style xml` and parses it with `XML::Simple`. Mercurial's log output is otherwise somewhat cumbersome to get good stuff out of, so this XML solution is quite good, I think. It adds module dependency, but XML::Simple seems fairly standard (but what do I know, I've used 1 Perl installation in my life).
401
402                         use XML::Simple;
403                         use Date::Parse;
404
405                         my ($sha1, $num, $file) = @_;
406
407                         my @opts;
408                         if (defined $sha1) {
409                                 if ($sha1 =~ m/^($sha1_pattern)$/) {
410                                         push @opts, ('-r', $1.':0');
411                                 }
412                                 elsif ($sha1 =~ m/^($sha1_pattern):($sha1_pattern)$/) {
413                                         push @opts, ('-r', $1.':'.$2);
414                                 }
415                         }
416                         push @opts, ('--limit', $num) if defined $num;
417                         push @opts, ('--', $file) if defined $file;
418
419                         my %env=%ENV;
420                         $ENV{HGENCODING} = 'utf-8';
421                         my @xml = run_or_cry('hg', 'log', '-v', '--style', 'xml', @opts);
422                         %ENV=%env;
423
424                         # hg returns empty string if file is not in repository.
                            # Bare return: undef in scalar context, and an empty list
                            # (rather than a list holding undef) in list context.
425                         return if !@xml;
426
427 Some places it is clear that I'm coding ad-hoc Perl. I don't know if this is a reasonably efficient way to give input to `XMLin`, but it works.
428
429                         # Want to preserve linebreaks in multiline comments.
430                         # join() does that without overwriting the global $".
431                         my $xmllog = XMLin(join("\n", @xml),
432                                 ForceArray => ['logentry', 'parent', 'copy', 'path']);
434
435                         my @c_infos;
436                         foreach my $rev (@{$xmllog->{logentry}}) {
437                                 my %c_info;
438                                 # In Mercurial, "rev" is technically the strictly local
439                                 # revision number.  What ikiwiki wants is what is called
440                                 # "node": a globally defined SHA1 checksum.
441                                 $c_info{rev} = $rev->{node};
442                                 foreach my $parent (@{$rev->{parent}}) {
443                                         push @{$c_info{parents}}, {rev => $parent->{node}};
444                                 }
445                                 $c_info{user} = $rev->{author}{content};
446                                 # Mercurial itself parses out and stores an email address if
447                                 # present in author name. If not, hg sets email to author name.
448                                 if ( $rev->{author}{content} ne $rev->{author}{email} &&
449                                         $rev->{author}{email} =~ m/^([^\@]+)\@(.*)$/ ) {
450                                         if ($2 eq "web") {
451                                                 $c_info{nickname} = $1;
452                                                 $c_info{web_commit} = "1";
453                                         }
454                                 }
455                                 # Mercurial gives date in ISO 8601, well handled by str2time().
456                                 $c_info{when} = str2time($rev->{date});
457                                 # Mercurial doesn't allow empty commit messages, so there
458                                 # should always be a single defined message.
459                                 $c_info{message} = $rev->{msg}{content};
460                                 # Inside "paths" sits a single array "path" that contains
461                                 # multiple paths. Crystal clear :-)
462                                 foreach my $path (@{$rev->{paths}{path}}) {
463                                         push @{$c_info{files}}, {
464                                                 # Mercurial doesn't track file permissions as
465                                                 # Git do, so that's missing here.
466                                                 'file' => $path->{content},
467                                                 'status' => $path->{action},
468                                         };
469                                 }
470                                 # There also exists an XML branch "copies"->"copy", containing
471                                 # source and dest of files that have been copied with "hg cp".
472                                 # The copy action is also registered in "paths" as a removal of
473                                 # source and addition of dest, so it's not needed here.
474                                 push @c_infos, {%c_info};
476                         }
477
478                         return wantarray ? @c_infos : $c_infos[0];
479                 }
480
481                 sub hg_sha1 (;$) {
482                         # Return head sha1sum (of given file); empty string if none is found.
483                         # Without a file the lookup is not restricted to any path: undef
484                         # makes hg_commit_info() omit the file argument, whereas the former
485                         # "--" default was handed on as a file name. A file named "0" is
486                         # no longer mistaken for a missing argument.
487                         my $file = shift;
488                         $file = undef if defined $file && !length $file;
489                         my $what = defined $file ? $file : 'repository';
490                         my $f_info = hg_commit_info(undef, 1, $file);
491                         if ($f_info && $f_info->{rev} && $f_info->{rev} =~ m/($sha1_pattern)/) {
492                                 return $1;
493                         }
494                         debug("Empty sha1sum for '$what'.");
495                         return q{};
                    }
496
497                 sub rcs_update () {
498                         return run_or_cry(qw(hg -q update)); # runs "hg -q update"; a failure only warns
499                 }
500
501                 sub rcs_prepedit ($) {
502                         # Called when editing begins: hands back the sha1sum that
503                         # hg_sha1() reports for the file. rcs_commit uses it later
504                         # to decide whether a merge is required.
505                         my $edited_file = $_[0];
506                         return hg_sha1($edited_file);
507                 }
508
509                 sub rcs_commit (@) {
510                         # Try to commit the page; returns undef on _success_ and
511                         # a version of the page with the rcs's conflict markers on
512                         # failure.
                            #
                            # Takes named parameters. This sub itself reads "file" and
                            # "token"; the complete list is passed on unchanged to
                            # rcs_commit_helper.
                            # NOTE(review): "token" is presumably the value returned by
                            # rcs_prepedit -- confirm against the caller.
513                         my %params=@_;
514
515                         # Check to see if the page has been changed by someone else since
516                         # rcs_prepedit was called.
517                         my $cur    = hg_sha1($params{file});
518                         my ($prev) = $params{token} =~ /^($sha1_pattern)$/; # untaint
519
                            # $prev stays undef when the token is not a 40-digit sha1sum,
                            # in which case no merge is attempted.
520                         if (defined $cur && defined $prev && $cur ne $prev) {
521
522 If there was a conflict, the file with conflict markers is returned. Else, the path to the tempdiff, which is to be run to restore previous local state after `rcs_commit_staged`, is returned.
523
524                                 my ($conflict, $tempdiffpath) =
525                                         merge_past($prev, $params{file}, $dummy_commit_msg);
526                                 return defined $conflict
527                                         ? $conflict
528                                         : rcs_commit_helper(
529                                                 @_,
530                                                 merge => 1,
531                                                 tempdiffpath => $tempdiffpath);
532                         }
533
534                         return rcs_commit_helper(@_);
535                 }
536
537                 sub rcs_commit_helper (@) {
                            # Runs "hg commit" for rcs_commit and rcs_commit_staged.
                            #
                            # Named parameters read here: session (optional, used to
                            # build HGUSER), message, file (optional), merge (optional;
                            # when defined no file argument is given to hg) and
                            # tempdiffpath (optional; diff to re-apply after the commit).
                            # Always returns undef, because the exit status of
                            # "hg commit" is deliberately ignored.
538                         my %params=@_;
539
540                         my %env=%ENV;
541                         $ENV{HGENCODING} = 'utf-8';
542
543                         my $user="Anonymous";
544                         my $nickname;
545                         if (defined $params{session}) {
546                                 if (defined $params{session}->param("name")) {
547                                         $user = $params{session}->param("name");
548                                 }
549                                 elsif (defined $params{session}->remote_addr()) {
550                                         $user = $params{session}->remote_addr();
551                                 }
552
                                    # Local part of the "...@web" address. Without a session
                                    # nickname, fall back to the user name, so that an undef
                                    # value is never concatenated and the address never
                                    # degrades to "<@web>", which hg_commit_info() would not
                                    # recognize as a web commit.
553                                 $nickname = encode_utf8(
                                            defined $params{session}->param("nickname")
                                                    ? $params{session}->param("nickname")
                                                    : $user);
555                                 $nickname=~s/\s+/_/g;
556                                 $nickname=~s/[^-_0-9[:alnum:]]+//g;
                                    $nickname = 'anonymous' if ! length $nickname;
558                                 $ENV{HGUSER} = encode_utf8($user . ' <' . $nickname . '@web>');
559                         }
560
561                         if (! length $params{message}) {
562                                 $params{message} = "no message given";
563                         }
564
565                         $params{message} = IkiWiki::possibly_foolish_untaint($params{message});
566
567                         my @opts;
568
569 Mercurial rejects file arguments when performing a merging commit. It
570 only does "all or nothing" commits by design when merging, so given file arguments must be discarded. It should not pose a problem.
571
572                         if (exists $params{file} && ! defined $params{merge}) {
573                                 push @opts, '--', $params{file};
574                         }
575
576                         # hg commit returns non-zero if nothing really changed.
577                         # So we should ignore its exit status (hence run_or_non).
578                         run_or_non('hg', 'commit', '-m', $params{message}, '-q', @opts);
579
580 If there were uncommited local changes in srcdir before a merge was done, they are restored here.
581
582                         if (defined $params{tempdiffpath}) {
583                                 hg_local_dirstate_unshelve($params{tempdiffpath});
584                         }
585
586                         %ENV=%env;
587                         return undef; # success
588                 }
589
590                 sub rcs_commit_staged (@) {
591                         # Commit whatever has been staged with rcs_add, rcs_remove or rcs_rename.
592                         my @staged=@_;
593                         return rcs_commit_helper(@staged);
594                 }
595
596                 sub rcs_add ($) {
597                         # Runs "hg add" on the given file.
598                         my $path=shift;
599                         run_or_cry('hg', 'add', $path);
600                 }
601
602                 sub rcs_remove ($) {
603                         # Takes the given file out of the archive by running
604                         # "hg remove -f" on it.
605                         my $path=shift;
606                         run_or_cry('hg', 'remove', '-f', $path);
607                 }
608
609                 sub rcs_rename ($$) {
610                         # Runs "hg rename -f" to move $old to $new.
611                         my ($old, $new) = @_[0, 1];
612                         run_or_cry('hg', 'rename', '-f', $old, $new);
613                 }
614
615                 sub rcs_recentchanges ($) {
616                         # Returns one hash per recent commit (at most $num are requested
617                         # from hg_commit_info); commits that list no files are left out.
618                         my ($num) = @_;
619                         my @c_infos;
620
621                         foreach my $c_info (hg_commit_info(undef, $num, undef)) {
622                                 my @pagenames;
623                                 for my $page (@{$c_info->{files}}) {
624                                         # These substitutions enable defining keywords \[[file]]
625                                         # and \[[r2]] (backward compatibility) in the setup file
626                                         # that will be exchanged with filename and revision
627                                         # respectively.
628                                         my $diffurl=defined $config{diffurl} ? $config{diffurl} : '';
629                                         $diffurl =~ s/\[\[file\]\]/$page->{file}/g;
630                                         $diffurl =~ s/\[\[r2\]\]/$c_info->{rev}/g;
631                                         push @pagenames, {
632                                                 # pagename() strips suffixes and returns the path to the
633                                                 # file as it is to be represented in the build dir.
634                                                 page => pagename($page->{file}),
635                                                 diffurl => $diffurl,
636                                         };
637                                 }
638                                 next if ! @pagenames;
639
640                                 # It is expected of ikiwiki to get each comment line as a
641                                 # separate entry. split() yields them without the trailing
642                                 # newline and needs no (unchecked) in-memory filehandle.
643                                 my $message=defined $c_info->{message} ? $c_info->{message} : '';
644                                 my @messagelines=map { +{ line => $_ } } split(/\n/, $message);
645
646                                 push @c_infos, {
647                                         rev        => $c_info->{rev},
648                                         user       => $c_info->{user},
649                                         nickname   => defined $c_info->{nickname} ?
650                                                         $c_info->{nickname} : $c_info->{user},
651                                         committype => $c_info->{web_commit} ? "web" : "hg",
652                                         when       => $c_info->{when},
653                                         message    => \@messagelines,
654                                         pages      => \@pagenames,
655                                 };
656                         }
657
658                         return @c_infos;
659                 }
660
661                 sub rcs_diff ($;$) {
662                         # Returns the "hg diff -g" output for changeset $rev, keeping at most
663                         # $maxlines lines when that is given: a list of lines in list context,
664                         # one joined string otherwise. A malformed $rev yields nothing.
665                         my ($rev, $maxlines)=@_;
666                         # Untaint and validate, as rcs_preprevert and rcs_revert do.
667                         my ($sha1) = defined $rev ? $rev =~ /^($sha1_pattern)$/ : ();
668                         return wantarray ? () : "" if ! defined $sha1;
669
670                         my @lines;
671                         my $addlines=sub {
672                                 my $line=shift;
673                                 return if defined $maxlines && @lines == $maxlines;
674                                 push @lines, $line."\n" if (@lines || $line=~/^diff --git/);
675                                 return 1;
676                         };
677                         safe_hg(undef, $addlines, "hg", "diff", "-c", $sha1, "-g");
678                         return wantarray ? @lines : join("", @lines);
679                 }
680
681                 {
682                 my %time_cache;
683
684 This is an upstream change I did a week ago or so. Perhaps it can be merged in some clever way with the updated `hg_commit_info` to make one shared lookup cache. Don't know how much would be gained.
685
686                 sub findtimes ($$) {
687                         my $file=shift;
688                         my $id=shift; # 0 = mtime ; 1 = ctime
689                         # Returns the cached time of $file selected by $id, or 0 if the file is not in the cache.
690                         if (! keys %time_cache) {
691                                 my $date;
692                                 # The cache is still empty: fill it from a single "hg log" run.
693                                 # It doesn't seem possible to specify the format wanted for the
694                                 # changelog (same format as is generated in git.pm:findtimes(),
695                                 # though the date differs slightly) without using a style
696                                 # _file_. There is a "hg log" switch "--template" to directly
697                                 # control simple output formatting, but in this case, the
698                                 # {file} directive must be redefined, which can only be done
699                                 # with "--style".
700                                 #
701                                 # If {file} is not redefined, all files are output on a single
702                                 # line separated with a space. It is not possible to conclude
703                                 # if the space is part of a filename or just a separator, and
704                                 # thus impossible to use in this case.
705                                 # 
706                                 # Some output filters are available in hg, but they are not fit
707                                 # for this cause (and would slow down the process
708                                 # unnecessarily).
709                                 
710                                 eval q{use File::Temp};
711                                 error $@ if $@;
712                                 my ($tmpl_fh, $tmpl_filename) = File::Temp::tempfile(UNLINK => 1);
713                                 
714                                 print $tmpl_fh 'changeset = "{date}\\n{files}\\n"' . "\n";
715                                 print $tmpl_fh 'file = "{file}\\n"' . "\n";
716                                 
717                                 foreach my $line (run_or_die('hg', 'log', '--style', $tmpl_filename)) {
718                                         # {date} gives output on the form
719                                         # 1310694511.0-7200
720                                         # where the first number is UTC Unix timestamp with one
721                                         # decimal (decimal always 0, at least on my system)
722                                         # followed by local timezone offset from UTC in
723                                         # seconds.
724                                         if (! defined $date && $line =~ /^\d+\.\d[+-]\d*$/) {
725                                                 $line =~ s/^(\d+).*/$1/;
726                                                 $date=$line;
727                                         }
728                                         elsif (! length $line) {
729                                                 $date=undef;
730                                         }
731                                         else {
732                                                 my $f=$line;
733                                                 # First date seen for a file is kept as mtime, the last one as ctime (presumably hg log lists newest first).
734                                                 if (! $time_cache{$f}) {
735                                                         $time_cache{$f}[0]=$date; # mtime
736                                                 }
737                                                 $time_cache{$f}[1]=$date; # ctime
738                                         }
739                                 }
740                         }
741
742                         return exists $time_cache{$file} ? $time_cache{$file}[$id] : 0;
743                 }
744
745                 }
746
747                 sub rcs_getctime ($) {
748                         # The ctime that findtimes has on record for the given file.
749                         my ($path) = @_;
750                         return findtimes($path, 1);
751                 }
752
753                 sub rcs_getmtime ($) {
754                         # The mtime that findtimes has on record for the given file.
755                         my ($path) = @_;
756                         return findtimes($path, 0);
757                 }
758
759 The comment just below the following function declaration is taken from `git.pm`. Is it true? Should ikiwiki support sharing its repo with other things? Mercurial-wise that sounds like a world of pain.
760
761 > Yes, ikiwiki supports this for git and svn. It's useful when you want
762 > a doc/ directory with the wiki for a project. I don't know why
763 > it wouldn't be a useful thing to do with mercurial, but it's not
764 > required. --[[Joey]]
765
766                 {
767                 my $cached_root;
768                 sub hg_find_root {
769                         # The wiki may not be the only thing in the hg repo. Walk upwards
770                         # from the srcdir until a directory holding .hg is found, and
771                         # return the pair (path of the srcdir below that directory, with
772                         # a trailing slash; the directory itself).
773                         return @$cached_root if defined $cached_root;
774
775                         my $prefix="";
776                         my $root=$config{srcdir};
777                         until (-d "$root/.hg") {
778                                 $prefix=join("/", IkiWiki::basename($root), $prefix);
779                                 $root=IkiWiki::dirname($root);
780                                 error("cannot determine root of hg repo")
781                                         unless length $root;
782                         }
783
784                         # Remember the answer for later calls.
785                         $cached_root=[$prefix, $root];
786                         return @$cached_root;
787                 }
788
789                 }
790
791                 sub hg_parse_changes (@) {
792                         # Turns the commit info hashes given in @_ into a flat list of
793                         # { file, action, path } hashes. rcs_preprevert passes a single
794                         # info hash, rcs_receive passes everything hg_commit_info returns.
795                         my @c_infos_raw = @_;
796                         my ($subdir) = hg_find_root();
797                         my @c_infos_ret;
798
799                         foreach my $c_info_raw (@c_infos_raw) {
800                                 foreach my $path (@{$c_info_raw->{files}}) {
801                                         my ($action, $temppath);
802                                         my $file=$path->{file};
803
804                                         # check that all changed files are in the subdir; the prefix is
805                                         # quoted because a directory name may hold regex metacharacters
806                                         if (length $subdir && ! ($file =~ s/^\Q$subdir\E//)) {
807                                                 error sprintf(gettext("you are not allowed to change %s"), $file);
808                                         }
809
810                                         if    ($path->{status} eq "M") { $action="change" }
811                                         elsif ($path->{status} eq "A") { $action="add" }
812                                         elsif ($path->{status} eq "R") { $action="remove" }
813                                         else  { error "unknown status ".$path->{status} }
814
815 I haven't tested the attachment code below. Is it run when there is a non-trusted file upload?
816
817 > It's run when an anonymous git push is done. I don't know if there would
818 > be any equivalent with mercurial; if not, it does not make sense
819 > to implement this at all (this function is only used by `rcs_receive`). --[[Joey]]
820
821                                         # extract attachment to temp file
822                                         if (($action eq 'add' || $action eq 'change') &&
823                                                 ! pagetype($file)) {
824                                                 # NOTE(review): $hg_dir and the rev are interpolated into a shell command below.
825                                                 eval q{use File::Temp};
826                                                 die $@ if $@;
827
828                                                 my $fh;
829                                                 ($fh, $temppath)=File::Temp::tempfile(undef, UNLINK => 1);
830                                                 my $cmd = "cd $hg_dir && ".
831                                                         "hg diff -g -c $c_info_raw->{rev} > '$temppath'";
832                                                 if (system($cmd) != 0) {
833                                                         error("failed writing temp file '$temppath'.");
834                                                 }
835                                         }
836
837                                         push @c_infos_ret, {
838                                                 file => $file,
839                                                 action => $action,
840                                                 path => $temppath,
841                                         };
842                                 }
843                         }
844
845                         return @c_infos_ret;
846                 }
847
848 *TODO:* I don't know what's happening here. I've changed the code to adhere to this file's variables and functions, but it refers to a srcdir _and_ a default repo, which currently isn't available in the Mercurial setup.
849
850 `rcs_receive` is optional and only runs when running a pre-receive hook. Where `$_` comes from and its format are mysteries to me.
851
852 Also, a comment in `git.pm` mentions that we don't want to chdir to a subdir "and only see changes in it" - but this isn't true for either Git or Mercurial to my knowledge. It only seems to happen in `git.pm` since the `git log` command in `git_commit_info` ends with "`-- .`" - if it didn't do that, one wouldn't have to chdir for this reason, I believe.
853
854 In this case we need to stay in default repo instead of srcdir though, so `hg_dir="."` _is_ needed, but not for the abovementioned reason :-) (maybe there's more to it, though).
855
856 > Implementing some sort of anonymous push handling for mercurial is not something
857 > you can fumble your way through like this, if it can be done at all.
858
859 > Hint: `$_` is being populated by the specific format git sends to a
860 > specific hook script.
861 > --[[Joey]]
862
863                 sub rcs_receive () {
864                         # Reads "oldrev newrev refname" lines from <> and returns the parsed changes of each.
865                         my @c_infos_ret;
866                         while (my $line = <>) {
867                                 chomp $line;
868                                 my ($oldrev, $newrev, $refname) = split(' ', $line, 3);
869                                 # only allow changes to hg_default_branch
870
871 *TODO:* What happens here? Some Git voodoo. _If_ `$_` has the exact same format for Mercurial, then the below should work just as well here, I think.
872
873                                 if ($refname !~ m{^refs/heads/\Q$config{hg_default_branch}\E$}) {
874                                         error sprintf(gettext("you are not allowed to change %s"), $refname);
875                                 }
876
877 Comment from `git.pm`:
878
879                                 # Avoid chdir when running git here, because the changes are in
880                                 # the default git repo, not the srcdir repo.  (Also, if a subdir
881                                 # is involved, we don't want to chdir to it and only see
882                                 # changes in it.) The pre-receive hook already puts us in the
883                                 # right place.
884                                 $hg_dir=".";
885                                 push @c_infos_ret,
886                                         hg_parse_changes(
887                                                 hg_commit_info("$newrev:$oldrev", undef, undef));
888                                 $hg_dir=undef;
889                         }
890
891                         return @c_infos_ret;
892                 }
893
894                 sub rcs_preprevert ($) {
895                         # Checks that the given rev may be reverted and returns its parsed changes.
896                         my ($sha1) = $_[0] =~ /^($sha1_pattern)$/; # untaint
897
898 The below 4 lines of code are from `git.pm`, but I can't see what they actually do there. Neither Git nor Mercurial only lists changes in working directory when given a command - they always traverse to repository root by themselves. I keep it here for comments, in case I'm missing something.
899
900 *UPDATE:* See earlier note about `git log` ending in "`-- .`".
901
902                         ## Examine changes from root of git repo, not from any subdir,
903                         ## in order to see all changes.
904                         #my ($subdir, $rootdir) = git_find_root();
905                         #$git_dir=$rootdir;
906                         error "unknown commit" if ! defined $sha1; # malformed rev: never look up undef
907                         my $c_info=hg_commit_info($sha1, 1, undef) or error "unknown commit";
908
909                         # hg revert will fail on merge commits. Add a nice message.
910                         if (exists $c_info->{parents} && $c_info->{parents} > 1) {
911                                 error gettext("you are not allowed to revert a merge");
912                         }
913
914                         my @c_info_ret=hg_parse_changes($c_info);
915
916                         ### Probably not needed, if earlier comment is correct.
917                         #$hg_dir=undef;
918                         return @c_info_ret;
919                 }
920
921                 sub rcs_revert ($) {
922                         # Try to revert the given rev; returns undef on _success_ and an
923                         # error message on failure.
924                         my $rev = shift;
925                         my ($sha1) = $rev =~ /^($sha1_pattern)$/; # untaint
926                         # Refuse a malformed rev before the working directory is touched.
927                         return sprintf(gettext("Failed to revert commit %s"), $rev) if ! defined $sha1;
928
929                         # Save uncommitted local changes to diff file. Attempt to restore later.
930                         my $tempdiffpath = hg_local_dirstate_shelve($sha1);
931                         # Clean dir to latest commit.
932                         run_or_die('hg', 'update', '-C');
933
934 Some voodoo is needed here. `hg backout --tool internal:local -r $sha1` is *almost* good, but if the reversion is done to the directly previous revision, hg automatically commits, which is bad in this case. Instead I generate a reverse diff and pipe it to `import --no-commit`.
935
936                         my $reverted = run_or_non("hg diff -c $sha1 --reverse | hg import --no-commit -");
937
938                         # Restore the saved local changes whether or not the revert worked.
939                         if ($tempdiffpath) { hg_local_dirstate_unshelve($tempdiffpath) }
940                         return undef if $reverted;
941                         return sprintf(gettext("Failed to revert commit %s"), $sha1);
942                 }
943
944 Below follows code regarding [[Auto-setup and maintain Mercurial wrapper hooks]]. Will try to solve it in another place later, but the code in itself is working.
945
946 Should perhaps add initiation of the bookmark extension here, to support older Mercurial versions.
947
948                 sub rcs_wrapper_postcall($) {
949                         # If the srcdir has an hgrc, point every post-commit.ikiwiki and
950                         # incoming.ikiwiki hook in it at the wrapper path given in the
951                         # setup file. The file is edited in place through $^I, so the
952                         # original survives as a backup should the loop be interrupted;
953                         # the backup is removed afterwards. There may be a better way
954                         # than new hooks and callbacks; awaiting discussion on ikiwiki.info.
955                         my $bak='.ikiwiki.bak';
956                         my $rcfile="$config{srcdir}/.hg/hgrc";
957                         if (-e $rcfile) {
958                                 use File::Spec;
959                                 my $wrapper=File::Spec->rel2abs($config{mercurial_wrapper}, $config{srcdir});
960                                 local ($^I, @ARGV)=($bak, $rcfile);
961                                 while (<>) {
962                                         s/^(post-commit|incoming)(\.ikiwiki[ \t]*=[ \t]*).*$/$1$2$wrapper/;
963                                         print;
964                                 }
965                                 unlink("$rcfile$bak");
966                         }
967                 }
968
969                 1