Merge branch 'kb/fast-hashmap'
[git] / contrib / subtree / git-subtree.sh
1 #!/bin/sh
2 #
3 # git-subtree.sh: split/join git repositories in subdirectories of this one
4 #
5 # Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6 #
# Running with no arguments at all is treated as a request for help.
case $# in
0)
        set -- -h
        ;;
esac

# Usage lines and option table handed to 'git rev-parse --parseopt' below.
OPTS_SPEC="\
git subtree add   --prefix=<prefix> <commit>
git subtree add   --prefix=<prefix> <repository> <ref>
git subtree merge --prefix=<prefix> <commit>
git subtree pull  --prefix=<prefix> <repository> <ref>
git subtree push  --prefix=<prefix> <repository> <ref>
git subtree split --prefix=<prefix> <commit...>
--
h,help        show the help
q             quiet
d             show debug messages
P,prefix=     the name of the subdir to split out
m,message=    use the given message as the commit message for the merge commit
 options for 'split'
annotate=     add a prefix to commit message of new commits
b,branch=     create a new branch from the split subtree
ignore-joins  ignore prior --rejoin commits
onto=         try connecting new tree to an existing one
rejoin        merge the new branch back into HEAD
 options for 'add', 'merge', 'pull' and 'push'
squash        merge subtree changes as a single commit
"
# Evaluate whatever parseopt prints; if parseopt fails, "exit <status>" is
# evaluated instead so the script stops with that status.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"

# Append git's exec path to PATH so that git-sh-setup can be sourced by name.
PATH=$PATH:$(git --exec-path)
. git-sh-setup

# Not defined in this file; presumably supplied by git-sh-setup.
require_work_tree

# State filled in by the option parser and the command dispatcher further
# down.  Everything starts out empty.
command=
prefix=
message=
quiet=
debug=
branch=
onto=
annotate=
rejoin=
ignore_joins=
squash=
50
# Print the arguments on stderr, but only when $debug is non-empty (-d).
# Always succeeds when debugging is off.
debug () {
        test -n "$debug" || return 0
        echo "$@" >&2
}
57
# Print the arguments on stderr unless $quiet is set (-q).  The arguments
# are handed to echo untouched, so callers may pass echo options.
say () {
        test -z "$quiet" || return 0
        echo "$@" >&2
}
64
# Run the arguments as a command; if it fails, die with a message that
# repeats the failed command.
assert () {
        "$@" || die "assertion failed: " "$@"
}
73
74
75 #echo "Options: $*"
76
# Consume the options up to and including the "--" separator, recording
# each one in the matching state variable.
while test $# -gt 0
do
        opt="$1"
        shift
        case "$opt" in
        -q) quiet=1 ;;
        -d) debug=1 ;;
        --annotate) annotate="$1"; shift ;;
        --no-annotate) annotate= ;;
        -b) branch="$1"; shift ;;
        -P) prefix="$1"; shift ;;
        -m) message="$1"; shift ;;
        --no-prefix) prefix= ;;
        --onto) onto="$1"; shift ;;
        --no-onto) onto= ;;
        --rejoin) rejoin=1 ;;
        --no-rejoin) rejoin= ;;
        --ignore-joins) ignore_joins=1 ;;
        --no-ignore-joins) ignore_joins= ;;
        --squash) squash=1 ;;
        --no-squash) squash= ;;
        --) break ;;
        *) die "Unexpected option: $opt" ;;
        esac
done

# The first remaining word is the subcommand.  It is validated BEFORE being
# shifted away: when no subcommand was given there is nothing to shift, and
# POSIX lets a non-interactive shell abort on an out-of-range shift, which
# would hide the "Unknown command" diagnostic.
command="$1"
case "$command" in
add|merge|pull)
        default=
        ;;
split|push)
        default="--default HEAD"
        ;;
*)
        die "Unknown command '$command'"
        ;;
esac
shift

if test -z "$prefix"
then
        die "You must provide the --prefix option."
fi

# 'add' needs the prefix to be absent; every other command needs it present.
case "$command" in
add)
        test -e "$prefix" &&
                die "prefix '$prefix' already exists."
        ;;
*)
        test -e "$prefix" ||
                die "'$prefix' does not exist; use 'git subtree add'"
        ;;
esac

# Appending "/." and taking dirname drops any trailing slashes from the prefix.
dir="$(dirname "$prefix/.")"

# add, pull and push interpret their own arguments; for the remaining
# commands the arguments must all be revisions.
case "$command" in
add|pull|push)
        ;;
*)
        revs=$(git rev-parse $default --revs-only "$@") || exit $?
        dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
        if test -n "$dirs"
        then
                die "Error: Use --prefix instead of bare filenames."
        fi
        ;;
esac

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
137
# Create a fresh, empty cache directory for this process under
# $GIT_DIR/subtree-cache (named after the shell's PID), including its
# "notree" subdirectory.  Sets $cachedir.  Dies if the directory cannot be
# removed or created.
cache_setup () {
        cachedir="$GIT_DIR/subtree-cache/$$"
        if ! rm -rf "$cachedir"
        then
                die "Can't delete old cachedir: $cachedir"
        fi
        if ! mkdir -p "$cachedir"
        then
                die "Can't create new cachedir: $cachedir"
        fi
        if ! mkdir -p "$cachedir/notree"
        then
                die "Can't create new cachedir: $cachedir/notree"
        fi
        debug "Using cachedir: $cachedir" >&2
}
146
# For every revision given, print the value stored for it in the cache
# (one per line).  Revisions without a readable cache entry are skipped.
cache_get () {
        for oldrev in $*
        do
                test -r "$cachedir/$oldrev" || continue
                read newrev <"$cachedir/$oldrev"
                echo $newrev
        done
}
156
# Print, one per line, those of the given revisions that have no readable
# entry in the cache.
cache_miss () {
        for oldrev in $*
        do
                test -r "$cachedir/$oldrev" || echo $oldrev
        done
}
165
# Emit a debug note for every given parent revision that is neither in the
# cache nor recorded under notree/ (see set_notree).
check_parents () {
        missed=$(cache_miss $*)
        for miss in $missed
        do
                test -r "$cachedir/notree/$miss" ||
                        debug "  incorrect order: $miss"
        done
}
175
# Record revision $1 under notree/ in the cache directory by writing a
# marker file containing "1".
set_notree () {
        printf '%s\n' 1 >"$cachedir/notree/$1"
}
180
# Store the mapping $1 -> $2 in the cache.  An existing entry is an error,
# except for the special keys latest_old and latest_new, which may be
# overwritten.
cache_set () {
        oldrev="$1"
        newrev="$2"
        case "$oldrev" in
        latest_old|latest_new)
                # these two keys are allowed to be rewritten
                ;;
        *)
                if test -e "$cachedir/$oldrev"
                then
                        die "cache for $oldrev already exists!"
                fi
                ;;
        esac
        echo "$newrev" >"$cachedir/$oldrev"
}
192
# Return 0 if 'git rev-parse' accepts $1, 1 otherwise.  All output of
# rev-parse is discarded.
rev_exists () {
        git rev-parse "$1" >/dev/null 2>&1 || return 1
        return 0
}
201
# Usage: rev_is_descendant_of_branch <newrev> <branch>
#
# Return 0 when 'git rev-list -1 <branch> ^<newrev>' prints nothing, i.e.
# no commit is reachable from <branch> that is not also reachable from
# <newrev>; return 1 otherwise.
#
# A failing git command also yields 1: an empty result caused by an error
# must not be mistaken for "is a descendant", because the caller uses this
# check to decide whether a branch may be updated.
rev_is_descendant_of_branch () {
        newrev="$1"
        branch="$2"
        branch_hash=$(git rev-parse "$branch") || return 1
        match=$(git rev-list -1 "$branch_hash" "^$newrev") || return 1

        test -z "$match"
}
215
# Print "^<rev>^" (an exclusion of the parent of $1, suitable for rev-list)
# when that parent exists.  A commit without a parent has nothing to
# exclude, so in that case nothing is printed.
try_remove_previous () {
        rev_exists "$1^" || return 0
        echo "^$1^"
}
225
# Usage: find_latest_squash <dir>
#
# Scan the history of HEAD for commits whose message carries a
# "git-subtree-dir: <dir>" line and print "<commit> <split>" for the first
# one that also carries a "git-subtree-split:" line, then stop.  For a
# commit that additionally has a "git-subtree-mainline:" line (a rejoin),
# the split commit is printed in both positions.  Prints nothing when no
# such commit is found.
#
# Lines are read with 'read -r': without it a message line ending in a
# backslash would be joined with the following line and could swallow a
# trailer.
find_latest_squash () {
        # assign first so that the debug line reports the directory
        # actually being searched
        dir="$1"
        debug "Looking for latest squash ($dir)..."
        sq=
        main=
        sub=
        git log --grep="^git-subtree-dir: $dir/*\$" \
                --pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
        while read -r a b junk
        do
                debug "$a $b $junk"
                debug "{{$sq/$main/$sub}}"
                case "$a" in
                START)
                        sq="$b"
                        ;;
                git-subtree-mainline:)
                        main="$b"
                        ;;
                git-subtree-split:)
                        sub="$b"
                        ;;
                END)
                        if test -n "$sub"
                        then
                                if test -n "$main"
                                then
                                        # a rejoin commit?
                                        # Pretend its sub was a squash.
                                        sq="$sub"
                                fi
                                debug "Squash found: $sq $sub"
                                echo "$sq" "$sub"
                                break
                        fi
                        sq=
                        main=
                        sub=
                        ;;
                esac
        done
}
260
# Usage: find_existing_splits <dir> <revs>
#
# Scan the history of <revs> for commits whose message carries a
# "git-subtree-dir: <dir>" line and seed the cache from them:
#   - split line only (a squash): cache <commit> -> <split>;
#   - mainline and split lines (a rejoin): cache <mainline> -> <split> and
#     <split> -> <split>, and print whatever try_remove_previous emits for
#     both, for use as extra rev-list arguments.
#
# <revs> is deliberately expanded unquoted so that it may hold several
# revisions.  Lines are read with 'read -r': without it a message line
# ending in a backslash would be joined with the following line and could
# swallow a trailer.
find_existing_splits () {
        debug "Looking for prior splits..."
        dir="$1"
        revs="$2"
        main=
        sub=
        git log --grep="^git-subtree-dir: $dir/*\$" \
                --pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
        while read -r a b junk
        do
                case "$a" in
                START)
                        sq="$b"
                        ;;
                git-subtree-mainline:)
                        main="$b"
                        ;;
                git-subtree-split:)
                        sub="$b"
                        ;;
                END)
                        debug "  Main is: '$main'"
                        if test -z "$main" && test -n "$sub"
                        then
                                # squash commits refer to a subtree
                                debug "  Squash: $sq from $sub"
                                cache_set "$sq" "$sub"
                        fi
                        if test -n "$main" && test -n "$sub"
                        then
                                debug "  Prior: $main -> $sub"
                                cache_set "$main" "$sub"
                                cache_set "$sub" "$sub"
                                try_remove_previous "$main"
                                try_remove_previous "$sub"
                        fi
                        main=
                        sub=
                        ;;
                esac
        done
}
295
# Usage: copy_commit <rev> <tree> <parent-args>
#
# Create a new commit with tree <tree> that reuses the author, committer
# and message of <rev>, with $annotate prepended to the message.
# <parent-args> is a string such as " -p A -p B" and is deliberately
# expanded unquoted so that it splits into separate arguments.  The output
# of 'git commit-tree' is passed through on stdout.  Dies if anything in
# the subshell fails.
copy_commit () {
        # We're going to set some environment vars here, so
        # do it in a subshell to get rid of them safely later
        debug copy_commit "{$1}" "{$2}" "{$3}"
        git log -1 --pretty=format:'%an%n%ae%n%ad%n%cn%n%ce%n%cd%n%B' "$1" |
        (
                # -r keeps backslashes in names and e-mail addresses intact
                read -r GIT_AUTHOR_NAME
                read -r GIT_AUTHOR_EMAIL
                read -r GIT_AUTHOR_DATE
                read -r GIT_COMMITTER_NAME
                read -r GIT_COMMITTER_EMAIL
                read -r GIT_COMMITTER_DATE
                export  GIT_AUTHOR_NAME \
                        GIT_AUTHOR_EMAIL \
                        GIT_AUTHOR_DATE \
                        GIT_COMMITTER_NAME \
                        GIT_COMMITTER_EMAIL \
                        GIT_COMMITTER_DATE
                # what is left on stdin is the commit message (%B)
                (printf "%s" "$annotate"; cat ) |
                git commit-tree "$2" $3  # reads the rest of stdin
        ) || die "Can't copy commit $1"
}
319
# Usage: add_msg <dir> <latest_old> <latest_new>
#
# Print the commit message for adding a subtree: $message if one was given
# with -m, otherwise a default subject, followed by a blank line and the
# git-subtree-dir / git-subtree-mainline / git-subtree-split trailers.
#
# printf is used rather than a '<<-' here-document because the latter only
# works while its body and terminator are indented with tab characters.
add_msg () {
        dir="$1"
        latest_old="$2"
        latest_new="$3"
        if test -n "$message"
        then
                commit_message="$message"
        else
                commit_message="Add '$dir/' from commit '$latest_new'"
        fi
        printf '%s\n\n' "$commit_message"
        printf 'git-subtree-dir: %s\n' "$dir"
        printf 'git-subtree-mainline: %s\n' "$latest_old"
        printf 'git-subtree-split: %s\n' "$latest_new"
}
338
# Usage: add_squashed_msg <commit> <dir>
#
# Print the message for the merge commit of a squashed add: $message when
# one was given with -m, a default line naming <commit> and <dir> otherwise.
add_squashed_msg () {
        if test -z "$message"
        then
                echo "Merge commit '$1' as '$2'"
        else
                echo "$message"
        fi
}
347
# Usage: rejoin_msg <dir> <latest_old> <latest_new>
#
# Print the commit message for a --rejoin merge: $message if one was given
# with -m, otherwise a default subject, followed by a blank line and the
# git-subtree-dir / git-subtree-mainline / git-subtree-split trailers.
#
# printf is used rather than a '<<-' here-document because the latter only
# works while its body and terminator are indented with tab characters.
rejoin_msg () {
        dir="$1"
        latest_old="$2"
        latest_new="$3"
        if test -n "$message"
        then
                commit_message="$message"
        else
                commit_message="Split '$dir/' into commit '$latest_new'"
        fi
        printf '%s\n\n' "$commit_message"
        printf 'git-subtree-dir: %s\n' "$dir"
        printf 'git-subtree-mainline: %s\n' "$latest_old"
        printf 'git-subtree-split: %s\n' "$latest_new"
}
366
# Usage: squash_msg <dir> <oldsub> <newsub>
#
# Print the commit message for a squash commit.  With an empty <oldsub>
# the subject describes the initial content; otherwise it names the
# abbreviated old..new range and is followed by one-line logs of the
# commits in old..new and, prefixed with "REVERT:", of those in new..old.
# The message always ends with the git-subtree-dir and git-subtree-split
# trailers.
squash_msg () {
        dir="$1"
        oldsub="$2"
        newsub="$3"
        newsub_short=$(git rev-parse --short "$newsub")

        if test -z "$oldsub"
        then
                echo "Squashed '$dir/' content from commit $newsub_short"
        else
                oldsub_short=$(git rev-parse --short "$oldsub")
                echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
                echo
                git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
                git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
        fi

        echo
        echo "git-subtree-dir: $dir"
        echo "git-subtree-split: $newsub"
}
388
# Print the top-level tree id (%T) of commit $1.  Exits the current
# (sub)shell with git's status if the lookup fails.
toptree_for_commit () {
        commit="$1"
        if git log -1 --pretty=format:'%T' "$commit" --
        then
                return 0
        else
                exit $?
        fi
}
394
# Usage: subtree_for_commit <commit> <dir>
#
# Print the id of the tree stored at <dir> in <commit>, as listed by
# 'git ls-tree'.  Prints nothing when <dir> is not listed or is listed as
# a "commit" entry (a submodule).
subtree_for_commit () {
        commit="$1"
        dir="$2"
        git ls-tree "$commit" -- "$dir" |
        while read mode type tree name
        do
                assert [ "$name" = "$dir" ]
                assert [ "$type" = "tree" -o "$type" = "commit" ]
                case "$type" in
                commit)
                        # ignore submodules
                        continue
                        ;;
                esac
                echo $tree
                break
        done
}
408
# Usage: tree_changed <tree> <parent>...
#
# Return 0 ("changed") unless exactly one parent is given and its
# top-level tree equals <tree>, in which case return 1 ("not changed").
tree_changed () {
        tree=$1
        shift
        # weird parents, consider it changed
        test $# -eq 1 || return 0
        ptree=$(toptree_for_commit $1)
        test "$ptree" != "$tree"
}
424
# Usage: new_squash_commit <old> <oldsub> <newsub>
#
# Create a squash commit whose tree is the top-level tree of <newsub> and
# print its id.  When <old> is non-empty it becomes the parent and the
# message describes the <oldsub>..<newsub> range; otherwise the commit has
# no parent and the message describes the initial content.  Uses the
# global $dir for the message.
new_squash_commit () {
        old="$1"
        oldsub="$2"
        newsub="$3"
        tree=$(toptree_for_commit $newsub) || exit $?
        if test -z "$old"
        then
                squash_msg "$dir" "" "$newsub" |
                        git commit-tree "$tree" || exit $?
        else
                squash_msg "$dir" "$oldsub" "$newsub" |
                        git commit-tree "$tree" -p "$old" || exit $?
        fi
}
439
# Usage: copy_or_skip <rev> <tree> <newparents>
#
# Decide whether <rev> needs a new commit.  If one of the (already
# rewritten) parents has exactly <tree> as its top-level tree, print that
# parent so it can stand in for <rev>.  Otherwise create a copy of <rev>
# with <tree> and the de-duplicated parents via copy_commit.
copy_or_skip () {
        rev="$1"
        tree="$2"
        newparents="$3"
        assert [ -n "$tree" ]

        identical=
        nonidentical=
        p=
        gotparents=
        for parent in $newparents
        do
                ptree=$(toptree_for_commit $parent) || exit $?
                test -n "$ptree" || continue
                case "$ptree" in
                "$tree")
                        # an identical parent could be used in place of
                        # this rev.
                        identical="$parent"
                        ;;
                *)
                        nonidentical="$parent"
                        ;;
                esac

                # sometimes both old parents map to the same newparent;
                # eliminate duplicates
                case " $gotparents " in
                *" $parent "*)
                        ;;
                *)
                        gotparents="$gotparents $parent"
                        p="$p -p $parent"
                        ;;
                esac
        done

        if test -z "$identical"
        then
                copy_commit $rev $tree "$p" || exit $?
        else
                echo $identical
        fi
}
482
# Die unless 'git diff-index' reports no differences against HEAD, first
# for the working tree and then for the index (--cached).
ensure_clean () {
        git diff-index HEAD --exit-code --quiet 2>&1 ||
                die "Working tree has modifications.  Cannot add."
        git diff-index --cached HEAD --exit-code --quiet 2>&1 ||
                die "Index has modifications.  Cannot add."
}
492
# Die unless refs/heads/$1 passes 'git check-ref-format'.
ensure_valid_ref_format () {
        if ! git check-ref-format "refs/heads/$1"
        then
                die "'$1' does not look like a ref"
        fi
}
498
# Usage: cmd_add <commit>
#        cmd_add <repository> <ref>
#
# Entry point for 'git subtree add'.  Requires that $dir does not exist
# yet and that the working tree and index are clean, then dispatches on
# the number of arguments: one argument must name a commit and goes to
# cmd_add_commit; two arguments are a repository and a ref and go to
# cmd_add_repository.  Any other argument count is an error.
cmd_add () {
        if test -e "$dir"
        then
                die "'$dir' already exists.  Cannot add."
        fi

        ensure_clean

        if test $# -eq 1
        then
                git rev-parse -q --verify "$1^{commit}" >/dev/null ||
                        die "'$1' does not refer to a commit"

                cmd_add_commit "$@"
        elif test $# -eq 2
        then
                # Technically we could accept a refspec here but we're
                # just going to turn around and add FETCH_HEAD under the
                # specified directory.  Allowing a refspec might be
                # misleading because we won't do anything with any other
                # branches fetched via the refspec.
                ensure_valid_ref_format "$2"

                cmd_add_repository "$@"
        else
                # "$*" joins the arguments into the one string wanted
                # here; "$@" inside a larger string would split it.
                say "error: parameters were '$*'"
                die "Provide either a commit or a repository and commit."
        fi
}
526
# Usage: cmd_add_repository <repository> <ref>
#
# Announce and run 'git fetch' with the given arguments, then add whatever
# ended up in FETCH_HEAD via cmd_add_commit.  Exits with git's status if
# the fetch fails.
cmd_add_repository () {
        repository=$1
        refspec=$2
        echo "git fetch" "$@"
        if git fetch "$@"
        then
                revs=FETCH_HEAD
        else
                exit $?
        fi
        cmd_add_commit $revs
}
537
# Usage: cmd_add_commit <commit>
#
# Read the tree of <commit> into the index under $dir, check it out, and
# record the result as a new commit whose parents are the current HEAD
# (unless HEAD is <commit> itself) and <commit> -- or, with --squash, a
# freshly created squash commit standing in for it.  The current branch
# is then reset to the new commit.  Any failing git step exits with that
# step's status.
cmd_add_commit () {
        revs=$(git rev-parse $default --revs-only "$@") || exit $?
        set -- $revs
        rev="$1"

        debug "Adding $dir as '$rev'..."
        git read-tree --prefix="$dir" $rev || exit $?
        git checkout -- "$dir" || exit $?
        tree=$(git write-tree) || exit $?

        headrev=$(git rev-parse HEAD) || exit $?
        headp=
        if test -n "$headrev" && test "$headrev" != "$rev"
        then
                # deliberately expanded unquoted below so that it splits
                # into "-p" and the hash
                headp="-p $headrev"
        fi

        if test -z "$squash"
        then
                commit=$(add_msg "$dir" "$headrev" "$rev" |
                         git commit-tree $tree $headp -p "$rev") || exit $?
        else
                rev=$(new_squash_commit "" "" "$rev") || exit $?
                commit=$(add_squashed_msg "$rev" "$dir" |
                         git commit-tree $tree $headp -p "$rev") || exit $?
        fi
        git reset "$commit" || exit $?

        say "Added dir '$dir'"
}
568
# Entry point for 'git subtree split'.
#
# Walks the commits selected by $revs (minus whatever
# find_existing_splits excludes) from oldest to newest and, for each one
# not already in the cache, records a mapping from the original commit to
# a commit containing only the $dir subtree.  Prints the id of the newest
# split commit on stdout and exits 0; dies when no revision was produced.
#
# Reads the globals $dir, $revs, $onto, $ignore_joins, $rejoin, $branch
# and $quiet.  With --rejoin the split result is merged back into HEAD;
# with --branch the named branch is created or updated.
cmd_split () {
        debug "Splitting $dir..."
        cache_setup || exit $?

        if test -n "$onto"
        then
                debug "Reading history for --onto=$onto..."
                git rev-list $onto |
                while read rev
                do
                        # the 'onto' history is already just the subdir, so
                        # any parent we find there can be used verbatim
                        debug "  cache: $rev"
                        cache_set "$rev" "$rev"
                done
        fi

        if test -n "$ignore_joins"
        then
                unrevs=
        else
                unrevs="$(find_existing_splits "$dir" "$revs")"
        fi

        # We can't restrict rev-list to only $dir here, because some of our
        # parents have the $dir contents the root, and those won't match.
        # (and rev-list --follow doesn't seem to solve this)
        grl='git rev-list --topo-order --reverse --parents $revs $unrevs'
        revmax=$(eval "$grl" | wc -l)
        revcount=0
        createcount=0
        eval "$grl" |
        while read rev parents
        do
                revcount=$(($revcount + 1))
                # Progress line, rewritten in place.  printf is used
                # because whether echo understands -n and "\r" differs
                # between shells.
                if test -z "$quiet"
                then
                        printf '%s\r' "$revcount/$revmax ($createcount)" >&2
                fi
                debug "Processing commit: $rev"
                exists=$(cache_get "$rev")
                if test -n "$exists"
                then
                        debug "  prior: $exists"
                        continue
                fi
                createcount=$(($createcount + 1))
                debug "  parents: $parents"
                newparents=$(cache_get $parents)
                debug "  newparents: $newparents"

                tree=$(subtree_for_commit "$rev" "$dir")
                debug "  tree is: $tree"

                check_parents $parents

                # ugly.  is there no better way to tell if this is a subtree
                # vs. a mainline commit?  Does it matter?
                if test -z "$tree"
                then
                        set_notree "$rev"
                        if test -n "$newparents"
                        then
                                cache_set "$rev" "$rev"
                        fi
                        continue
                fi

                newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
                debug "  newrev is: $newrev"
                cache_set "$rev" "$newrev"
                cache_set latest_new "$newrev"
                cache_set latest_old "$rev"
        done || exit $?
        latest_new=$(cache_get latest_new)
        if test -z "$latest_new"
        then
                die "No new revisions were found"
        fi

        if test -n "$rejoin"
        then
                debug "Merging split branch into HEAD..."
                latest_old=$(cache_get latest_old)
                # quoted so that a prefix containing spaces reaches
                # rejoin_msg as a single argument
                git merge -s ours \
                        -m "$(rejoin_msg "$dir" "$latest_old" "$latest_new")" \
                        "$latest_new" >&2 || exit $?
        fi
        if test -n "$branch"
        then
                if rev_exists "refs/heads/$branch"
                then
                        if ! rev_is_descendant_of_branch "$latest_new" "$branch"
                        then
                                die "Branch '$branch' is not an ancestor of commit '$latest_new'."
                        fi
                        action='Updated'
                else
                        action='Created'
                fi
                git update-ref -m 'subtree split' \
                        "refs/heads/$branch" "$latest_new" || exit $?
                say "$action branch '$branch'"
        fi
        echo "$latest_new"
        exit 0
}
661
# Usage: cmd_merge <commit>
#
# Entry point for 'git subtree merge'.  Requires a clean tree and exactly
# one revision.  With --squash, the revision is first replaced by a new
# squash commit built on top of the latest squash found for $dir (and the
# command stops early when the subtree is already at that revision).  The
# result is merged with the subtree strategy; the exact merge options
# depend on the reported git version and on whether -m was given.
cmd_merge () {
        revs=$(git rev-parse $default --revs-only "$@") || exit $?
        ensure_clean

        set -- $revs
        test $# -eq 1 ||
                die "You must provide exactly one revision.  Got: '$revs'"
        rev="$1"

        if test -n "$squash"
        then
                first_split="$(find_latest_squash "$dir")"
                test -n "$first_split" ||
                        die "Can't squash-merge: '$dir' was never added."
                set $first_split
                old=$1
                sub=$2
                if test "$sub" = "$rev"
                then
                        say "Subtree is already at commit $rev."
                        exit 0
                fi
                new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
                debug "New squash commit: $new"
                rev="$new"
        fi

        # Collect the merge options in the positional parameters: the
        # strategy selection first, then the optional message.
        version=$(git version)
        if test "$version" \< "git version 1.7"
        then
                set -- -s subtree
        else
                set -- -Xsubtree="$prefix"
        fi
        if test -n "$message"
        then
                set -- "$@" --message="$message"
        fi
        git merge "$@" $rev
}
705
# Usage: cmd_pull <repository> <ref>
#
# Entry point for 'git subtree pull': fetch <ref> from <repository> and
# merge the resulting FETCH_HEAD via cmd_merge.  Requires exactly two
# arguments, a clean tree and a well-formed ref name.
cmd_pull () {
        test $# -eq 2 || die "You must provide <repository> <ref>"
        ensure_clean
        ensure_valid_ref_format "$2"
        git fetch "$@" || exit $?
        revs=FETCH_HEAD
        cmd_merge $revs
}
718
# Usage: cmd_push <repository> <ref>
#
# Entry point for 'git subtree push': split out the history of $prefix
# with 'git subtree split' and push the resulting commit to
# refs/heads/<ref> in <repository>.  Requires exactly two arguments, a
# well-formed ref name and an existing $dir.
cmd_push () {
        if test $# -ne 2
        then
                die "You must provide <repository> <ref>"
        fi
        ensure_valid_ref_format "$2"
        if test -e "$dir"
        then
                repository=$1
                refspec=$2
                echo "git push using: " "$repository" "$refspec"
                localrev=$(git subtree split --prefix="$prefix") || die
                # quoted so that a repository path or URL containing
                # whitespace is passed to git as a single argument
                git push "$repository" "$localrev:refs/heads/$refspec"
        else
                die "'$dir' must already exist. Try 'git subtree add'."
        fi
}
735
736 "cmd_$command" "$@"