subtree: use commits before rejoins for splits
[git] / contrib / subtree / git-subtree.sh
1 #!/bin/sh
2 #
3 # git-subtree.sh: split/join git repositories in subdirectories of this one
4 #
5 # Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6 #
# Called with no arguments at all: behave as if help had been requested.
test $# -ne 0 || set -- -h

# Usage text and option table, in the layout that
# 'git rev-parse --parseopt' reads from its standard input.
OPTS_SPEC="\
git subtree add   --prefix=<prefix> <commit>
git subtree add   --prefix=<prefix> <repository> <ref>
git subtree merge --prefix=<prefix> <commit>
git subtree pull  --prefix=<prefix> <repository> <ref>
git subtree push  --prefix=<prefix> <repository> <ref>
git subtree split --prefix=<prefix> <commit...>
--
h,help        show the help
q             quiet
d             show debug messages
P,prefix=     the name of the subdir to split out
m,message=    use the given message as the commit message for the merge commit
 options for 'split'
annotate=     add a prefix to commit message of new commits
b,branch=     create a new branch from the split subtree
ignore-joins  ignore prior --rejoin commits
onto=         try connecting new tree to an existing one
rejoin        merge the new branch back into HEAD
 options for 'add', 'merge', and 'pull'
squash        merge subtree changes as a single commit
"
# Evaluate whatever the option parser prints; if the parser itself fails,
# evaluate an 'exit <status>' carrying its exit code instead.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"

# Append git's exec path to PATH, then source the shared shell library.
# (die and require_work_tree are not defined in this file; presumably
# they come from git-sh-setup.)
PATH=$PATH:$(git --exec-path)
. git-sh-setup

require_work_tree

# Start every setting empty; the option loop below fills them in.
quiet= branch= debug= command=
onto= rejoin= ignore_joins= annotate=
squash= message= prefix=
51
# Write the arguments, joined into one line, to stderr -- but only when
# $debug is non-empty (set by -d).
debug () {
        test -z "$debug" ||
                printf "%s\n" "$*" >&2
}
58
# Write the arguments, joined into one line, to stderr unless $quiet is
# non-empty (set by -q).
say () {
        test -n "$quiet" ||
                printf "%s\n" "$*" >&2
}
65
# Like say, but ends the line with a carriage return instead of a newline,
# so the next message written to stderr starts at the same column.
progress () {
        test -n "$quiet" ||
                printf "%s\r" "$*" >&2
}
72
# Run the arguments as a command; if it fails, abort through die with a
# message that repeats the failed command.
assert () {
        "$@" && return
        die "assertion failed: " "$@"
}
79
80
# Walk the leading options one at a time.  An option that takes a value
# reads it from the following word and consumes that word too.  The loop
# stops after '--'; anything not listed here is rejected.
while test $# -gt 0
do
        opt="$1"
        shift

        case "$opt" in
        -q) quiet=1 ;;
        -d) debug=1 ;;
        --annotate) annotate="$1"; shift ;;
        --no-annotate) annotate= ;;
        -b) branch="$1"; shift ;;
        # a single trailing slash is dropped from the prefix
        -P) prefix="${1%/}"; shift ;;
        -m) message="$1"; shift ;;
        --no-prefix) prefix= ;;
        --onto) onto="$1"; shift ;;
        --no-onto) onto= ;;
        --rejoin) rejoin=1 ;;
        --no-rejoin) rejoin= ;;
        --ignore-joins) ignore_joins=1 ;;
        --no-ignore-joins) ignore_joins= ;;
        --squash) squash=1 ;;
        --no-squash) squash= ;;
        --) break ;;
        *) die "Unexpected option: $opt" ;;
        esac
done
148
# The first word left after the options names the subcommand.  Refuse to
# continue without one: shifting an empty argument list is an error that
# some shells treat as fatal, which would hide the real problem.
if test $# -eq 0
then
        die "You must specify a command (add, merge, pull, push or split)."
fi
command="$1"
shift

# 'split' and 'push' let rev-parse fall back to HEAD when no revision is
# given; no default is supplied for the other commands.
case "$command" in
add|merge|pull)
        default=
        ;;
split|push)
        default="--default HEAD"
        ;;
*)
        die "Unknown command '$command'"
        ;;
esac

if test -z "$prefix"
then
        die "You must provide the --prefix option."
fi

# 'add' requires that the prefix not exist yet; every other command
# requires that it already does.
case "$command" in
add)
        test -e "$prefix" &&
                die "prefix '$prefix' already exists."
        ;;
*)
        test -e "$prefix" ||
                die "'$prefix' does not exist; use 'git subtree add'"
        ;;
esac

# dirname of "<prefix>/." gives back the prefix without the "/." suffix.
dir="$(dirname "$prefix/.")"

# 'pull', 'add' and 'push' interpret their own arguments.  For the
# remaining commands the arguments must all be revisions; anything that
# rev-parse reports as a non-revision, non-flag word is refused.
if test "$command" != "pull" &&
                test "$command" != "add" &&
                test "$command" != "push"
then
        revs=$(git rev-parse $default --revs-only "$@") || exit $?
        dirs=$(git rev-parse --no-revs --no-flags "$@") || exit $?
        if test -n "$dirs"
        then
                die "Error: Use --prefix instead of bare filenames."
        fi
fi

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
200
# Create an empty, per-process cache directory under $GIT_DIR, with a
# 'notree' subdirectory, and remember its path in $cachedir.
cache_setup () {
        local newdir
        cachedir="$GIT_DIR/subtree-cache/$$"
        if ! rm -rf "$cachedir"
        then
                die "Can't delete old cachedir: $cachedir"
        fi
        for newdir in "$cachedir" "$cachedir/notree"
        do
                mkdir -p "$newdir" ||
                        die "Can't create new cachedir: $newdir"
        done
        debug "Using cachedir: $cachedir" >&2
}
211
# For each revision given, print the value stored for it in the cache.
# Revisions without a readable cache entry are skipped silently.
cache_get () {
        for oldrev
        do
                test -r "$cachedir/$oldrev" || continue
                read newrev <"$cachedir/$oldrev"
                echo $newrev
        done
}
222
# Print, one per line, those of the given revisions that have no readable
# entry in the cache.
cache_miss () {
        for oldrev
        do
                test -r "$cachedir/$oldrev" ||
                        echo $oldrev
        done
}
232
# Make sure every parent of a commit has been handled before the commit
# itself: any parent that is neither cached nor marked 'notree' is
# processed now, one nesting level deeper.
# Arguments: $1 - whitespace-separated list of parent revisions
#            $2 - nesting depth of the caller
check_parents () {
        # $1 is left unquoted on purpose: it holds a list, and cache_miss
        # has to look at each parent separately.  Passed as a single word,
        # a multi-parent list never matches a cache entry, so every
        # parent would be reported as missing.
        missed=$(cache_miss $1)
        local indent=$(($2 + 1))
        for miss in $missed
        do
                if ! test -r "$cachedir/notree/$miss"
                then
                        debug "  incorrect order: $miss"
                        process_split_commit "$miss" "" "$indent"
                fi
        done
}
245
# Record in the cache's 'notree' area that revision $1 has been marked.
set_notree () {
        local marker="$cachedir/notree/$1"
        echo "1" >"$marker"
}
249
# Store the value $2 in the cache under the key $1.  Ordinary keys may be
# written only once; the keys latest_old and latest_new may be overwritten.
cache_set () {
        oldrev="$1"
        newrev="$2"
        case "$oldrev" in
        latest_old|latest_new)
                ;;
        *)
                if test -e "$cachedir/$oldrev"
                then
                        die "cache for $oldrev already exists!"
                fi
                ;;
        esac
        echo "$newrev" >"$cachedir/$oldrev"
}
261
# Succeed (0) if 'git rev-parse' accepts $1, fail with exactly 1 otherwise.
# All output of the check is discarded.
rev_exists () {
        git rev-parse "$1" >/dev/null 2>&1 || return 1
        return 0
}
270
# Succeed when nothing reachable from branch $2 is missing from the
# history of revision $1, i.e. 'git rev-list -1 <branch> ^<rev>' prints
# nothing; fail with 1 otherwise.
rev_is_descendant_of_branch () {
        newrev="$1"
        branch="$2"
        branch_hash=$(git rev-parse "$branch")
        match=$(git rev-list -1 "$branch_hash" "^$newrev")

        test -z "$match"
}
284
# Print "^<rev>^" (an exclusion of the commit's first parent, for use in
# a rev-list) when that parent exists.  A commit without a parent has
# nothing to exclude, so nothing is printed for it.
try_remove_previous () {
        rev_exists "$1^" || return 0
        echo "^$1^"
}
294
# Scan the history of HEAD for the most recent commit whose message
# carries a git-subtree-dir trailer for directory $1 together with a
# git-subtree-split trailer, and print "<commit> <split-hash>" for it.
# If the commit also has a git-subtree-mainline trailer (a rejoin), the
# split hash is printed in both positions.  Prints nothing if no such
# commit is found.
find_latest_squash () {
        # Assign first so the debug line names the directory actually
        # searched rather than whatever $dir held before the call.
        dir="$1"
        debug "Looking for latest squash ($dir)..."
        sq=
        main=
        sub=
        git log --grep="^git-subtree-dir: $dir/*\$" \
                --no-show-signature --pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
        # -r: a message line ending in a backslash must not swallow the
        # line after it (which may be a trailer or the END marker).
        while read -r a b junk
        do
                debug "$a $b $junk"
                debug "{{$sq/$main/$sub}}"
                case "$a" in
                START)
                        sq="$b"
                        ;;
                git-subtree-mainline:)
                        main="$b"
                        ;;
                git-subtree-split:)
                        sub="$(git rev-parse "$b^0")" ||
                        die "could not rev-parse split hash $b from commit $sq"
                        ;;
                END)
                        if test -n "$sub"
                        then
                                if test -n "$main"
                                then
                                        # a rejoin commit?
                                        # Pretend its sub was a squash.
                                        sq="$sub"
                                fi
                                debug "Squash found: $sq $sub"
                                echo "$sq" "$sub"
                                break
                        fi
                        # not a match: reset before the next commit
                        sq=
                        main=
                        sub=
                        ;;
                esac
        done
}
338
# Scan the history of the revisions in $2 for commits recording earlier
# subtree operations on directory $1 and seed the cache from them:
#   - split hash without mainline (squash): cache <commit> -> <split>
#   - split hash with mainline (add/rejoin): cache <mainline> -> <split>
#     and <split> -> <split>, and print "^<rev>^" exclusions for both via
#     try_remove_previous.
# With $ignore_joins set, the log is searched for "Add '<dir>/' from
# commit '" messages instead of the git-subtree-dir trailer.
find_existing_splits () {
        debug "Looking for prior splits..."
        dir="$1"
        revs="$2"
        main=
        sub=
        local grep_format="^git-subtree-dir: $dir/*\$"
        if test -n "$ignore_joins"
        then
                grep_format="^Add '$dir/' from commit '"
        fi
        git log --grep="$grep_format" \
                --no-show-signature --pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
        # -r: a message line ending in a backslash must not swallow the
        # line after it (which may be a trailer or the END marker).
        while read -r a b junk
        do
                case "$a" in
                START)
                        sq="$b"
                        ;;
                git-subtree-mainline:)
                        main="$b"
                        ;;
                git-subtree-split:)
                        sub="$(git rev-parse "$b^0")" ||
                        die "could not rev-parse split hash $b from commit $sq"
                        ;;
                END)
                        debug "  Main is: '$main'"
                        # $main comes from commit message text, so it is
                        # tested with separate single-operand tests rather
                        # than the obsolescent 'test ... -a ...' form.
                        if test -n "$sub"
                        then
                                if test -z "$main"
                                then
                                        # squash commits refer to a subtree
                                        debug "  Squash: $sq from $sub"
                                        cache_set "$sq" "$sub"
                                else
                                        debug "  Prior: $main -> $sub"
                                        cache_set "$main" "$sub"
                                        cache_set "$sub" "$sub"
                                        try_remove_previous "$main"
                                        try_remove_previous "$sub"
                                fi
                        fi
                        main=
                        sub=
                        ;;
                esac
        done
}
387
# Create a new commit that reuses the author, committer, dates and message
# of commit $1, but points at tree $2 with the parent options in $3
# (a string such as " -p <rev> -p <rev>").  $annotate is put in front of
# the message.  The output of 'git commit-tree' goes to stdout.
copy_commit () {
        # We're going to set some environment vars here, so
        # do it in a subshell to get rid of them safely later
        debug copy_commit "{$1}" "{$2}" "{$3}"
        git log -1 --no-show-signature --pretty=format:'%an%n%ae%n%aD%n%cn%n%ce%n%cD%n%B' "$1" |
        (
                # -r keeps backslashes in names and addresses intact;
                # plain 'read' would treat them as escape characters.
                read -r GIT_AUTHOR_NAME
                read -r GIT_AUTHOR_EMAIL
                read -r GIT_AUTHOR_DATE
                read -r GIT_COMMITTER_NAME
                read -r GIT_COMMITTER_EMAIL
                read -r GIT_COMMITTER_DATE
                export  GIT_AUTHOR_NAME \
                        GIT_AUTHOR_EMAIL \
                        GIT_AUTHOR_DATE \
                        GIT_COMMITTER_NAME \
                        GIT_COMMITTER_EMAIL \
                        GIT_COMMITTER_DATE
                (
                        printf "%s" "$annotate"
                        cat
                ) |
                git commit-tree "$2" $3  # reads the rest of stdin
        ) || die "Can't copy commit $1"
}
413
# Print the commit message for 'git subtree add': $message if one was
# given, otherwise a default subject, followed by a blank line and the
# git-subtree-dir / -mainline / -split trailers.
# Arguments: $1 - subtree directory, $2 - mainline commit, $3 - split commit
add_msg () {
        dir="$1"
        latest_old="$2"
        latest_new="$3"
        if test -n "$message"
        then
                commit_message="$message"
        else
                commit_message="Add '$dir/' from commit '$latest_new'"
        fi
        # printf rather than a '<<-' here-document: '<<-' only strips
        # leading tabs, so the text and its terminator would depend on
        # how this function happens to be indented.
        printf '%s\n' \
                "$commit_message" \
                "" \
                "git-subtree-dir: $dir" \
                "git-subtree-mainline: $latest_old" \
                "git-subtree-split: $latest_new"
}
432
# Print the message for the merge commit of a squashed add: $message if
# one was given, otherwise a default naming commit $1 and directory $2.
add_squashed_msg () {
        echo "${message:-Merge commit '$1' as '$2'}"
}
441
# Print the commit message for a 'split --rejoin' merge: $message if one
# was given, otherwise a default subject, followed by a blank line and the
# git-subtree-dir / -mainline / -split trailers.
# Arguments: $1 - subtree directory, $2 - mainline commit, $3 - split commit
rejoin_msg () {
        dir="$1"
        latest_old="$2"
        latest_new="$3"
        if test -n "$message"
        then
                commit_message="$message"
        else
                commit_message="Split '$dir/' into commit '$latest_new'"
        fi
        # printf rather than a '<<-' here-document: '<<-' only strips
        # leading tabs, so the text and its terminator would depend on
        # how this function happens to be indented.
        printf '%s\n' \
                "$commit_message" \
                "" \
                "git-subtree-dir: $dir" \
                "git-subtree-mainline: $latest_old" \
                "git-subtree-split: $latest_new"
}
460
# Print the message for a squash commit of directory $1 that moves the
# subtree from commit $2 (may be empty for the first squash) to commit $3.
# With a previous commit, the subject is followed by one log line per
# commit gained and one "REVERT:" line per commit lost.  The message
# always ends with the git-subtree-dir and git-subtree-split trailers.
squash_msg () {
        dir="$1"
        oldsub="$2"
        newsub="$3"
        newsub_short=$(git rev-parse --short "$newsub")

        if test -z "$oldsub"
        then
                echo "Squashed '$dir/' content from commit $newsub_short"
        else
                oldsub_short=$(git rev-parse --short "$oldsub")
                echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
                echo
                git log --no-show-signature --pretty=tformat:'%h %s' "$oldsub..$newsub"
                git log --no-show-signature --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
        fi

        echo
        echo "git-subtree-dir: $dir"
        echo "git-subtree-split: $newsub"
}
482
# Print the id of the top-level tree of commit $1.  If git cannot verify
# it, exit with git's status.
toptree_for_commit () {
        commit="$1"
        git rev-parse --verify "${commit}^{tree}" && return
        exit $?
}
487
# Print the tree id recorded for directory $2 in commit $1.  Prints
# nothing when 'git ls-tree' lists no entry for it, or when the entry is
# of type 'commit' (a submodule).
subtree_for_commit () {
        commit="$1"
        dir="$2"
        git ls-tree "$commit" -- "$dir" |
        while read mode type tree name
        do
                assert test "$name" = "$dir"
                assert test "$type" = "tree" -o "$type" = "commit"
                # ignore submodules; the first real tree is the answer
                if test "$type" != "commit"
                then
                        echo $tree
                        break
                fi
        done
}
501
# Decide whether tree $1 differs from the tree of its parent.
# Returns 0 ("changed") unless exactly one parent follows $1 and that
# parent's top-level tree equals $1, in which case it returns 1.
tree_changed () {
        tree=$1
        shift
        # weird parents, consider it changed
        test $# -eq 1 || return 0
        ptree=$(toptree_for_commit $1)
        test "$ptree" != "$tree"
}
518
# Create a squash commit that holds the top-level tree of commit $3 and
# print what 'git commit-tree' prints.  When $1 (the previous squash
# commit) is non-empty it becomes the parent and the message describes
# the change from $2 to $3; otherwise a parentless commit is created.
new_squash_commit () {
        old="$1"
        oldsub="$2"
        newsub="$3"
        tree=$(toptree_for_commit $newsub) || exit $?
        if test -z "$old"
        then
                squash_msg "$dir" "" "$newsub" |
                git commit-tree "$tree" || exit $?
        else
                squash_msg "$dir" "$oldsub" "$newsub" |
                git commit-tree "$tree" -p "$old" || exit $?
        fi
}
533
# Decide how commit $1, whose subtree is tree $2, appears in the split
# history given its already-rewritten parents $3 (whitespace-separated).
# If a parent has exactly the same tree, and no other parent contributes
# history of its own, that parent is printed and reused.  Otherwise the
# commit is copied with copy_commit onto the de-duplicated parents.
copy_or_skip () {
        rev="$1"
        tree="$2"
        newparents="$3"
        assert test -n "$tree"

        identical=
        nonidentical=
        p=
        gotparents=
        for parent in $newparents
        do
                ptree=$(toptree_for_commit $parent) || exit $?
                test -z "$ptree" && continue
                if test "$ptree" != "$tree"
                then
                        nonidentical="$parent"
                else
                        # an identical parent could be used in place of this rev.
                        identical="$parent"
                fi

                # sometimes both old parents map to the same newparent;
                # eliminate duplicates
                case " $gotparents " in
                *" $parent "*)
                        ;;
                *)
                        gotparents="$gotparents $parent"
                        p="$p -p $parent"
                        ;;
                esac
        done

        copycommit=
        if test -n "$identical" && test -n "$nonidentical"
        then
                extras=$(git rev-list --count $identical..$nonidentical)
                if test "$extras" -ne 0
                then
                        # we need to preserve history along the other branch
                        copycommit=1
                fi
        fi
        if test -z "$identical" || test -n "$copycommit"
        then
                copy_commit "$rev" "$tree" "$p" || exit $?
        else
                echo $identical
        fi
}
591
# Abort through die if 'git diff-index' reports a difference against HEAD,
# first for the working tree and then for the index.
ensure_clean () {
        git diff-index HEAD --exit-code --quiet 2>&1 ||
                die "Working tree has modifications.  Cannot add."
        git diff-index --cached HEAD --exit-code --quiet 2>&1 ||
                die "Index has modifications.  Cannot add."
}
602
# Abort through die unless "refs/heads/$1" passes 'git check-ref-format'.
ensure_valid_ref_format () {
        if ! git check-ref-format "refs/heads/$1"
        then
                die "'$1' does not look like a ref"
        fi
}
607
# Rewrite one commit for the split history and record the result.
# Arguments: $1 - commit to process
#            $2 - its parents (used only when $3 is 0)
#            $3 - nesting depth; 0 for commits coming from the main
#                 rev-list walk, non-zero when reached through
#                 check_parents, in which case the parents are looked up
#                 from the repository instead
# Updates the revcount/extracount/createcount counters and, for each
# newly rewritten commit, the cache entries <rev>, latest_new, latest_old.
process_split_commit () {
        local rev="$1"
        local parents="$2"
        local indent=$3

        if test $indent -eq 0
        then
                revcount=$(($revcount + 1))
        else
                # processing commit without normal parent information;
                # fetch from repo
                parents=$(git show -s --pretty=%P "$rev")
                extracount=$(($extracount + 1))
        fi

        progress "$revcount/$revmax ($createcount) [$extracount]"

        debug "Processing commit: $rev"
        exists=$(cache_get "$rev")
        if test -n "$exists"
        then
                # already mapped earlier; nothing left to do
                debug "  prior: $exists"
                return
        fi
        createcount=$(($createcount + 1))
        debug "  parents: $parents"
        check_parents "$parents" "$indent"
        newparents=$(cache_get $parents)
        debug "  newparents: $newparents"

        tree=$(subtree_for_commit "$rev" "$dir")
        debug "  tree is: $tree"

        if test -n "$tree"
        then
                newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
                debug "  newrev is: $newrev"
                cache_set "$rev" "$newrev"
                cache_set latest_new "$newrev"
                cache_set latest_old "$rev"
                return
        fi

        # ugly.  is there no better way to tell if this is a subtree
        # vs. a mainline commit?  Does it matter?
        set_notree "$rev"
        test -z "$newparents" ||
                cache_set "$rev" "$rev"
}
659
# 'git subtree add': the directory must not exist yet and the work tree
# must be clean.  One argument is treated as a local commit, two as a
# repository plus ref to fetch; any other count is an error.
cmd_add () {
        test -e "$dir" &&
                die "'$dir' already exists.  Cannot add."

        ensure_clean

        case $# in
        1)
                git rev-parse -q --verify "$1^{commit}" >/dev/null ||
                        die "'$1' does not refer to a commit"

                cmd_add_commit "$@"
                ;;
        2)
                # Technically we could accept a refspec here but we're
                # just going to turn around and add FETCH_HEAD under the
                # specified directory.  Allowing a refspec might be
                # misleading because we won't do anything with any other
                # branches fetched via the refspec.
                ensure_valid_ref_format "$2"

                cmd_add_repository "$@"
                ;;
        *)
                say "error: parameters were '$*'"
                die "Provide either a commit or a repository and commit."
                ;;
        esac
}
690
# Fetch ref $2 from repository $1 and add the fetched commit (FETCH_HEAD)
# as the subtree.  Exits with git's status if the fetch fails.
cmd_add_repository () {
        repository=$1
        refspec=$2
        echo "git fetch" "$@"
        git fetch "$@" || exit $?
        revs=FETCH_HEAD
        cmd_add_commit $revs
}
700
# Add the given commit under $dir: read its tree into the index at that
# prefix, check the files out, and create a merge commit whose parents are
# HEAD (unless HEAD is the commit being added) and either the commit
# itself or, with --squash, a fresh squash commit made from it.
# Any failing git step ends the script with that step's status.
cmd_add_commit () {
        revs=$(git rev-parse $default --revs-only "$@") || exit $?
        set -- $revs
        rev="$1"

        debug "Adding $dir as '$rev'..."
        git read-tree --prefix="$dir" $rev || exit $?
        git checkout -- "$dir" || exit $?
        tree=$(git write-tree) || exit $?

        headrev=$(git rev-parse HEAD) || exit $?
        headp=
        if test -n "$headrev" && test "$headrev" != "$rev"
        then
                headp="-p $headrev"
        fi

        if test -z "$squash"
        then
                revp=$(peel_committish "$rev") &&
                commit=$(add_msg "$dir" $headrev "$rev" |
                        git commit-tree "$tree" $headp -p "$revp") || exit $?
        else
                rev=$(new_squash_commit "" "" "$rev") || exit $?
                commit=$(add_squashed_msg "$rev" "$dir" |
                        git commit-tree "$tree" $headp -p "$rev") || exit $?
        fi
        git reset "$commit" || exit $?

        say "Added dir '$dir'"
}
733
# 'git subtree split': rewrite the history of $revs so that only the
# contents of $dir remain, then print the id of the newest rewritten
# commit.  With --onto the given history is pre-loaded into the cache;
# with --rejoin the result is merged back into HEAD; with --branch the
# named branch is created or updated.  Exits the script when done.
cmd_split () {
        debug "Splitting $dir..."
        cache_setup || exit $?

        if test -n "$onto"
        then
                debug "Reading history for --onto=$onto..."
                git rev-list $onto |
                while read rev
                do
                        # the 'onto' history is already just the subdir, so
                        # any parent we find there can be used verbatim
                        debug "  cache: $rev"
                        cache_set "$rev" "$rev"
                # The loop runs in a subshell because of the pipe, so a
                # die in cache_set only ends that subshell; pass the
                # failure on, as the main loop below does.
                done || exit $?
        fi

        unrevs="$(find_existing_splits "$dir" "$revs")"

        # We can't restrict rev-list to only $dir here, because some of our
        # parents have the $dir contents the root, and those won't match.
        # (and rev-list --follow doesn't seem to solve this)
        grl='git rev-list --topo-order --reverse --parents $revs $unrevs'
        revmax=$(eval "$grl" | wc -l)
        revcount=0
        createcount=0
        extracount=0
        eval "$grl" |
        while read rev parents
        do
                process_split_commit "$rev" "$parents" 0
        done || exit $?

        # The loop above ran in a subshell; its result is read back from
        # the cache.
        latest_new=$(cache_get latest_new)
        if test -z "$latest_new"
        then
                die "No new revisions were found"
        fi

        if test -n "$rejoin"
        then
                debug "Merging split branch into HEAD..."
                latest_old=$(cache_get latest_old)
                git merge -s ours \
                        --allow-unrelated-histories \
                        -m "$(rejoin_msg "$dir" "$latest_old" "$latest_new")" \
                        "$latest_new" >&2 || exit $?
        fi
        if test -n "$branch"
        then
                if rev_exists "refs/heads/$branch"
                then
                        if ! rev_is_descendant_of_branch "$latest_new" "$branch"
                        then
                                die "Branch '$branch' is not an ancestor of commit '$latest_new'."
                        fi
                        action='Updated'
                else
                        action='Created'
                fi
                git update-ref -m 'subtree split' \
                        "refs/heads/$branch" "$latest_new" || exit $?
                say "$action branch '$branch'"
        fi
        echo "$latest_new"
        exit 0
}
801
# 'git subtree merge': merge exactly one revision into $prefix.  With
# --squash, a new squash commit is first built on top of the latest
# recorded squash and merged instead; nothing is done if the subtree is
# already at that revision.  A git whose version string sorts before
# "git version 1.7" gets '-s subtree', any other gets '-Xsubtree=<prefix>'.
cmd_merge () {
        revs=$(git rev-parse $default --revs-only "$@") || exit $?
        ensure_clean

        set -- $revs
        test $# -eq 1 ||
                die "You must provide exactly one revision.  Got: '$revs'"
        rev="$1"

        if test -n "$squash"
        then
                first_split="$(find_latest_squash "$dir")"
                test -n "$first_split" ||
                        die "Can't squash-merge: '$dir' was never added."
                set $first_split
                old=$1
                sub=$2
                if test "$sub" = "$rev"
                then
                        say "Subtree is already at commit $rev."
                        exit 0
                fi
                new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
                debug "New squash commit: $new"
                rev="$new"
        fi

        # Collect the merge options in the positional parameters so that
        # a prefix or message containing spaces stays a single word.
        version=$(git version)
        if test "$version" \< "git version 1.7"
        then
                set -- -s subtree
        else
                set -- -Xsubtree="$prefix"
        fi
        if test -n "$message"
        then
                set -- "$@" --message="$message"
        fi
        git merge "$@" "$rev"
}
852
# 'git subtree pull': fetch ref $2 from repository $1 and merge the
# fetched commit (FETCH_HEAD) into the subtree.  Requires exactly two
# arguments and a clean work tree.
cmd_pull () {
        test $# -eq 2 ||
                die "You must provide <repository> <ref>"
        ensure_clean
        ensure_valid_ref_format "$2"
        git fetch "$@" || exit $?
        revs=FETCH_HEAD
        cmd_merge $revs
}
865
# 'git subtree push': split the subtree at $prefix with a nested
# 'git subtree split' and push the resulting commit to branch $2 of
# repository $1.  Requires exactly two arguments and an existing $dir.
cmd_push () {
        test $# -eq 2 ||
                die "You must provide <repository> <ref>"
        ensure_valid_ref_format "$2"
        test -e "$dir" ||
                die "'$dir' must already exist. Try 'git subtree add'."

        repository=$1
        refspec=$2
        echo "git push using: " "$repository" "$refspec"
        localrev=$(git subtree split --prefix="$prefix") || die
        git push "$repository" "$localrev":"refs/heads/$refspec"
}
883
884 "cmd_$command" "$@"