Merge branch 'dt/initial-ref-xn-commit-doc'
[git] / contrib / subtree / git-subtree.sh
#!/bin/sh
#
# git-subtree.sh: split/join git repositories in subdirectories of this one
#
# Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
#
# Called with no arguments at all: act as if -h had been given.
[ $# -ne 0 ] || set -- -h

# Usage lines and option table fed to "git rev-parse --parseopt".
# Everything before the "--" line is usage text; the rest declares options.
OPTS_SPEC="\
git subtree add   --prefix=<prefix> <commit>
git subtree add   --prefix=<prefix> <repository> <ref>
git subtree merge --prefix=<prefix> <commit>
git subtree pull  --prefix=<prefix> <repository> <ref>
git subtree push  --prefix=<prefix> <repository> <ref>
git subtree split --prefix=<prefix> <commit...>
--
h,help        show the help
q             quiet
d             show debug messages
P,prefix=     the name of the subdir to split out
m,message=    use the given message as the commit message for the merge commit
 options for 'split'
annotate=     add a prefix to commit message of new commits
b,branch=     create a new branch from the split subtree
ignore-joins  ignore prior --rejoin commits
onto=         try connecting new tree to an existing one
rejoin        merge the new branch back into HEAD
 options for 'add', 'merge', and 'pull'
squash        merge subtree changes as a single commit
"
# Evaluate whatever parseopt prints; if parseopt itself fails, evaluate
# "exit <status>" instead so the script stops with that status.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"
33
# Append git's exec-path to PATH so that git-sh-setup can be sourced by name.
# NOTE(review): die, require_work_tree and peel_committish are used in this
# file but never defined here -- presumably git-sh-setup supplies them.
PATH=$PATH:$(git --exec-path)
. git-sh-setup

require_work_tree

# Option and command state; everything starts out empty and is filled in
# by the command-line parsing further down.
quiet= branch= debug= command= onto= rejoin=
ignore_joins= annotate= squash= message= prefix=
50
# debug <words...>
# Write the arguments (joined by spaces) as one line on stderr, but only
# when $debug is non-empty.
debug()
{
	[ -z "$debug" ] || printf "%s\n" "$*" >&2
}
57
# say <words...>
# Write the arguments (joined by spaces) as one line on stderr unless
# $quiet is set.
say()
{
	[ -n "$quiet" ] || printf "%s\n" "$*" >&2
}
64
# progress <words...>
# Like say, but ends the text with a carriage return instead of a newline,
# so the next message overwrites it on a terminal.  Silent when $quiet is set.
progress()
{
	case "$quiet" in
	'')
		printf "%s\r" "$*" >&2
		;;
	esac
}
71
# assert <command> [<args>...]
# Run the given command; if it fails, abort through die, quoting the
# command that failed.
assert()
{
	"$@" || die "assertion failed: " "$@"
}
80
81
82 #echo "Options: $*"
83
# Consume the normalized options one at a time, up to and including the
# "--" separator.  Options that take a value read it from the next word.
until [ $# -eq 0 ]; do
	opt="$1"
	shift
	case "$opt" in
	-q)
		quiet=1
		;;
	-d)
		debug=1
		;;
	--annotate)
		annotate="$1"
		shift
		;;
	--no-annotate)
		annotate=
		;;
	-b)
		branch="$1"
		shift
		;;
	-P)
		# drop one trailing slash from the prefix
		prefix="${1%/}"
		shift
		;;
	-m)
		message="$1"
		shift
		;;
	--no-prefix)
		prefix=
		;;
	--onto)
		onto="$1"
		shift
		;;
	--no-onto)
		onto=
		;;
	--rejoin)
		rejoin=1
		;;
	--no-rejoin)
		rejoin=
		;;
	--ignore-joins)
		ignore_joins=1
		;;
	--no-ignore-joins)
		ignore_joins=
		;;
	--squash)
		squash=1
		;;
	--no-squash)
		squash=
		;;
	--)
		break
		;;
	*)
		die "Unexpected option: $opt"
		;;
	esac
done
108
# The first word left after option parsing names the sub-command.
command="$1"
# Shift only when a command word is actually present.  With nothing left
# (e.g. only --prefix was given), an unconditional "shift" is an error in
# POSIX sh and can abort the script before the "Unknown command"
# diagnostic below is printed.
if [ $# -gt 0 ]; then
	shift
fi
# $default is passed on to "git rev-parse" later: split and push fall back
# to HEAD when no revision is named, the other commands have no fallback.
case "$command" in
	add|merge|pull) default= ;;
	split|push) default="--default HEAD" ;;
	*) die "Unknown command '$command'" ;;
esac

if [ -z "$prefix" ]; then
	die "You must provide the --prefix option."
fi

# "add" needs the prefix to be absent; every other command needs it to exist.
case "$command" in
	add) [ -e "$prefix" ] &&
		die "prefix '$prefix' already exists." ;;
	*)   [ -e "$prefix" ] ||
		die "'$prefix' does not exist; use 'git subtree add'" ;;
esac
127
# $dir is the dirname of "$prefix/." (e.g. "foo/." gives "foo").
dir="$(dirname "$prefix/.")"

# pull, add and push interpret their own arguments; for everything else,
# resolve the revisions now and reject any leftover non-revision words.
case "$command" in
pull|add|push)
	;;
*)
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
	if [ -n "$dirs" ]; then
		die "Error: Use --prefix instead of bare filenames."
	fi
	;;
esac

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
144
# cache_setup
# Start from an empty cache directory for this process under
# $GIT_DIR/subtree-cache, with a "notree" subdirectory.
# Sets the global $cachedir.
cache_setup()
{
	cachedir="$GIT_DIR/subtree-cache/$$"
	if ! rm -rf "$cachedir"; then
		die "Can't delete old cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir"; then
		die "Can't create new cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir/notree"; then
		die "Can't create new cachedir: $cachedir/notree"
	fi
	debug "Using cachedir: $cachedir" >&2
}
153
# cache_get <oldrev>...
# For every argument that has a readable cache file, print the value
# stored in it (first line).  Arguments without an entry print nothing.
cache_get()
{
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] || continue
		read newrev <"$cachedir/$oldrev"
		echo $newrev
	done
}
163
# cache_miss <oldrev>...
# Print, one per line, every argument that has no readable cache file.
cache_miss()
{
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] || echo $oldrev
	done
}
172
# check_parents <parent>...
# Debug aid: report each parent that is neither cached nor recorded under
# notree/.  It only emits debug output.
check_parents()
{
	missed=$(cache_miss $*)
	for miss in $missed; do
		[ -r "$cachedir/notree/$miss" ] ||
			debug "  incorrect order: $miss"
	done
}
182
# set_notree <rev>
# Record a marker file for <rev> under $cachedir/notree.
set_notree()
{
	printf '%s\n' 1 >"$cachedir/notree/$1"
}
187
# cache_set <oldrev> <newrev>
# Store <newrev> as the cache entry for <oldrev>.  An ordinary entry may be
# written only once; the keys latest_old and latest_new may be overwritten.
cache_set()
{
	oldrev="$1"
	newrev="$2"
	case "$oldrev" in
	latest_old|latest_new)
		;;
	*)
		if [ -e "$cachedir/$oldrev" ]; then
			die "cache for $oldrev already exists!"
		fi
		;;
	esac
	echo "$newrev" >"$cachedir/$oldrev"
}
199
# rev_exists <rev>
# Return 0 when "git rev-parse" accepts <rev>, 1 otherwise.  All output of
# git is discarded.
rev_exists()
{
	git rev-parse "$1" >/dev/null 2>&1 || return 1
	return 0
}
208
# rev_is_descendant_of_branch <newrev> <branch>
# Return 0 when "git rev-list -1 <branch> ^<newrev>" prints nothing, i.e.
# nothing reachable from the branch is unreachable from <newrev>.
# Return 1 otherwise.
rev_is_descendant_of_branch()
{
	newrev="$1"
	branch="$2"
	branch_hash=$(git rev-parse $branch)
	match=$(git rev-list -1 $branch_hash ^$newrev)

	[ -n "$match" ] || return 0
	return 1
}
222
# try_remove_previous <rev>
# Print "^<rev>^" (an exclusion of <rev>'s first parent, for use in a
# rev-list) when that parent exists.  A commit without a parent has nothing
# to exclude, so nothing is printed in that case.
try_remove_previous()
{
	rev_exists "$1^" || return 0
	echo "^$1^"
}
232
# find_latest_squash <dir>
# Scan the history of HEAD for commits whose message carries a
# "git-subtree-dir: <dir>" trailer and print "<commit> <split>" for the
# first one (in git-log order) that also has a git-subtree-split trailer.
# When that commit has a git-subtree-mainline trailer too (a rejoin), the
# split hash is printed in both positions.  Prints nothing if none is found.
#
# The loop is the last stage of a pipeline, so its variable assignments
# may not be visible to the caller; the result is delivered on stdout.
find_latest_squash()
{
	# Assign dir first so the debug line reports the directory actually
	# being searched rather than whatever $dir held before the call.
	dir="$1"
	debug "Looking for latest squash ($dir)..."
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
	# -r: commit message text must not have its backslashes interpreted
	while read -r a b junk; do
		debug "$a $b $junk"
		debug "{{$sq/$main/$sub}}"
		case "$a" in
			START) sq="$b" ;;
			git-subtree-mainline:) main="$b" ;;
			git-subtree-split:)
				sub="$(git rev-parse "$b^0")" ||
				    die "could not rev-parse split hash $b from commit $sq"
				;;
			END)
				if [ -n "$sub" ]; then
					if [ -n "$main" ]; then
						# a rejoin commit?
						# Pretend its sub was a squash.
						sq="$sub"
					fi
					debug "Squash found: $sq $sub"
					echo "$sq" "$sub"
					break
				fi
				sq=
				main=
				sub=
				;;
		esac
	done
}
270
# find_existing_splits <dir> <revs>
# Walk the history of <revs> for commits carrying a "git-subtree-dir: <dir>"
# trailer and seed the cache from them:
#   - only a git-subtree-split trailer (a squash): cache <commit> -> <split>
#   - mainline and split trailers (a rejoin): cache <mainline> -> <split> and
#     <split> -> <split>, and print whatever try_remove_previous prints for
#     both of them.
# The caller captures stdout to collect those printed lines.
find_existing_splits()
{
	debug "Looking for prior splits..."
	dir="$1"
	revs="$2"
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
	# -r: without it a message line ending in a backslash would be glued
	# to the following line and could hide a trailer.
	while read -r a b junk; do
		case "$a" in
			START) sq="$b" ;;
			git-subtree-mainline:) main="$b" ;;
			git-subtree-split:)
				sub="$(git rev-parse "$b^0")" ||
				    die "could not rev-parse split hash $b from commit $sq"
				;;
			END)
				debug "  Main is: '$main'"
				if [ -z "$main" ] && [ -n "$sub" ]; then
					# squash commits refer to a subtree
					debug "  Squash: $sq from $sub"
					cache_set "$sq" "$sub"
				fi
				if [ -n "$main" ] && [ -n "$sub" ]; then
					debug "  Prior: $main -> $sub"
					cache_set "$main" "$sub"
					cache_set "$sub" "$sub"
					try_remove_previous "$main"
					try_remove_previous "$sub"
				fi
				sq=
				main=
				sub=
				;;
		esac
	done
}
308
# copy_commit <rev> <tree> <parent-args>
# Create a new commit object for <tree>, reusing author, committer, dates
# and message of <rev>, with $annotate prepended to the message.
# <parent-args> is a single string such as " -p <p1> -p <p2>" that is
# deliberately word-split onto the commit-tree command line.
# The output of "git commit-tree" goes to stdout; any failure calls die.
copy_commit()
{
	# We're going to set some environment vars here, so
	# do it in a subshell to get rid of them safely later
	debug copy_commit "{$1}" "{$2}" "{$3}"
	git log -1 --pretty=format:'%an%n%ae%n%aD%n%cn%n%ce%n%cD%n%B' "$1" |
	(
		# -r keeps backslashes in names and addresses intact; a plain
		# "read" would drop them and could join a field with the next line.
		read -r GIT_AUTHOR_NAME
		read -r GIT_AUTHOR_EMAIL
		read -r GIT_AUTHOR_DATE
		read -r GIT_COMMITTER_NAME
		read -r GIT_COMMITTER_EMAIL
		read -r GIT_COMMITTER_DATE
		export  GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		(printf "%s" "$annotate"; cat ) |
		git commit-tree "$2" $3  # reads the rest of stdin
	) || die "Can't copy commit $1"
}
332
# add_msg <dir> <latest_old> <latest_new>
# Print the commit message for an "add" commit: $message if one was given,
# otherwise a default subject, followed by a blank line and the
# git-subtree-dir / git-subtree-mainline / git-subtree-split trailers.
add_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	if [ -n "$message" ]; then
		commit_message="$message"
	else
		commit_message="Add '$dir/' from commit '$latest_new'"
	fi
	# printf instead of a "<<-" here-document: "<<-" strips leading tabs
	# only, so the text and its terminator break as soon as the
	# indentation is converted to spaces.
	printf '%s\n\n' "$commit_message"
	printf 'git-subtree-dir: %s\n' "$dir"
	printf 'git-subtree-mainline: %s\n' "$latest_old"
	printf 'git-subtree-split: %s\n' "$latest_new"
}
351
# add_squashed_msg <commit> <dir>
# Print the message for the commit that brings in a squashed subtree:
# $message when set, otherwise a default mentioning <commit> and <dir>.
add_squashed_msg()
{
	if [ -z "$message" ]; then
		echo "Merge commit '$1' as '$2'"
	else
		echo "$message"
	fi
}
360
# rejoin_msg <dir> <latest_old> <latest_new>
# Print the commit message for a --rejoin merge: $message if one was given,
# otherwise a default subject, followed by a blank line and the
# git-subtree-dir / git-subtree-mainline / git-subtree-split trailers.
rejoin_msg()
{
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	if [ -n "$message" ]; then
		commit_message="$message"
	else
		commit_message="Split '$dir/' into commit '$latest_new'"
	fi
	# printf instead of a "<<-" here-document: "<<-" strips leading tabs
	# only, so the text and its terminator break as soon as the
	# indentation is converted to spaces.
	printf '%s\n\n' "$commit_message"
	printf 'git-subtree-dir: %s\n' "$dir"
	printf 'git-subtree-mainline: %s\n' "$latest_old"
	printf 'git-subtree-split: %s\n' "$latest_new"
}
379
# squash_msg <dir> <oldsub> <newsub>
# Print the message of a squash commit.  With an <oldsub>, the message
# covers the range <oldsub>..<newsub>: it lists the commits in that range
# and, prefixed with "REVERT:", those in <newsub>..<oldsub>.  Without an
# <oldsub>, only the new content commit is named.  Either way the message
# ends with the git-subtree-dir and git-subtree-split trailers.
squash_msg()
{
	dir="$1"
	oldsub="$2"
	newsub="$3"
	newsub_short=$(git rev-parse --short "$newsub")

	if [ -z "$oldsub" ]; then
		echo "Squashed '$dir/' content from commit $newsub_short"
	else
		oldsub_short=$(git rev-parse --short "$oldsub")
		echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
		echo
		git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
		git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
	fi

	echo
	echo "git-subtree-dir: $dir"
	echo "git-subtree-split: $newsub"
}
401
# toptree_for_commit <commit>
# Print the top-level tree hash of <commit> (git log's %T).  If git fails,
# exit the current (sub)shell with git's status.
toptree_for_commit()
{
	commit="$1"
	if git log -1 --pretty=format:'%T' "$commit" --; then
		:
	else
		exit $?
	fi
}
407
# subtree_for_commit <commit> <dir>
# Print the tree hash recorded for <dir> in <commit>.  Prints nothing when
# <dir> is absent from the commit or is a submodule (type "commit").
subtree_for_commit()
{
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" |
	while read mode type tree name; do
		assert [ "$name" = "$dir" ]
		assert [ "$type" = "tree" -o "$type" = "commit" ]
		case "$type" in
		commit)
			# ignore submodules
			continue
			;;
		esac
		echo $tree
		break
	done
}
421
# tree_changed <tree> <parent>...
# Return 0 ("changed") unless there is exactly one parent and its
# top-level tree equals <tree>; return 1 ("not changed") in that one case.
tree_changed()
{
	tree=$1
	shift
	# weird parents, consider it changed
	[ $# -eq 1 ] || return 0
	ptree=$(toptree_for_commit $1)
	[ "$ptree" = "$tree" ] || return 0   # changed
	return 1   # not changed
}
437
# new_squash_commit <old> <oldsub> <newsub>
# Create a squash commit whose tree is the top-level tree of <newsub> and
# print what "git commit-tree" prints.  With a non-empty <old>, the new
# commit gets <old> as its parent and a message covering <oldsub>..<newsub>;
# otherwise it is parentless.  Uses the global $dir for the message.
new_squash_commit()
{
	old="$1"
	oldsub="$2"
	newsub="$3"
	tree=$(toptree_for_commit $newsub) || exit $?
	if [ -z "$old" ]; then
		squash_msg "$dir" "" "$newsub" |
			git commit-tree "$tree" || exit $?
	else
		squash_msg "$dir" "$oldsub" "$newsub" |
			git commit-tree "$tree" -p "$old" || exit $?
	fi
}
452
# copy_or_skip <rev> <tree> <newparents>
# Decide whether <rev> needs a commit of its own in the split history.
# If one of the (already rewritten) parents has exactly <tree> as its
# top-level tree, and no other parent brings history that parent lacks,
# print that parent.  Otherwise create a copy of <rev> on top of the
# de-duplicated parents and print the new commit.
copy_or_skip()
{
	rev="$1"
	tree="$2"
	newparents="$3"
	assert [ -n "$tree" ]

	identical=
	nonidentical=
	p=
	gotparents=
	for parent in $newparents; do
		ptree=$(toptree_for_commit $parent) || exit $?
		if [ -z "$ptree" ]; then
			continue
		fi
		case "$ptree" in
		"$tree")
			# an identical parent could be used in place of this rev.
			identical="$parent"
			;;
		*)
			nonidentical="$parent"
			;;
		esac

		# sometimes both old parents map to the same newparent;
		# only record a parent the first time it is seen
		case " $gotparents " in
		*" $parent "*)
			;;
		*)
			gotparents="$gotparents $parent"
			p="$p -p $parent"
			;;
		esac
	done

	copycommit=
	if [ -n "$identical" ] && [ -n "$nonidentical" ]; then
		extras=$(git rev-list --count $identical..$nonidentical)
		if [ "$extras" -ne 0 ]; then
			# we need to preserve history along the other branch
			copycommit=1
		fi
	fi
	if [ -z "$identical" ] || [ -n "$copycommit" ]; then
		copy_commit $rev $tree "$p" || exit $?
	else
		echo $identical
	fi
}
503
# ensure_clean
# Abort through die when "git diff-index HEAD" reports differences, first
# without and then with --cached.
ensure_clean()
{
	git diff-index HEAD --exit-code --quiet 2>&1 ||
		die "Working tree has modifications.  Cannot add."
	git diff-index --cached HEAD --exit-code --quiet 2>&1 ||
		die "Index has modifications.  Cannot add."
}
513
# ensure_valid_ref_format <ref>
# Abort through die unless "refs/heads/<ref>" passes git check-ref-format.
ensure_valid_ref_format()
{
	if ! git check-ref-format "refs/heads/$1"; then
		die "'$1' does not look like a ref"
	fi
}
519
# cmd_add <commit>
# cmd_add <repository> <ref>
# Entry point for "git subtree add".  Requires that $dir does not exist yet
# and that the work tree and index are clean, then dispatches on the number
# of arguments: one argument must name a commit, two are a repository and
# a ref to fetch from it.
cmd_add()
{
	if [ -e "$dir" ]; then
		die "'$dir' already exists.  Cannot add."
	fi

	ensure_clean

	case $# in
	1)
		git rev-parse -q --verify "$1^{commit}" >/dev/null ||
			die "'$1' does not refer to a commit"

		cmd_add_commit "$@"
		;;
	2)
		# A refspec could technically be accepted here, but only
		# FETCH_HEAD ends up under the directory, so allowing one
		# would be misleading: other branches fetched through it
		# would simply be ignored.
		ensure_valid_ref_format "$2"

		cmd_add_repository "$@"
		;;
	*)
		say "error: parameters were '$*'"
		die "Provide either a commit or a repository and commit."
		;;
	esac
}
547
# cmd_add_repository <repository> <ref>
# Fetch <ref> from <repository>, then add FETCH_HEAD as the subtree.
# The fetch command is echoed to stdout before it runs.
cmd_add_repository()
{
	echo "git fetch" "$@"
	repository=$1
	refspec=$2
	if git fetch "$@"; then
		:
	else
		exit $?
	fi
	revs=FETCH_HEAD
	cmd_add_commit $revs
}
558
# cmd_add_commit <commit>
# Read the tree of <commit> into the index under $dir, check it out, and
# record a commit that has both HEAD (unless it equals <commit>) and the
# added commit as parents.  With $squash set, the added parent is a fresh
# squash commit instead of <commit> itself.  Finally reset to the new commit.
cmd_add_commit()
{
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	set -- $revs
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" $rev || exit $?
	git checkout -- "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	headrev=$(git rev-parse HEAD) || exit $?
	# $headp is expanded unquoted below so it can vanish when empty
	headp=
	if [ -n "$headrev" ] && [ "$headrev" != "$rev" ]; then
		headp="-p $headrev"
	fi

	if [ -z "$squash" ]; then
		revp=$(peel_committish "$rev") &&
		commit=$(add_msg "$dir" "$headrev" "$rev" |
			 git commit-tree $tree $headp -p "$revp") || exit $?
	else
		rev=$(new_squash_commit "" "" "$rev") || exit $?
		commit=$(add_squashed_msg "$rev" "$dir" |
			 git commit-tree $tree $headp -p "$rev") || exit $?
	fi
	git reset "$commit" || exit $?

	say "Added dir '$dir'"
}
590
# cmd_split
# Entry point for "git subtree split".  Rewrites the history selected by
# $revs so that the contents of $dir become the root tree, using the
# on-disk cache to map each original commit to its rewritten counterpart.
# Honours $onto, $ignore_joins, $rejoin and $branch, prints the hash of the
# newest rewritten commit on stdout and exits the script with status 0.
cmd_split()
{
	debug "Splitting $dir..."
	cache_setup || exit $?

	if [ -n "$onto" ]; then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read rev; do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug "  cache: $rev"
			cache_set "$rev" "$rev"
		done
	fi

	# $unrevs collects the "^<rev>^" exclusions printed by
	# find_existing_splits, so history before earlier splits is skipped.
	if [ -n "$ignore_joins" ]; then
		unrevs=
	else
		unrevs="$(find_existing_splits "$dir" "$revs")"
	fi

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	grl='git rev-list --topo-order --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	revcount=0
	createcount=0
	eval "$grl" |
	while read rev parents; do
		revcount=$(($revcount + 1))
		progress "$revcount/$revmax ($createcount)"
		debug "Processing commit: $rev"
		exists=$(cache_get $rev)
		if [ -n "$exists" ]; then
			debug "  prior: $exists"
			continue
		fi
		createcount=$(($createcount + 1))
		debug "  parents: $parents"
		newparents=$(cache_get $parents)
		debug "  newparents: $newparents"

		tree=$(subtree_for_commit $rev "$dir")
		debug "  tree is: $tree"

		check_parents $parents

		# ugly.  is there no better way to tell if this is a subtree
		# vs. a mainline commit?  Does it matter?
		# ($tree is quoted: the unquoted test only worked by accident,
		# because "[ -z ]" with no operand happens to be true.)
		if [ -z "$tree" ]; then
			set_notree "$rev"
			if [ -n "$newparents" ]; then
				cache_set "$rev" "$rev"
			fi
			continue
		fi

		newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
		debug "  newrev is: $newrev"
		cache_set "$rev" "$newrev"
		cache_set latest_new "$newrev"
		cache_set latest_old "$rev"
	done || exit $?
	# The loop above ran as part of a pipeline, so its result is read
	# back from the cache files rather than from shell variables.
	latest_new=$(cache_get latest_new)
	if [ -z "$latest_new" ]; then
		die "No new revisions were found"
	fi

	if [ -n "$rejoin" ]; then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		git merge -s ours \
			-m "$(rejoin_msg "$dir" $latest_old $latest_new)" \
			$latest_new >&2 || exit $?
	fi
	if [ -n "$branch" ]; then
		if rev_exists "refs/heads/$branch"; then
			if ! rev_is_descendant_of_branch $latest_new $branch; then
				die "Branch '$branch' is not an ancestor of commit '$latest_new'."
			fi
			action='Updated'
		else
			action='Created'
		fi
		git update-ref -m 'subtree split' "refs/heads/$branch" $latest_new || exit $?
		say "$action branch '$branch'"
	fi
	echo $latest_new
	exit 0
}
683
# cmd_merge <commit>
# Entry point for "git subtree merge".  Merges exactly one revision into
# the subtree at $prefix.  With $squash set, the revision is first replaced
# by a new squash commit built on top of the latest recorded squash; if the
# subtree is already at that revision the script exits successfully.
cmd_merge()
{
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] ||
		die "You must provide exactly one revision.  Got: '$revs'"
	rev="$1"

	if [ -n "$squash" ]; then
		first_split="$(find_latest_squash "$dir")"
		if [ -z "$first_split" ]; then
			die "Can't squash-merge: '$dir' was never added."
		fi
		# first_split holds "<squash commit> <subtree commit>"
		set $first_split
		old=$1
		sub=$2
		if [ "$sub" = "$rev" ]; then
			say "Subtree is already at commit $rev."
			exit 0
		fi
		new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
		debug "New squash commit: $new"
		rev="$new"
	fi

	# Build the merge options in the positional parameters: the strategy
	# flag depends on a string comparison of the reported git version,
	# and --message is added only when a message was given.
	version=$(git version)
	if [ "$version" \< "git version 1.7" ]; then
		set -- -s subtree
	else
		set -- -Xsubtree="$prefix"
	fi
	if [ -n "$message" ]; then
		set -- "$@" --message="$message"
	fi
	git merge "$@" $rev
}
727
# cmd_pull <repository> <ref>
# Entry point for "git subtree pull": fetch <ref> from <repository> and
# merge FETCH_HEAD into the subtree via cmd_merge.
cmd_pull()
{
	[ $# -eq 2 ] || die "You must provide <repository> <ref>"
	ensure_clean
	ensure_valid_ref_format "$2"
	if git fetch "$@"; then
		:
	else
		exit $?
	fi
	revs=FETCH_HEAD
	cmd_merge $revs
}
740
# cmd_push <repository> <ref>
# Entry point for "git subtree push": split the subtree at $prefix with a
# nested "git subtree split" and push the resulting commit to
# refs/heads/<ref> in <repository>.
cmd_push()
{
	[ $# -eq 2 ] || die "You must provide <repository> <ref>"
	ensure_valid_ref_format "$2"
	[ -e "$dir" ] || die "'$dir' must already exist. Try 'git subtree add'."

	repository=$1
	refspec=$2
	echo "git push using: " $repository $refspec
	localrev=$(git subtree split --prefix="$prefix") || die
	git push "$repository" $localrev:refs/heads/$refspec
}
757
# Run the cmd_<command> function for the validated sub-command, passing on
# whatever arguments remain.
cmd_"$command" "$@"