contrib/subtree: better error handling for 'subtree add'
[git] / contrib / subtree / git-subtree.sh
1 #!/bin/bash
2 #
3 # git-subtree.sh: split/join git repositories in subdirectories of this one
4 #
5 # Copyright (C) 2009 Avery Pennarun <apenwarr@gmail.com>
6 #
# Invoked without any arguments: behave as if help had been requested.
[ $# -ne 0 ] || set -- -h

# Usage text and option table handed to 'git rev-parse --parseopt' below.
OPTS_SPEC="\
git subtree add   --prefix=<prefix> <commit>
git subtree merge --prefix=<prefix> <commit>
git subtree pull  --prefix=<prefix> <repository> <refspec...>
git subtree push  --prefix=<prefix> <repository> <refspec...>
git subtree split --prefix=<prefix> <commit...>
--
h,help        show the help
q             quiet
d             show debug messages
P,prefix=     the name of the subdir to split out
m,message=    use the given message as the commit message for the merge commit
 options for 'split'
annotate=     add a prefix to commit message of new commits
b,branch=     create a new branch from the split subtree
ignore-joins  ignore prior --rejoin commits
onto=         try connecting new tree to an existing one
rejoin        merge the new branch back into HEAD
 options for 'add', 'merge', 'pull' and 'push'
squash        merge subtree changes as a single commit
"
# Evaluate whatever parseopt prints; if parseopt itself fails, evaluate
# "exit <status>" instead so the script stops with that status.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"

# Make git's helper scripts reachable, then load the shared shell library.
PATH=$PATH:$(git --exec-path)
. git-sh-setup

require_work_tree
37
# Defaults for every option the parsing loop may set.  Each one is reset
# explicitly so that a same-named variable inherited from the caller's
# environment can never be mistaken for a command-line option.  'prefix'
# in particular must start out empty: the script later insists that
# --prefix was given by testing whether $prefix is non-empty.
quiet=
branch=
debug=
command=
onto=
rejoin=
ignore_joins=
annotate=
squash=
message=
prefix=
48
# Write the arguments to stderr, but only when $debug is non-empty
# (set by the -d option).
debug() {
	[ -z "$debug" ] || echo "$@" >&2
}
55
# Write the arguments to stderr unless $quiet is non-empty (set by -q).
# The arguments are passed to echo unchanged, so echo options such as
# -n given by the caller still take effect.
say() {
	if [ -n "$quiet" ]; then
		return 0
	fi
	echo "$@" >&2
}
62
# Run the given command; if it fails, abort through die with a message
# that repeats the failed command.
assert() {
	"$@" || die "assertion failed: " "$@"
}
71
72
73 #echo "Options: $*"
74
# Consume the normalized option list until the "--" separator.
# Options that carry a value take it from the following positional
# argument.
until [ $# -eq 0 ]; do
	opt="$1"
	shift
	case "$opt" in
	-q)
		quiet=1
		;;
	-d)
		debug=1
		;;
	--annotate)
		annotate="$1"
		shift
		;;
	--no-annotate)
		annotate=
		;;
	-b)
		branch="$1"
		shift
		;;
	-P)
		prefix="$1"
		shift
		;;
	-m)
		message="$1"
		shift
		;;
	--no-prefix)
		prefix=
		;;
	--onto)
		onto="$1"
		shift
		;;
	--no-onto)
		onto=
		;;
	--rejoin)
		rejoin=1
		;;
	--no-rejoin)
		rejoin=
		;;
	--ignore-joins)
		ignore_joins=1
		;;
	--no-ignore-joins)
		ignore_joins=
		;;
	--squash)
		squash=1
		;;
	--no-squash)
		squash=
		;;
	--)
		break
		;;
	*)
		die "Unexpected option: $opt"
		;;
	esac
done
99
# The first remaining argument is the subcommand.  'split' and 'push'
# fall back to HEAD when no revision is given; the others do not.
command="$1"
shift
case "$command" in
add|merge|pull)
	default=
	;;
split|push)
	default="--default HEAD"
	;;
*)
	die "Unknown command '$command'"
	;;
esac

[ -n "$prefix" ] || die "You must provide the --prefix option."

# 'add' needs a prefix that does not exist yet; every other command
# needs one that already does.
if [ "$command" = "add" ]; then
	if [ -e "$prefix" ]; then
		die "prefix '$prefix' already exists."
	fi
elif [ ! -e "$prefix" ]; then
	die "'$prefix' does not exist; use 'git subtree add'"
fi

dir="$(dirname "$prefix/.")"

# pull, add and push interpret their own arguments; for the remaining
# commands resolve the revisions now and reject anything left over.
case "$command" in
pull|add|push)
	;;
*)
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs="$(git rev-parse --no-revs --no-flags "$@")" || exit $?
	[ -z "$dirs" ] || die "Error: Use --prefix instead of bare filenames."
	;;
esac

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug
135
# Create an empty, per-process cache directory below $GIT_DIR (with a
# "notree" subdirectory) and record its path in $cachedir.
cache_setup() {
	cachedir="$GIT_DIR/subtree-cache/$$"
	if ! rm -rf "$cachedir"; then
		die "Can't delete old cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir"; then
		die "Can't create new cachedir: $cachedir"
	fi
	if ! mkdir -p "$cachedir/notree"; then
		die "Can't create new cachedir: $cachedir/notree"
	fi
	debug "Using cachedir: $cachedir" >&2
}
144
# For each given key that has a readable entry in $cachedir, print the
# first line stored there.  Keys without an entry are skipped silently.
cache_get() {
	for oldrev in $*; do
		[ -r "$cachedir/$oldrev" ] || continue
		read newrev <"$cachedir/$oldrev"
		echo $newrev
	done
}
154
# Print every given key that has no readable entry in $cachedir.
cache_miss() {
	for oldrev in $*; do
		if [ -r "$cachedir/$oldrev" ]; then
			continue
		fi
		echo $oldrev
	done
}
163
# Emit a debug note for every given parent that is neither cached nor
# marked in the "notree" directory.
check_parents() {
	missed=$(cache_miss $*)
	for miss in $missed; do
		[ -r "$cachedir/notree/$miss" ] ||
			debug "  incorrect order: $miss"
	done
}
173
# Mark revision $1 in the cache's "notree" directory.
set_notree() {
	printf '1\n' >"$cachedir/notree/$1"
}
178
# Store the mapping $1 -> $2 in the cache.  An existing entry is an
# error, except for the two bookkeeping keys "latest_old" and
# "latest_new", which may be overwritten freely.
cache_set() {
	oldrev="$1"
	newrev="$2"
	case "$oldrev" in
	latest_old|latest_new)
		;;
	*)
		if [ -e "$cachedir/$oldrev" ]; then
			die "cache for $oldrev already exists!"
		fi
		;;
	esac
	echo "$newrev" >"$cachedir/$oldrev"
}
190
# Return 0 when 'git rev-parse' can resolve $1, 1 otherwise.  All output
# from git is discarded.
rev_exists() {
	git rev-parse "$1" >/dev/null 2>&1 || return 1
	return 0
}
199
# Return 0 when 'git rev-list' finds no commit that is reachable from
# branch $2 but not from revision $1, and 1 otherwise.
# Note: assigns the globals $newrev and $branch.
rev_is_descendant_of_branch() {
	newrev="$1"
	branch="$2"
	branch_hash=$(git rev-parse $branch)
	match=$(git rev-list -1 $branch_hash ^$newrev)

	[ -z "$match" ]
}
213
# Print "^<rev>^" (an exclusion of the first parent of $1) for use in a
# rev-list.  A commit without a parent has nothing to exclude, so in
# that case nothing is printed.
try_remove_previous() {
	rev_exists "$1^" || return 0
	echo "^$1^"
}
223
# Scan the history of HEAD for commits whose message carries a
# "git-subtree-dir:" trailer for directory $1 and print
# "<commit> <split>" for the first one that also records a
# "git-subtree-split:".  When that commit additionally records a
# "git-subtree-mainline:" (a rejoin), the split commit is printed in
# both positions.  Prints nothing when no such commit is found.
find_latest_squash() {
	debug "Looking for latest squash ($dir)..."
	dir="$1"
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
	while read a b junk; do
		debug "$a $b $junk"
		debug "{{$sq/$main/$sub}}"
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$b"
			;;
		END)
			if [ -z "$sub" ]; then
				# nothing usable in this commit; start over
				sq=
				main=
				sub=
			else
				# a rejoin commit?  Pretend its sub was a squash.
				[ -z "$main" ] || sq="$sub"
				debug "Squash found: $sq $sub"
				echo "$sq" "$sub"
				break
			fi
			;;
		esac
	done
}
258
# Scan the history of revisions $2 for commits carrying a
# "git-subtree-dir:" trailer for directory $1 and seed the cache from
# them:
#   - split recorded without a mainline (a squash): cache commit -> split
#   - split recorded with a mainline (a prior join): cache
#     mainline -> split and split -> split, and print exclusions for the
#     parents of both so the caller can cut them out of its rev-list.
find_existing_splits() {
	debug "Looking for prior splits..."
	dir="$1"
	revs="$2"
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
	while read a b junk; do
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$b"
			;;
		END)
			debug "  Main is: '$main'"
			if [ -n "$sub" ]; then
				if [ -z "$main" ]; then
					# squash commits refer to a subtree
					debug "  Squash: $sq from $sub"
					cache_set "$sq" "$sub"
				else
					debug "  Prior: $main -> $sub"
					cache_set $main $sub
					cache_set $sub $sub
					try_remove_previous "$main"
					try_remove_previous "$sub"
				fi
			fi
			main=
			sub=
			;;
		esac
	done
}
293
# Create a new commit that reuses the author, committer, dates and
# message of commit $1 but points at tree $2, with the parent arguments
# given in $3 (a string such as "-p <rev> -p <rev>", deliberately left
# unquoted so it splits into separate words).  $annotate, if set, is
# prepended to the commit message.  Whatever 'git commit-tree' prints
# goes to stdout; dies if the copy fails.
copy_commit() {
	# We're going to set some environment vars here, so
	# do it in a subshell to get rid of them safely later
	debug copy_commit "{$1}" "{$2}" "{$3}"
	git log -1 --pretty=format:'%an%n%ae%n%ad%n%cn%n%ce%n%cd%n%B' "$1" |
	(
		# -r: take each identity line literally; without it a
		# backslash in a name or e-mail address would be eaten.
		read -r GIT_AUTHOR_NAME
		read -r GIT_AUTHOR_EMAIL
		read -r GIT_AUTHOR_DATE
		read -r GIT_COMMITTER_NAME
		read -r GIT_COMMITTER_EMAIL
		read -r GIT_COMMITTER_DATE
		export  GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		# printf rather than 'echo -n': an annotation that looks
		# like an echo option (e.g. "-n") must still be emitted.
		(printf '%s' "$annotate"; cat ) |
		git commit-tree "$2" $3  # reads the rest of stdin
	) || die "Can't copy commit $1"
}
317
# Print the commit message for a non-squashed 'add': $message if one
# was given (otherwise a default subject), a blank line, and the
# git-subtree-dir / -mainline / -split trailers built from $1, $2, $3.
add_msg() {
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	commit_message="$message"
	if [ -z "$commit_message" ]; then
		commit_message="Add '$dir/' from commit '$latest_new'"
	fi
	printf '%s\n' \
		"$commit_message" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
336
# Print the commit message for a squashed 'add': $message when one was
# given, otherwise a default naming commit $1 and directory $2.
add_squashed_msg() {
	if [ -z "$message" ]; then
		echo "Merge commit '$1' as '$2'"
	else
		echo "$message"
	fi
}
345
# Print the commit message for a --rejoin merge: $message if one was
# given (otherwise a default subject), a blank line, and the
# git-subtree-dir / -mainline / -split trailers built from $1, $2, $3.
rejoin_msg() {
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	commit_message="$message"
	if [ -z "$commit_message" ]; then
		commit_message="Split '$dir/' into commit '$latest_new'"
	fi
	printf '%s\n' \
		"$commit_message" \
		"" \
		"git-subtree-dir: $dir" \
		"git-subtree-mainline: $latest_old" \
		"git-subtree-split: $latest_new"
}
364
# Print the message for a squash commit of directory $1 that moves the
# subtree from commit $2 (may be empty for the very first squash) to
# commit $3.  With an old commit, the body lists the commits gained and,
# prefixed "REVERT:", the commits lost.  The message ends with the
# git-subtree-dir and git-subtree-split trailers.
squash_msg() {
	dir="$1"
	oldsub="$2"
	newsub="$3"
	newsub_short=$(git rev-parse --short "$newsub")

	if [ -z "$oldsub" ]; then
		echo "Squashed '$dir/' content from commit $newsub_short"
	else
		oldsub_short=$(git rev-parse --short "$oldsub")
		echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
		echo
		git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
		git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
	fi

	printf '\n%s\n%s\n' \
		"git-subtree-dir: $dir" \
		"git-subtree-split: $newsub"
}
386
# Print the tree object id (%T) of commit $1.  Exits with git's status
# if the lookup fails.
toptree_for_commit() {
	commit="$1"
	if git log -1 --pretty=format:'%T' "$commit" --
	then
		return 0
	else
		exit $?
	fi
}
392
# Print the tree id that commit $1 records for directory $2.  Prints
# nothing when the commit has no such entry or when the entry is a
# submodule ("commit" type).
subtree_for_commit() {
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" |
	while read mode type tree name; do
		assert [ "$name" = "$dir" ]
		assert [ "$type" = "tree" -o "$type" = "commit" ]
		case "$type" in
		commit)
			continue  # ignore submodules
			;;
		esac
		echo $tree
		break
	done
}
406
# Usage: tree_changed <tree> <parent>...
# Return 1 (unchanged) only when exactly one parent is given and its
# top-level tree equals <tree>; return 0 (changed) in every other case.
tree_changed() {
	tree=$1
	shift
	# weird parents, consider it changed
	[ $# -eq 1 ] || return 0

	ptree=$(toptree_for_commit $1)
	[ "$ptree" = "$tree" ] || return 0   # changed
	return 1   # not changed
}
422
# Create a squash commit whose tree is the top-level tree of $3 and
# print its id.  When $1 (the previous squash commit) is non-empty it
# becomes the parent and $2 (the previous subtree commit) is passed on
# to squash_msg; otherwise a parentless commit is created.
# Reads the global $dir.
new_squash_commit() {
	old="$1"
	oldsub="$2"
	newsub="$3"
	tree=$(toptree_for_commit $newsub) || exit $?
	if [ -z "$old" ]; then
		squash_msg "$dir" "" "$newsub" |
			git commit-tree "$tree" || exit $?
	else
		squash_msg "$dir" "$oldsub" "$newsub" |
			git commit-tree "$tree" -p "$old" || exit $?
	fi
}
437
# Decide how revision $1 (whose subtree is tree $2) appears in the split
# history, given its already-rewritten parents $3 (space separated).
# If any parent already has exactly that tree, print that parent (the
# last such one) instead of creating a commit; otherwise copy the commit
# onto the de-duplicated parent list and print the new commit id.
copy_or_skip() {
	rev="$1"
	tree="$2"
	newparents="$3"
	assert [ -n "$tree" ]

	identical=
	nonidentical=
	p=
	gotparents=
	for parent in $newparents; do
		ptree=$(toptree_for_commit $parent) || exit $?
		if [ -z "$ptree" ]; then
			continue
		fi
		case "$ptree" in
		"$tree")
			# an identical parent could be used in place of this rev.
			identical="$parent"
			;;
		*)
			nonidentical="$parent"
			;;
		esac

		# sometimes both old parents map to the same newparent;
		# eliminate duplicates
		case " $gotparents " in
		*" $parent "*)
			;;
		*)
			gotparents="$gotparents $parent"
			p="$p -p $parent"
			;;
		esac
	done

	if [ -z "$identical" ]; then
		copy_commit $rev $tree "$p" || exit $?
	else
		echo $identical
	fi
}
480
# Die unless both the working tree and the index are identical to HEAD.
ensure_clean() {
	git diff-index HEAD --exit-code --quiet 2>&1 ||
		die "Working tree has modifications.  Cannot add."
	git diff-index --cached HEAD --exit-code --quiet 2>&1 ||
		die "Index has modifications.  Cannot add."
}
490
# 'git subtree add' entry point.
#   cmd_add <commit>                add a commit that is available locally
#   cmd_add <repository> <ref>      fetch <ref> from <repository>, then add it
# Dies when $dir already exists, when the work tree or index is dirty,
# or when the arguments do not match one of the two forms above.
cmd_add() {
	if [ -e "$dir" ]; then
		die "'$dir' already exists.  Cannot add."
	fi

	ensure_clean

	if [ $# -eq 1 ]; then
		git rev-parse -q --verify "$1^{commit}" >/dev/null ||
		die "'$1' does not refer to a commit"

		cmd_add_commit "$@"
	elif [ $# -eq 2 ]; then
		# The ref names something in the *remote* repository and
		# need not exist locally before the fetch, so it cannot be
		# resolved with rev-parse here.  Only check that it is
		# well-formed; the fetch reports a ref that does not exist.
		git check-ref-format "refs/heads/$2" ||
		die "'$2' does not look like a ref"

		cmd_add_repository "$@"
	else
		say "error: parameters were '$*'"
		die "Provide either a commit or a repository and commit."
	fi
}
514
# Fetch with the given arguments (<repository> <refspec>) and add the
# fetched commit, FETCH_HEAD, as the subtree.  Exits with git's status
# if the fetch fails.
cmd_add_repository() {
	echo "git fetch" "$@"
	repository=$1
	refspec=$2
	git fetch "$@" || exit $?
	revs=FETCH_HEAD
	cmd_add_commit $revs
}
525
# Add the first revision the arguments resolve to as directory $dir:
# read its tree into the index under that prefix, check the files out,
# and create a commit on top of HEAD that also has the added revision
# (or, with --squash, a freshly made squash commit) as a parent.
# Finally move the current branch to the new commit.
cmd_add_commit() {
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	set -- $revs
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" $rev || exit $?
	git checkout -- "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	# HEAD becomes a parent unless it is the very revision being added.
	headrev=$(git rev-parse HEAD) || exit $?
	headp=
	if [ -n "$headrev" ] && [ "$headrev" != "$rev" ]; then
		headp="-p $headrev"
	fi

	if [ -z "$squash" ]; then
		commit=$(add_msg "$dir" "$headrev" "$rev" |
			 git commit-tree $tree $headp -p "$rev") || exit $?
	else
		rev=$(new_squash_commit "" "" "$rev") || exit $?
		commit=$(add_squashed_msg "$rev" "$dir" |
			 git commit-tree $tree $headp -p "$rev") || exit $?
	fi
	git reset "$commit" || exit $?

	say "Added dir '$dir'"
}
556
# 'git subtree split' entry point: rewrite the history in $revs so that
# the contents of $dir become the top-level tree, reusing prior splits
# and joins where possible.  Honors --onto, --ignore-joins, --rejoin and
# --branch.  Prints the id of the newest synthesized commit on stdout
# and exits; progress and messages go to stderr.
cmd_split() {
	debug "Splitting $dir..."
	cache_setup || exit $?

	if [ -n "$onto" ]; then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read rev; do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug "  cache: $rev"
			cache_set $rev $rev
		done
	fi

	if [ -n "$ignore_joins" ]; then
		unrevs=
	else
		unrevs="$(find_existing_splits "$dir" "$revs")"
	fi

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	grl='git rev-list --topo-order --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	revcount=0
	createcount=0
	eval "$grl" |
	while read rev parents; do
		revcount=$(($revcount + 1))
		# Progress indicator.  printf is used because bash's echo
		# does not turn "\r" into a carriage return, which would
		# leave a literal backslash-r in the output.
		if [ -z "$quiet" ]; then
			printf '%s\r' "$revcount/$revmax ($createcount)" >&2
		fi
		debug "Processing commit: $rev"
		exists=$(cache_get $rev)
		if [ -n "$exists" ]; then
			debug "  prior: $exists"
			continue
		fi
		createcount=$(($createcount + 1))
		debug "  parents: $parents"
		newparents=$(cache_get $parents)
		debug "  newparents: $newparents"

		tree=$(subtree_for_commit $rev "$dir")
		debug "  tree is: $tree"

		check_parents $parents

		# ugly.  is there no better way to tell if this is a subtree
		# vs. a mainline commit?  Does it matter?
		if [ -z "$tree" ]; then
			set_notree $rev
			if [ -n "$newparents" ]; then
				cache_set $rev $rev
			fi
			continue
		fi

		newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
		debug "  newrev is: $newrev"
		cache_set $rev $newrev
		cache_set latest_new $newrev
		cache_set latest_old $rev
	done || exit $?
	latest_new=$(cache_get latest_new)
	if [ -z "$latest_new" ]; then
		die "No new revisions were found"
	fi

	if [ -n "$rejoin" ]; then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		# Quote the arguments so a directory name containing
		# whitespace reaches rejoin_msg as a single word.
		git merge -s ours \
			-m "$(rejoin_msg "$dir" "$latest_old" "$latest_new")" \
			$latest_new >&2 || exit $?
	fi
	if [ -n "$branch" ]; then
		if rev_exists "refs/heads/$branch"; then
			if ! rev_is_descendant_of_branch $latest_new $branch; then
				die "Branch '$branch' is not an ancestor of commit '$latest_new'."
			fi
			action='Updated'
		else
			action='Created'
		fi
		git update-ref -m 'subtree split' "refs/heads/$branch" $latest_new || exit $?
		say "$action branch '$branch'"
	fi
	echo $latest_new
	exit 0
}
649
# 'git subtree merge' entry point: merge exactly one revision into the
# subtree at $prefix.  With --squash, the revision is first replaced by
# a new squash commit built on the most recent squash/rejoin found in
# history.  Git older than 1.7 gets '-s subtree'; newer versions get
# '-Xsubtree=<prefix>'.  A user-supplied $message is passed through.
cmd_merge() {
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	ensure_clean

	set -- $revs
	[ $# -eq 1 ] ||
		die "You must provide exactly one revision.  Got: '$revs'"
	rev="$1"

	if [ -n "$squash" ]; then
		first_split="$(find_latest_squash "$dir")"
		[ -n "$first_split" ] ||
			die "Can't squash-merge: '$dir' was never added."
		set $first_split
		old=$1
		sub=$2
		if [ "$sub" = "$rev" ]; then
			say "Subtree is already at commit $rev."
			exit 0
		fi
		new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
		debug "New squash commit: $new"
		rev="$new"
	fi

	# Assemble the merge options in the positional parameters.
	version=$(git version)
	if [ "$version" \< "git version 1.7" ]; then
		set -- -s subtree
	else
		set -- -Xsubtree="$prefix"
	fi
	if [ -n "$message" ]; then
		set -- "$@" --message="$message"
	fi
	git merge "$@" $rev
}
693
# 'git subtree pull' entry point: fetch with the given arguments, then
# merge the fetched commit (FETCH_HEAD) into the subtree.
cmd_pull() {
	ensure_clean
	git fetch "$@" || exit $?
	revs=FETCH_HEAD
	cmd_merge $revs
}
702
# 'git subtree push' entry point: split the history of $prefix and push
# the resulting commit to refs/heads/<refspec> in <repository>.
#   $1 - repository to push to
#   $2 - name of the branch to update there
# Dies on a wrong argument count or when $dir does not exist; exits with
# the split's status if the split fails.
cmd_push() {
	if [ $# -ne 2 ]; then
		die "You must provide <repository> <refspec>"
	fi
	if [ -e "$dir" ]; then
		repository=$1
		refspec=$2
		echo "git push using: " "$repository" "$refspec"
		# Run the split on its own and check it.  If a failed split
		# were substituted straight into the refspec, the push would
		# become ":refs/heads/<refspec>", i.e. a request to DELETE
		# the remote branch.
		localrev=$(git subtree split --prefix="$prefix") || exit $?
		if [ -z "$localrev" ]; then
			die "Could not determine a split commit for '$dir'."
		fi
		git push "$repository" "$localrev:refs/heads/$refspec"
	else
		die "'$dir' must already exist. Try 'git subtree add'."
	fi
}
717
# Hand the remaining arguments to the handler of the chosen subcommand.
"cmd_${command}" "$@"