Implement the patience diff algorithm
[git] / xdiff / xmerge.c
1 /*
2  *  LibXDiff by Davide Libenzi ( File Differential Library )
3  *  Copyright (C) 2003-2006 Davide Libenzi, Johannes E. Schindelin
4  *
5  *  This library is free software; you can redistribute it and/or
6  *  modify it under the terms of the GNU Lesser General Public
7  *  License as published by the Free Software Foundation; either
8  *  version 2.1 of the License, or (at your option) any later version.
9  *
10  *  This library is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  *  Lesser General Public License for more details.
14  *
15  *  You should have received a copy of the GNU Lesser General Public
16  *  License along with this library; if not, write to the Free Software
17  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  *  Davide Libenzi <davidel@xmailserver.org>
20  *
21  */
22
23 #include "xinclude.h"
24
/*
 * One hunk of a three-way merge.  Hunks are chained into a singly
 * linked list through 'next'.
 */
typedef struct s_xdmerge {
        /* the following hunk, or NULL for the last one */
        struct s_xdmerge *next;
        /*
         * How this hunk is resolved:
         *   0 - conflict,
         *   1 - no conflict, take side #1,
         *   2 - no conflict, take side #2.
         * Any other value (xdl_refine_conflicts() stores 4 when both
         * sides turn out to be identical) makes the output code skip
         * the hunk.
         */
        int mode;
        /*
         * Start line and line count of the hunk in the postimage of
         * side #1 (i1, chg1) and of side #2 (i2, chg2).  <i1,chg1>
         * describes how side #1 wants to change the common ancestor;
         * when nothing overlaps, the postimage lines of side #1 that
         * precede i1 go into the result as untouched context.
         */
        long i1;
        long i2;
        long chg1;
        long chg2;
        /*
         * Start line and line count of the hunk in the preimage.  Both
         * sides share one preimage: the common ancestor.
         */
        long i0;
        long chg0;
} xdmerge_t;
48
49 static int xdl_append_merge(xdmerge_t **merge, int mode,
50                             long i0, long chg0,
51                             long i1, long chg1,
52                             long i2, long chg2)
53 {
54         xdmerge_t *m = *merge;
55         if (m && (i1 <= m->i1 + m->chg1 || i2 <= m->i2 + m->chg2)) {
56                 if (mode != m->mode)
57                         m->mode = 0;
58                 m->chg0 = i0 + chg0 - m->i0;
59                 m->chg1 = i1 + chg1 - m->i1;
60                 m->chg2 = i2 + chg2 - m->i2;
61         } else {
62                 m = xdl_malloc(sizeof(xdmerge_t));
63                 if (!m)
64                         return -1;
65                 m->next = NULL;
66                 m->mode = mode;
67                 m->i0 = i0;
68                 m->chg0 = chg0;
69                 m->i1 = i1;
70                 m->chg1 = chg1;
71                 m->i2 = i2;
72                 m->chg2 = chg2;
73                 if (*merge)
74                         (*merge)->next = m;
75                 *merge = m;
76         }
77         return 0;
78 }
79
80 static int xdl_cleanup_merge(xdmerge_t *c)
81 {
82         int count = 0;
83         xdmerge_t *next_c;
84
85         /* were there conflicts? */
86         for (; c; c = next_c) {
87                 if (c->mode == 0)
88                         count++;
89                 next_c = c->next;
90                 free(c);
91         }
92         return count;
93 }
94
95 static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2,
96                 int line_count, long flags)
97 {
98         int i;
99         xrecord_t **rec1 = xe1->xdf2.recs + i1;
100         xrecord_t **rec2 = xe2->xdf2.recs + i2;
101
102         for (i = 0; i < line_count; i++) {
103                 int result = xdl_recmatch(rec1[i]->ptr, rec1[i]->size,
104                         rec2[i]->ptr, rec2[i]->size, flags);
105                 if (!result)
106                         return -1;
107         }
108         return 0;
109 }
110
111 static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int add_nl, char *dest)
112 {
113         xrecord_t **recs;
114         int size = 0;
115
116         recs = (use_orig ? xe->xdf1.recs : xe->xdf2.recs) + i;
117
118         if (count < 1)
119                 return 0;
120
121         for (i = 0; i < count; size += recs[i++]->size)
122                 if (dest)
123                         memcpy(dest + size, recs[i]->ptr, recs[i]->size);
124         if (add_nl) {
125                 i = recs[count - 1]->size;
126                 if (i == 0 || recs[count - 1]->ptr[i - 1] != '\n') {
127                         if (dest)
128                                 dest[size] = '\n';
129                         size++;
130                 }
131         }
132         return size;
133 }
134
135 static int xdl_recs_copy(xdfenv_t *xe, int i, int count, int add_nl, char *dest)
136 {
137         return xdl_recs_copy_0(0, xe, i, count, add_nl, dest);
138 }
139
140 static int xdl_orig_copy(xdfenv_t *xe, int i, int count, int add_nl, char *dest)
141 {
142         return xdl_recs_copy_0(1, xe, i, count, add_nl, dest);
143 }
144
145 static int fill_conflict_hunk(xdfenv_t *xe1, const char *name1,
146                               xdfenv_t *xe2, const char *name2,
147                               int size, int i, int style,
148                               xdmerge_t *m, char *dest)
149 {
150         const int marker_size = 7;
151         int marker1_size = (name1 ? strlen(name1) + 1 : 0);
152         int marker2_size = (name2 ? strlen(name2) + 1 : 0);
153         int j;
154
155         /* Before conflicting part */
156         size += xdl_recs_copy(xe1, i, m->i1 - i, 0,
157                               dest ? dest + size : NULL);
158
159         if (!dest) {
160                 size += marker_size + 1 + marker1_size;
161         } else {
162                 for (j = 0; j < marker_size; j++)
163                         dest[size++] = '<';
164                 if (marker1_size) {
165                         dest[size] = ' ';
166                         memcpy(dest + size + 1, name1, marker1_size - 1);
167                         size += marker1_size;
168                 }
169                 dest[size++] = '\n';
170         }
171
172         /* Postimage from side #1 */
173         size += xdl_recs_copy(xe1, m->i1, m->chg1, 1,
174                               dest ? dest + size : NULL);
175
176         if (style == XDL_MERGE_DIFF3) {
177                 /* Shared preimage */
178                 if (!dest) {
179                         size += marker_size + 1;
180                 } else {
181                         for (j = 0; j < marker_size; j++)
182                                 dest[size++] = '|';
183                         dest[size++] = '\n';
184                 }
185                 size += xdl_orig_copy(xe1, m->i0, m->chg0, 1,
186                                       dest ? dest + size : NULL);
187         }
188
189         if (!dest) {
190                 size += marker_size + 1;
191         } else {
192                 for (j = 0; j < marker_size; j++)
193                         dest[size++] = '=';
194                 dest[size++] = '\n';
195         }
196
197         /* Postimage from side #2 */
198         size += xdl_recs_copy(xe2, m->i2, m->chg2, 1,
199                               dest ? dest + size : NULL);
200         if (!dest) {
201                 size += marker_size + 1 + marker2_size;
202         } else {
203                 for (j = 0; j < marker_size; j++)
204                         dest[size++] = '>';
205                 if (marker2_size) {
206                         dest[size] = ' ';
207                         memcpy(dest + size + 1, name2, marker2_size - 1);
208                         size += marker2_size;
209                 }
210                 dest[size++] = '\n';
211         }
212         return size;
213 }
214
215 static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1,
216                                  xdfenv_t *xe2, const char *name2,
217                                  xdmerge_t *m, char *dest, int style)
218 {
219         int size, i;
220
221         for (size = i = 0; m; m = m->next) {
222                 if (m->mode == 0)
223                         size = fill_conflict_hunk(xe1, name1, xe2, name2,
224                                                   size, i, style, m, dest);
225                 else if (m->mode == 1)
226                         size += xdl_recs_copy(xe1, i, m->i1 + m->chg1 - i, 0,
227                                               dest ? dest + size : NULL);
228                 else if (m->mode == 2)
229                         size += xdl_recs_copy(xe2, m->i2 - m->i1 + i,
230                                               m->i1 + m->chg2 - i, 0,
231                                               dest ? dest + size : NULL);
232                 else
233                         continue;
234                 i = m->i1 + m->chg1;
235         }
236         size += xdl_recs_copy(xe1, i, xe1->xdf2.nrec - i, 0,
237                               dest ? dest + size : NULL);
238         return size;
239 }
240
241 /*
242  * Sometimes, changes are not quite identical, but differ in only a few
243  * lines. Try hard to show only these few lines as conflicting.
244  */
245 static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
246                 xpparam_t const *xpp)
247 {
248         for (; m; m = m->next) {
249                 mmfile_t t1, t2;
250                 xdfenv_t xe;
251                 xdchange_t *xscr, *x;
252                 int i1 = m->i1, i2 = m->i2;
253
254                 /* let's handle just the conflicts */
255                 if (m->mode)
256                         continue;
257
258                 /* no sense refining a conflict when one side is empty */
259                 if (m->chg1 == 0 || m->chg2 == 0)
260                         continue;
261
262                 /*
263                  * This probably does not work outside git, since
264                  * we have a very simple mmfile structure.
265                  */
266                 t1.ptr = (char *)xe1->xdf2.recs[m->i1]->ptr;
267                 t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1]->ptr
268                         + xe1->xdf2.recs[m->i1 + m->chg1 - 1]->size - t1.ptr;
269                 t2.ptr = (char *)xe2->xdf2.recs[m->i2]->ptr;
270                 t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1]->ptr
271                         + xe2->xdf2.recs[m->i2 + m->chg2 - 1]->size - t2.ptr;
272                 if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0)
273                         return -1;
274                 if (xdl_change_compact(&xe.xdf1, &xe.xdf2, xpp->flags) < 0 ||
275                     xdl_change_compact(&xe.xdf2, &xe.xdf1, xpp->flags) < 0 ||
276                     xdl_build_script(&xe, &xscr) < 0) {
277                         xdl_free_env(&xe);
278                         return -1;
279                 }
280                 if (!xscr) {
281                         /* If this happens, the changes are identical. */
282                         xdl_free_env(&xe);
283                         m->mode = 4;
284                         continue;
285                 }
286                 x = xscr;
287                 m->i1 = xscr->i1 + i1;
288                 m->chg1 = xscr->chg1;
289                 m->i2 = xscr->i2 + i2;
290                 m->chg2 = xscr->chg2;
291                 while (xscr->next) {
292                         xdmerge_t *m2 = xdl_malloc(sizeof(xdmerge_t));
293                         if (!m2) {
294                                 xdl_free_env(&xe);
295                                 xdl_free_script(x);
296                                 return -1;
297                         }
298                         xscr = xscr->next;
299                         m2->next = m->next;
300                         m->next = m2;
301                         m = m2;
302                         m->mode = 0;
303                         m->i1 = xscr->i1 + i1;
304                         m->chg1 = xscr->chg1;
305                         m->i2 = xscr->i2 + i2;
306                         m->chg2 = xscr->chg2;
307                 }
308                 xdl_free_env(&xe);
309                 xdl_free_script(x);
310         }
311         return 0;
312 }
313
/*
 * Tell whether the 'size' bytes starting at ptr contain at least one
 * alphanumeric character.
 *
 * Returns 1 if so, 0 otherwise; a size of zero or less yields 0.
 */
static int line_contains_alnum(const char *ptr, long size)
{
        long k;

        for (k = 0; k < size; k++) {
                /*
                 * The <ctype.h> functions are only defined for values
                 * representable as unsigned char (and EOF); a plain char
                 * may be negative for bytes >= 0x80.
                 */
                if (isalnum((unsigned char)ptr[k]))
                        return 1;
        }
        return 0;
}
321
322 static int lines_contain_alnum(xdfenv_t *xe, int i, int chg)
323 {
324         for (; chg; chg--, i++)
325                 if (line_contains_alnum(xe->xdf2.recs[i]->ptr,
326                                 xe->xdf2.recs[i]->size))
327                         return 1;
328         return 0;
329 }
330
331 /*
332  * This function merges m and m->next, marking everything between those hunks
333  * as conflicting, too.
334  */
335 static void xdl_merge_two_conflicts(xdmerge_t *m)
336 {
337         xdmerge_t *next_m = m->next;
338         m->chg1 = next_m->i1 + next_m->chg1 - m->i1;
339         m->chg2 = next_m->i2 + next_m->chg2 - m->i2;
340         m->next = next_m->next;
341         free(next_m);
342 }
343
344 /*
345  * If there are less than 3 non-conflicting lines between conflicts,
346  * it appears simpler -- because it takes up less (or as many) lines --
347  * if the lines are moved into the conflicts.
348  */
349 static int xdl_simplify_non_conflicts(xdfenv_t *xe1, xdmerge_t *m,
350                                       int simplify_if_no_alnum)
351 {
352         int result = 0;
353
354         if (!m)
355                 return result;
356         for (;;) {
357                 xdmerge_t *next_m = m->next;
358                 int begin, end;
359
360                 if (!next_m)
361                         return result;
362
363                 begin = m->i1 + m->chg1;
364                 end = next_m->i1;
365
366                 if (m->mode != 0 || next_m->mode != 0 ||
367                     (end - begin > 3 &&
368                      (!simplify_if_no_alnum ||
369                       lines_contain_alnum(xe1, begin, end - begin)))) {
370                         m = next_m;
371                 } else {
372                         result++;
373                         xdl_merge_two_conflicts(m);
374                 }
375         }
376 }
377
/*
 * Merge the two change scripts xscr1 (common ancestor -> side #1, diff
 * environment xe1) and xscr2 (common ancestor -> side #2, diff
 * environment xe2) into one list of hunks and, if result is not NULL,
 * write the merged text into a buffer obtained from xdl_malloc().
 *
 * name1 and name2 label the conflict markers of the respective side.
 * flags carries the merge level (XDL_MERGE_LEVEL_MASK) and the conflict
 * style (XDL_MERGE_STYLE_MASK):
 *
 * level == 0: mark all overlapping changes as conflict
 * level == 1: mark overlapping changes as conflict only if not identical
 * level == 2: analyze non-identical changes for minimal conflict set
 * level == 3: analyze non-identical changes for minimal conflict set, but
 *             treat hunks not containing any letter or number as conflicting
 *
 * returns < 0 on error, == 0 for no conflicts, else number of conflicts
 */
static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, const char *name1,
                xdfenv_t *xe2, xdchange_t *xscr2, const char *name2,
                int flags, xpparam_t const *xpp, mmbuffer_t *result) {
        /* 'changes' is the head of the hunk list, 'c' its current tail */
        xdmerge_t *changes, *c;
        int i0, i1, i2, chg0, chg1, chg2;
        int level = flags & XDL_MERGE_LEVEL_MASK;
        int style = flags & XDL_MERGE_STYLE_MASK;

        if (style == XDL_MERGE_DIFF3) {
                /*
                 * "diff3 -m" output does not make sense for anything
                 * more aggressive than XDL_MERGE_EAGER.
                 */
                if (XDL_MERGE_EAGER < level)
                        level = XDL_MERGE_EAGER;
        }

        c = changes = NULL;

        /* walk both scripts in parallel, in preimage order */
        while (xscr1 && xscr2) {
                /*
                 * xdl_append_merge() only moves the tail; remember the
                 * first node it created as the head of the list.
                 */
                if (!changes)
                        changes = c;
                /* side #1's change ends before side #2's begins: take #1 */
                if (xscr1->i1 + xscr1->chg1 < xscr2->i1) {
                        i0 = xscr1->i1;
                        i1 = xscr1->i2;
                        /*
                         * Side #2 did not touch this region; locate it in
                         * side #2's postimage via the offset of its next
                         * change.
                         */
                        i2 = xscr2->i2 - xscr2->i1 + xscr1->i1;
                        chg0 = xscr1->chg1;
                        chg1 = xscr1->chg2;
                        chg2 = xscr1->chg1;
                        if (xdl_append_merge(&c, 1,
                                             i0, chg0, i1, chg1, i2, chg2)) {
                                xdl_cleanup_merge(changes);
                                return -1;
                        }
                        xscr1 = xscr1->next;
                        continue;
                }
                /* side #2's change ends before side #1's begins: take #2 */
                if (xscr2->i1 + xscr2->chg1 < xscr1->i1) {
                        i0 = xscr2->i1;
                        /* same mapping as above, with the sides swapped */
                        i1 = xscr1->i2 - xscr1->i1 + xscr2->i1;
                        i2 = xscr2->i2;
                        chg0 = xscr2->chg1;
                        chg1 = xscr2->chg1;
                        chg2 = xscr2->chg2;
                        if (xdl_append_merge(&c, 2,
                                             i0, chg0, i1, chg1, i2, chg2)) {
                                xdl_cleanup_merge(changes);
                                return -1;
                        }
                        xscr2 = xscr2->next;
                        continue;
                }
                /*
                 * The two changes overlap or touch.  Unless both sides
                 * made exactly the same change (same preimage range, same
                 * number of new lines, same new lines), this is a
                 * conflict; at XDL_MERGE_MINIMAL it always is.  Identical
                 * changes add no hunk at all.
                 */
                if (level == XDL_MERGE_MINIMAL || xscr1->i1 != xscr2->i1 ||
                                xscr1->chg1 != xscr2->chg1 ||
                                xscr1->chg2 != xscr2->chg2 ||
                                xdl_merge_cmp_lines(xe1, xscr1->i2,
                                        xe2, xscr2->i2,
                                        xscr1->chg2, xpp->flags)) {
                        /* conflict */
                        /* off: how much later #1 starts than #2 (preimage) */
                        int off = xscr1->i1 - xscr2->i1;
                        /* ffo: how much later #1 ends than #2 (preimage) */
                        int ffo = off + xscr1->chg1 - xscr2->chg1;

                        i0 = xscr1->i1;
                        i1 = xscr1->i2;
                        i2 = xscr2->i2;
                        /* pull the later-starting side back to the earlier start */
                        if (off > 0) {
                                i0 -= off;
                                i1 -= off;
                        }
                        else
                                i2 += off;
                        chg0 = xscr1->i1 + xscr1->chg1 - i0;
                        chg1 = xscr1->i2 + xscr1->chg2 - i1;
                        chg2 = xscr2->i2 + xscr2->chg2 - i2;
                        /* push the earlier-ending side out to the later end */
                        if (ffo < 0) {
                                chg0 -= ffo;
                                chg1 -= ffo;
                        } else
                                chg2 += ffo;
                        if (xdl_append_merge(&c, 0,
                                             i0, chg0, i1, chg1, i2, chg2)) {
                                xdl_cleanup_merge(changes);
                                return -1;
                        }
                }

                /* preimage end of each change (i1/i2 reused as scratch) */
                i1 = xscr1->i1 + xscr1->chg1;
                i2 = xscr2->i1 + xscr2->chg1;

                /* step past whichever change ends first; both on a tie */
                if (i1 >= i2)
                        xscr2 = xscr2->next;
                if (i2 >= i1)
                        xscr1 = xscr1->next;
        }
        /* only side #1 has changes left: take them all */
        while (xscr1) {
                if (!changes)
                        changes = c;
                i0 = xscr1->i1;
                i1 = xscr1->i2;
                /*
                 * Side #2 has no further changes, so from here on its
                 * postimage is shifted against the preimage by the
                 * difference of their line counts.
                 */
                i2 = xscr1->i1 + xe2->xdf2.nrec - xe2->xdf1.nrec;
                chg0 = xscr1->chg1;
                chg1 = xscr1->chg2;
                chg2 = xscr1->chg1;
                if (xdl_append_merge(&c, 1,
                                     i0, chg0, i1, chg1, i2, chg2)) {
                        xdl_cleanup_merge(changes);
                        return -1;
                }
                xscr1 = xscr1->next;
        }
        /* only side #2 has changes left: take them all */
        while (xscr2) {
                if (!changes)
                        changes = c;
                i0 = xscr2->i1;
                /* as above, with the sides swapped */
                i1 = xscr2->i1 + xe1->xdf2.nrec - xe1->xdf1.nrec;
                i2 = xscr2->i2;
                chg0 = xscr2->chg1;
                chg1 = xscr2->chg1;
                chg2 = xscr2->chg2;
                if (xdl_append_merge(&c, 2,
                                     i0, chg0, i1, chg1, i2, chg2)) {
                        xdl_cleanup_merge(changes);
                        return -1;
                }
                xscr2 = xscr2->next;
        }
        /* covers the case where the very last append created the head */
        if (!changes)
                changes = c;
        /* refine conflicts */
        if (XDL_MERGE_ZEALOUS <= level &&
            (xdl_refine_conflicts(xe1, xe2, changes, xpp) < 0 ||
             xdl_simplify_non_conflicts(xe1, changes,
                                        XDL_MERGE_ZEALOUS < level) < 0)) {
                xdl_cleanup_merge(changes);
                return -1;
        }
        /* output */
        if (result) {
                /* first pass (dest == NULL) only measures the result */
                int size = xdl_fill_merge_buffer(xe1, name1, xe2, name2,
                        changes, NULL, style);
                result->ptr = xdl_malloc(size);
                if (!result->ptr) {
                        xdl_cleanup_merge(changes);
                        return -1;
                }
                result->size = size;
                /* second pass writes into the buffer just allocated */
                xdl_fill_merge_buffer(xe1, name1, xe2, name2, changes,
                                      result->ptr, style);
        }
        /* frees the hunk list and yields the number of conflicts */
        return xdl_cleanup_merge(changes);
}
538
539 int xdl_merge(mmfile_t *orig, mmfile_t *mf1, const char *name1,
540                 mmfile_t *mf2, const char *name2,
541                 xpparam_t const *xpp, int flags, mmbuffer_t *result) {
542         xdchange_t *xscr1, *xscr2;
543         xdfenv_t xe1, xe2;
544         int status;
545
546         result->ptr = NULL;
547         result->size = 0;
548
549         if (xdl_do_diff(orig, mf1, xpp, &xe1) < 0 ||
550                         xdl_do_diff(orig, mf2, xpp, &xe2) < 0) {
551                 return -1;
552         }
553         if (xdl_change_compact(&xe1.xdf1, &xe1.xdf2, xpp->flags) < 0 ||
554             xdl_change_compact(&xe1.xdf2, &xe1.xdf1, xpp->flags) < 0 ||
555             xdl_build_script(&xe1, &xscr1) < 0) {
556                 xdl_free_env(&xe1);
557                 return -1;
558         }
559         if (xdl_change_compact(&xe2.xdf1, &xe2.xdf2, xpp->flags) < 0 ||
560             xdl_change_compact(&xe2.xdf2, &xe2.xdf1, xpp->flags) < 0 ||
561             xdl_build_script(&xe2, &xscr2) < 0) {
562                 xdl_free_env(&xe2);
563                 return -1;
564         }
565         status = 0;
566         if (xscr1 || xscr2) {
567                 if (!xscr1) {
568                         result->ptr = xdl_malloc(mf2->size);
569                         memcpy(result->ptr, mf2->ptr, mf2->size);
570                         result->size = mf2->size;
571                 } else if (!xscr2) {
572                         result->ptr = xdl_malloc(mf1->size);
573                         memcpy(result->ptr, mf1->ptr, mf1->size);
574                         result->size = mf1->size;
575                 } else {
576                         status = xdl_do_merge(&xe1, xscr1, name1,
577                                               &xe2, xscr2, name2,
578                                               flags, xpp, result);
579                 }
580                 xdl_free_script(xscr1);
581                 xdl_free_script(xscr2);
582         }
583         xdl_free_env(&xe1);
584         xdl_free_env(&xe2);
585
586         return status;
587 }