7 # define mkstemp(p) _open(_mktemp(p), _O_CREAT | _O_SHORT_LIVED | _O_EXCL)
11 * The bulk of this software is derived from Plan 9 and is thus distributed
12 * under the Lucent Public License, Version 1.02, reproduced below.
14 * ===================================================================
16 * Lucent Public License Version 1.02
18 * THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS PUBLIC
19 * LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE
20 * PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
24 * "Contribution" means:
26 * a. in the case of Lucent Technologies Inc. ("LUCENT"), the Original
28 * b. in the case of each Contributor,
30 * i. changes to the Program, and
31 * ii. additions to the Program;
33 * where such changes and/or additions to the Program were added to the
34 * Program by such Contributor itself or anyone acting on such
35 * Contributor's behalf, and the Contributor explicitly consents, in
36 * accordance with Section 3C, to characterization of the changes and/or
37 * additions as Contributions.
39 * "Contributor" means LUCENT and any other entity that has Contributed a
40 * Contribution to the Program.
42 * "Distributor" means a Recipient that distributes the Program,
43 * modifications to the Program, or any part thereof.
45 * "Licensed Patents" mean patent claims licensable by a Contributor
46 * which are necessarily infringed by the use or sale of its Contribution
47 * alone or when combined with the Program.
49 * "Original Program" means the original version of the software
50 * accompanying this Agreement as released by LUCENT, including source
51 * code, object code and documentation, if any.
53 * "Program" means the Original Program and Contributions or any part
56 * "Recipient" means anyone who receives the Program under this
57 * Agreement, including all Contributors.
61 * a. Subject to the terms of this Agreement, each Contributor hereby
62 * grants Recipient a non-exclusive, worldwide, royalty-free copyright
63 * license to reproduce, prepare derivative works of, publicly display,
64 * publicly perform, distribute and sublicense the Contribution of such
65 * Contributor, if any, and such derivative works, in source code and
68 * b. Subject to the terms of this Agreement, each Contributor hereby
69 * grants Recipient a non-exclusive, worldwide, royalty-free patent
70 * license under Licensed Patents to make, use, sell, offer to sell,
71 * import and otherwise transfer the Contribution of such Contributor, if
72 * any, in source code and object code form. The patent license granted
73 * by a Contributor shall also apply to the combination of the
74 * Contribution of that Contributor and the Program if, at the time the
75 * Contribution is added by the Contributor, such addition of the
76 * Contribution causes such combination to be covered by the Licensed
77 * Patents. The patent license granted by a Contributor shall not apply
78 * to (i) any other combinations which include the Contribution, nor to
79 * (ii) Contributions of other Contributors. No hardware per se is
82 * c. Recipient understands that although each Contributor grants the
83 * licenses to its Contributions set forth herein, no assurances are
84 * provided by any Contributor that the Program does not infringe the
85 * patent or other intellectual property rights of any other entity. Each
86 * Contributor disclaims any liability to Recipient for claims brought by
87 * any other entity based on infringement of intellectual property rights
88 * or otherwise. As a condition to exercising the rights and licenses
89 * granted hereunder, each Recipient hereby assumes sole responsibility
90 * to secure any other intellectual property rights needed, if any. For
91 * example, if a third party patent license is required to allow
92 * Recipient to distribute the Program, it is Recipient's responsibility
93 * to acquire that license before distributing the Program.
95 * d. Each Contributor represents that to its knowledge it has sufficient
96 * copyright rights in its Contribution, if any, to grant the copyright
97 * license set forth in this Agreement.
101 * A. Distributor may choose to distribute the Program in any form under
102 * this Agreement or under its own license agreement, provided that:
104 * a. it complies with the terms and conditions of this Agreement;
106 * b. if the Program is distributed in source code or other tangible
107 * form, a copy of this Agreement or Distributor's own license agreement
108 * is included with each copy of the Program; and
110 * c. if distributed under Distributor's own license agreement, such
113 * i. effectively disclaims on behalf of all Contributors all warranties
114 * and conditions, express and implied, including warranties or
115 * conditions of title and non-infringement, and implied warranties or
116 * conditions of merchantability and fitness for a particular purpose;
117 * ii. effectively excludes on behalf of all Contributors all liability
118 * for damages, including direct, indirect, special, incidental and
119 * consequential damages, such as lost profits; and
120 * iii. states that any provisions which differ from this Agreement are
121 * offered by that Contributor alone and not by any other party.
123 * B. Each Distributor must include the following in a conspicuous
124 * location in the Program:
126 * Copyright (C) 2003, Lucent Technologies Inc. and others. All Rights
129 * C. In addition, each Contributor must identify itself as the
130 * originator of its Contribution in a manner that reasonably allows
131 * subsequent Recipients to identify the originator of the Contribution.
132 * Also, each Contributor must agree that the additions and/or changes
133 * are intended to be a Contribution. Once a Contribution is contributed,
134 * it may not thereafter be revoked.
136 * 4. COMMERCIAL DISTRIBUTION
138 * Commercial distributors of software may accept certain
139 * responsibilities with respect to end users, business partners and the
140 * like. While this license is intended to facilitate the commercial use
141 * of the Program, the Distributor who includes the Program in a
142 * commercial product offering should do so in a manner which does not
143 * create potential liability for Contributors. Therefore, if a
144 * Distributor includes the Program in a commercial product offering,
145 * such Distributor ("Commercial Distributor") hereby agrees to defend
146 * and indemnify every Contributor ("Indemnified Contributor") against
147 * any losses, damages and costs (collectively"Losses") arising from
148 * claims, lawsuits and other legal actions brought by a third party
149 * against the Indemnified Contributor to the extent caused by the acts
150 * or omissions of such Commercial Distributor in connection with its
151 * distribution of the Program in a commercial product offering. The
152 * obligations in this section do not apply to any claims or Losses
153 * relating to any actual or alleged intellectual property infringement.
154 * In order to qualify, an Indemnified Contributor must: a) promptly
155 * notify the Commercial Distributor in writing of such claim, and b)
156 * allow the Commercial Distributor to control, and cooperate with the
157 * Commercial Distributor in, the defense and any related settlement
158 * negotiations. The Indemnified Contributor may participate in any such
159 * claim at its own expense.
161 * For example, a Distributor might include the Program in a commercial
162 * product offering, Product X. That Distributor is then a Commercial
163 * Distributor. If that Commercial Distributor then makes performance
164 * claims, or offers warranties related to Product X, those performance
165 * claims and warranties are such Commercial Distributor's responsibility
166 * alone. Under this section, the Commercial Distributor would have to
167 * defend claims against the Contributors related to those performance
168 * claims and warranties, and if a court requires any Contributor to pay
169 * any damages as a result, the Commercial Distributor must pay those
174 * EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS
175 * PROVIDED ON AN"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
176 * KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY
177 * WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY
178 * OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely
179 * responsible for determining the appropriateness of using and
180 * distributing the Program and assumes all risks associated with its
181 * exercise of rights under this Agreement, including but not limited to
182 * the risks and costs of program errors, compliance with applicable
183 * laws, damage to or loss of data, programs or equipment, and
184 * unavailability or interruption of operations.
186 * 6. DISCLAIMER OF LIABILITY
188 * EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
189 * ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
190 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
191 * WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
192 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
193 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
194 * DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
195 * HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
199 * Recipient agrees that Recipient alone is responsible for compliance
200 * with the United States export administration regulations (and the
201 * export control laws and regulation of any other countries).
205 * If any provision of this Agreement is invalid or unenforceable under
206 * applicable law, it shall not affect the validity or enforceability of
207 * the remainder of the terms of this Agreement, and without further
208 * action by the parties hereto, such provision shall be reformed to the
209 * minimum extent necessary to make such provision valid and enforceable.
211 * If Recipient institutes patent litigation against a Contributor with
212 * respect to a patent applicable to software (including a cross-claim or
213 * counterclaim in a lawsuit), then any patent licenses granted by that
214 * Contributor to such Recipient under this Agreement shall terminate as
215 * of the date such litigation is filed. In addition, if Recipient
216 * institutes patent litigation against any entity (including a
217 * cross-claim or counterclaim in a lawsuit) alleging that the Program
218 * itself (excluding combinations of the Program with other software or
219 * hardware) infringes such Recipient's patent(s), then such Recipient's
220 * rights granted under Section 2(b) shall terminate as of the date such
221 * litigation is filed.
223 * All Recipient's rights under this Agreement shall terminate if it
224 * fails to comply with any of the material terms or conditions of this
225 * Agreement and does not cure such failure in a reasonable period of
226 * time after becoming aware of such noncompliance. If all Recipient's
227 * rights under this Agreement terminate, Recipient agrees to cease use
228 * and distribution of the Program as soon as reasonably practicable.
229 * However, Recipient's obligations under this Agreement and any licenses
230 * granted by Recipient relating to the Program shall continue and
233 * LUCENT may publish new versions (including revisions) of this
234 * Agreement from time to time. Each new version of the Agreement will be
235 * given a distinguishing version number. The Program (including
236 * Contributions) may always be distributed subject to the version of the
237 * Agreement under which it was received. In addition, after a new
238 * version of the Agreement is published, Contributor may elect to
239 * distribute the Program (including its Contributions) under the new
240 * version. No one other than LUCENT has the right to modify this
241 * Agreement. Except as expressly stated in Sections 2(a) and 2(b) above,
242 * Recipient receives no rights or licenses to the intellectual property
243 * of any Contributor under this Agreement, whether expressly, by
244 * implication, estoppel or otherwise. All rights in the Program not
245 * expressly granted under this Agreement are reserved.
247 * This Agreement is governed by the laws of the State of New York and
248 * the intellectual property laws of the United States of America. No
249 * party to this Agreement will bring a legal action under this Agreement
250 * more than one year after the cause of action arose. Each party waives
251 * its rights to a jury trial in any resulting litigation.
271 struct line *file[2];
272 struct line *sfile[2]; // shortened by pruning common prefix and suffix
274 int pref, suff; // length of prefix and suffix
275 int *class; // will be overlaid on file[0]
276 int *member; // will be overlaid on file[1]
277 int *klist; // will be overlaid on file[0] after class
278 struct cand *clist; // merely a free storage pot for candidates
280 int *J; // will be overlaid on class
283 #define low(x) (x&((1L<<HALFLONG)-1))
284 #define high(x) (x>>HALFLONG)
287 * Returns a computed hash for a given string.
288 * Hashing has the effect of arranging line in 7-bit bytes and then summing 1-s
289 * complement in 16-bit hunks.
290 * @param line The line of a buffer to hash.
292 static int hash(char *line) {
303 sum += (long)*p++ << (shift &= (HALFLONG-1));
306 sum = low(sum) + high(sum);
307 return ((short)low(sum) + (short)high(sum));
311 * Hashes each line in the given string buffer and stores it internally.
312 * @param i 0 for diff 'from', 1 for diff 'to'.
313 * @param buf The string buffer to prepare from.
315 void prepare(int i, const char *buf) {
318 char bufcpy[strlen(buf)];
321 p = malloc(3*sizeof(struct line));
323 strncpy(bufcpy, buf, strlen(buf));
324 bufcpy[strlen(buf)] = 0;
325 l = strtok(bufcpy, "\n");
327 p = realloc(p, (++j+3)*sizeof(struct line));
328 p[j].value = hash(l);
329 l = strtok(NULL, "\n");
336 * Adds to the count of lines added and removed for this diff.
337 * Diff 'from' chunks are counted as lines removed and diff 'to' chunks are
338 * counted as lines added.
339 * @param a The diff 'from' chunk's beginning line number.
340 * @param b The diff 'from' chunk's ending line number.
341 * @param c The diff 'to' chunk's beginning line number.
342 * @param d The diff 'to' chunk's ending line number.
343 * @param added Int pointer to the running number of lines added for this diff.
344 * @param removed Int pointer to the running number of lines removed for this
347 void change(int a, int b, int c, int d, int *added, int *removed) {
356 *removed += b - a + 1;
367 * diff - differential file comparison
369 * Uses an algorithm due to Harold Stone, which finds
370 * a pair of longest identical subsequences in the two
373 * The major goal is to generate the match vector J.
374 * J[i] is the index of the line in file1 corresponding
375 * to line i file0. J[i] = 0 if there is no
376 * such line in file1.
378 * Lines are hashed so as to work in core. All potential
379 * matches are located by sorting the lines of each file
380 * on the hash (called value). In particular, this
381 * collects the equivalence classes in file1 together.
382 * Subroutine equiv replaces the value of each line in
383 * file0 by the index of the first element of its
384 * matching equivalence in (the reordered) file1.
385 * To save space equiv squeezes file1 into a single
386 * array member in which the equivalence classes
387 * are simply concatenated, except that their first
388 * members are flagged by changing sign.
390 * Next the indices that point into member are unsorted into
391 * array class according to the original order of file0.
393 * The cleverness lies in routine stone. This marches
394 * through the lines of file0, developing a vector klist
395 * of "k-candidates". At step i a k-candidate is a matched
396 * pair of lines x,y (x in file0 y in file1) such that
397 * there is a common subsequence of lenght k
398 * between the first i lines of file0 and the first y
399 * lines of file1, but there is no such subsequence for
400 * any smaller y. x is the earliest possible mate to y
401 * that occurs in such a subsequence.
403 * Whenever any of the members of the equivalence class of
404 * lines in file1 matable to a line in file0 has serial number
405 * less than the y of some k-candidate, that k-candidate
406 * with the smallest such y is replaced. The new
407 * k-candidate is chained (via pred) to the current
408 * k-1 candidate so that the actual subsequence can
409 * be recovered. When a member has serial number greater
410 * that the y of all k-candidates, the klist is extended.
411 * At the end, the longest subsequence is pulled out
412 * and placed in the array J by unravel.
414 * With J in hand, the matches there recorded are
415 * check'ed against reality to assure that no spurious
416 * matches have crept in due to hashing. If they have,
417 * they are broken, and "jackpot " is recorded--a harmless
418 * matter except that a true match for a spuriously
419 * mated line may now be unnecessarily reported as a change.
421 * Much of the complexity of the program comes simply
422 * from trying to minimize core utilization and
423 * maximize the range of doable problems by dynamically
424 * allocating what is needed and reusing what is not.
425 * The core requirements for problems larger than somewhat
426 * are (in words) 2*length(file0) + length(file1) +
427 * 3*(number of k-candidates installed), typically about
428 * 6n words for files of length n.
431 static void sort(struct line *a, int n) { /*shellsort CACM #201*/
433 struct line *ai, *aim, *j, *k;
438 for (i = 1; i <= n; i *= 2)
440 for (m /= 2; m != 0; m /= 2) {
442 for (j = a+1; j <= k; j++) {
446 if (aim->value > ai->value ||
447 aim->value == ai->value &&
448 aim->serial > ai->serial)
456 } while (ai > a && aim >= ai);
461 static void unsort(struct line *f, int l, int *b) {
465 a = malloc((l+1)*sizeof(int));
467 a[f[i].serial] = f[i].value;
473 static void prune(void) {
476 for(pref=0;pref<len[0]&&pref<len[1]&&
477 file[0][pref+1].value==file[1][pref+1].value;
479 for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
480 file[0][len[0]-suff].value==file[1][len[1]-suff].value;
483 sfile[j] = file[j]+pref;
484 slen[j] = len[j]-pref-suff;
485 for(i=0;i<=slen[j];i++)
486 sfile[j][i].serial = i;
490 static void equiv(struct line *a, int n, struct line *b, int m, int *c) {
494 while(i<=n && j<=m) {
495 if(a[i].value < b[j].value)
497 else if(a[i].value == b[j].value)
508 while(b[j+1].value == b[j].value) {
516 static int newcand(int x, int y, int pred) {
519 clist = realloc(clist, (clen+1)*sizeof(struct cand));
527 static int search(int *c, int k, int y) {
531 if(clist[c[k]].y < y) /*quick look for typical case*/
535 while((l=(i+j)/2) > i) {
547 static int stone(int *a, int n, int *b, int *c) {
554 c[0] = newcand(0,0,0);
555 for(i=1; i<=n; i++) {
563 if(y <= clist[oldc].y)
569 if(clist[c[l]].y <= y)
572 c[l] = newcand(i,y,oldc);
576 c[l] = newcand(i,y,oldc);
580 } while((y=b[++j]) > 0);
585 static void unravel(int p) {
589 for(i=0; i<=len[0]; i++) {
592 else if (i > len[0]-suff)
593 J[i] = i+len[1]-len[0];
597 for(q=clist+p;q->y!=0;q=clist+q->pred)
598 J[q->x+pref] = q->y+pref;
601 static void output(int *added, int *removed) {
602 int m, i0, i1, j0, j1;
607 for (i0 = 1; i0 <= m; i0 = i1+1) {
608 while (i0 <= m && J[i0] == J[i0-1]+1)
612 while (i1 < m && J[i1+1] == 0)
616 change(i0, i1, j0, j1, added, removed);
619 change(1, 0, 1, len[1], added, removed);
622 // create and return the path of a tmp_file filled with buf
623 // caller must delete file if that's desired
624 char *tmp_file_from_buf(const char *buf) {
625 char *template = "/tmp/ohcount_diff_XXXXXXX";
626 char *path = strdup(template);
628 int fd = mkstemp(path);
629 if (write(fd, buf, strlen(buf)) != strlen(buf)) {
630 fprintf(stderr, "src/diff.c: Could not write temporary file %s.\n", path);
641 void ohcount_calc_diff_with_disk(
646 *added = *removed = 0;
647 char *from_tmp = tmp_file_from_buf(from);
648 char *to_tmp = tmp_file_from_buf(to);
651 sprintf(command, "diff -d --normal --suppress-common-lines --new-file '%s' '%s'", from_tmp, to_tmp);
652 FILE *f = popen(command, "r");
655 while(fgets(line, sizeof(line), f)) {
663 if (unlink(from_tmp)) {
664 printf("error unlinking %s: %d", from_tmp, errno);
666 if (unlink(to_tmp)) {
667 printf("error unlinking %s: %d", from_tmp, errno);
671 #define USE_DISK_IF_LARGER 100000
672 void ohcount_calc_diff(const char *from,
673 const char *to, int *added, int *removed) {
676 if (strlen(from) > USE_DISK_IF_LARGER || strlen(to) > USE_DISK_IF_LARGER)
677 return ohcount_calc_diff_with_disk(from, to, added, removed);
683 sort(sfile[0], slen[0]);
684 sort(sfile[1], slen[1]);
686 member = (int *)file[1];
687 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
688 member = realloc(member, (slen[1]+2)*sizeof(int));
690 class = (int *)file[0];
691 unsort(sfile[0], slen[0], class);
692 class = realloc(class, (slen[0]+2)*sizeof(int));
694 klist = malloc((slen[0]+2)*sizeof(int));
695 clist = malloc(sizeof(struct cand));
696 k = stone(class, slen[0], member, klist);
700 J = malloc((len[0]+2)*sizeof(int));
705 *added = *removed = 0;
706 output(added, removed);