diff.js
author Tony Garnock-Jones <tonyg@kcbbs.gen.nz>
Mon Dec 21 23:52:38 2009 +0000 (2 years ago)
changeset 123 bc4c90cbd778
parent 10144d8139cabca
permissions -rw-r--r--
For OS X
     1 // Copyright (c) 2006, 2008 Tony Garnock-Jones <tonyg@lshift.net>
     2 // Copyright (c) 2006, 2008 LShift Ltd. <query@lshift.net>
     3 //
     4 // Permission is hereby granted, free of charge, to any person
     5 // obtaining a copy of this software and associated documentation files
     6 // (the "Software"), to deal in the Software without restriction,
     7 // including without limitation the rights to use, copy, modify, merge,
     8 // publish, distribute, sublicense, and/or sell copies of the Software,
     9 // and to permit persons to whom the Software is furnished to do so,
    10 // subject to the following conditions:
    11 //
    12 // The above copyright notice and this permission notice shall be
    13 // included in all copies or substantial portions of the Software.
    14 //
    15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    16 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    17 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    18 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
    19 // BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
    20 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    21 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    22 // SOFTWARE.
    23 
    24 Diff = {
    25     longest_common_subsequence: function(file1, file2) {
    26         /* Text diff algorithm following Hunt and McIlroy 1976.
    27          * J. W. Hunt and M. D. McIlroy, An algorithm for differential file
    28          * comparison, Bell Telephone Laboratories CSTR #41 (1976)
    29          * http://www.cs.dartmouth.edu/~doug/
    30          *
    31          * Expects two arrays of strings.
    32          */
    33         var equivalenceClasses;
    34         var file2indices;
    35         var newCandidate;
    36         var candidates;
    37         var line;
    38         var c, i, j, jX, r, s;
    39 
    40         equivalenceClasses = {};
    41         for (j = 0; j < file2.length; j++) {
    42             line = file2[j];
    43             if (equivalenceClasses[line]) {
    44                 equivalenceClasses[line].push(j);
    45             } else {
    46                 equivalenceClasses[line] = [j];
    47             }
    48         }
    49 
    50         candidates = [{file1index: -1,
    51                        file2index: -1,
    52                        chain: null}];
    53 
    54         for (i = 0; i < file1.length; i++) {
    55             line = file1[i];
    56             file2indices = equivalenceClasses[line] || [];
    57 
    58             r = 0;
    59             c = candidates[0];
    60 
    61             for (jX = 0; jX < file2indices.length; jX++) {
    62                 j = file2indices[jX];
    63 
    64                 for (s = 0; s < candidates.length; s++) {
    65                     if ((candidates[s].file2index < j) &&
    66                         ((s == candidates.length - 1) ||
    67                          (candidates[s + 1].file2index > j)))
    68                         break;
    69                 }
    70 
    71                 if (s < candidates.length) {
    72                     newCandidate = {file1index: i,
    73                                     file2index: j,
    74                                     chain: candidates[s]};
    75                     if (r == candidates.length) {
    76                         candidates.push(c);
    77                     } else {
    78                         candidates[r] = c;
    79                     }
    80                     r = s + 1;
    81                     c = newCandidate;
    82                     if (r == candidates.length) {
    83                         break; // no point in examining further (j)s
    84                     }
    85                 }
    86             }
    87 
    88             candidates[r] = c;
    89         }
    90 
    91         // At this point, we know the LCS: it's in the reverse of the
    92         // linked-list through .chain of
    93         // candidates[candidates.length - 1].
    94 
    95         return candidates[candidates.length - 1];
    96     },
    97 
    98     diff_comm: function(file1, file2) {
    99         // We apply the LCS to build a "comm"-style picture of the
   100         // differences between file1 and file2.
   101 
   102         var result = [];
   103         var tail1 = file1.length;
   104         var tail2 = file2.length;
   105         var common = {common: []};
   106 
   107         function processCommon() {
   108             if (common.common.length) {
   109                 common.common.reverse();
   110                 result.push(common);
   111                 common = {common: []};
   112             }
   113         }
   114 
   115         for (var candidate = Diff.longest_common_subsequence(file1, file2);
   116              candidate !== null;
   117              candidate = candidate.chain)
   118         {
   119             var different = {file1: [], file2: []};
   120 
   121             while (--tail1 > candidate.file1index) {
   122                 different.file1.push(file1[tail1]);
   123             }
   124 
   125             while (--tail2 > candidate.file2index) {
   126                 different.file2.push(file2[tail2]);
   127             }
   128 
   129             if (different.file1.length || different.file2.length) {
   130                 processCommon();
   131                 different.file1.reverse();
   132                 different.file2.reverse();
   133                 result.push(different);
   134             }
   135 
   136             if (tail1 >= 0) {
   137                 common.common.push(file1[tail1]);
   138             }
   139         }
   140 
   141         processCommon();
   142 
   143         result.reverse();
   144         return result;
   145     },
   146 
   147     diff_patch: function(file1, file2) {
   148         // We apply the LCD to build a JSON representation of a
   149         // diff(1)-style patch.
   150 
   151         var result = [];
   152         var tail1 = file1.length;
   153         var tail2 = file2.length;
   154 
   155         function chunkDescription(file, offset, length) {
   156             var chunk = [];
   157             for (var i = 0; i < length; i++) {
   158                 chunk.push(file[offset + i]);
   159             }
   160             return {offset: offset,
   161                     length: length,
   162                     chunk: chunk};
   163         }
   164 
   165         for (var candidate = Diff.longest_common_subsequence(file1, file2);
   166              candidate !== null;
   167              candidate = candidate.chain)
   168         {
   169             var mismatchLength1 = tail1 - candidate.file1index - 1;
   170             var mismatchLength2 = tail2 - candidate.file2index - 1;
   171             tail1 = candidate.file1index;
   172             tail2 = candidate.file2index;
   173 
   174             if (mismatchLength1 || mismatchLength2) {
   175                 result.push({file1: chunkDescription(file1,
   176                                                      candidate.file1index + 1,
   177                                                      mismatchLength1),
   178                              file2: chunkDescription(file2,
   179                                                      candidate.file2index + 1,
   180                                                      mismatchLength2)});
   181             }
   182         }
   183 
   184         result.reverse();
   185         return result;
   186     },
   187 
   188     strip_patch: function(patch) {
   189 	// Takes the output of Diff.diff_patch(), and removes
   190 	// information from it. It can still be used by patch(),
   191 	// below, but can no longer be inverted.
   192 	var newpatch = [];
   193 	for (var i = 0; i < patch.length; i++) {
   194 	    var chunk = patch[i];
   195 	    newpatch.push({file1: {offset: chunk.file1.offset,
   196 				   length: chunk.file1.length},
   197 			   file2: {chunk: chunk.file2.chunk}});
   198 	}
   199 	return newpatch;
   200     },
   201 
   202     invert_patch: function(patch) {
   203         // Takes the output of Diff.diff_patch(), and inverts the
   204         // sense of it, so that it can be applied to file2 to give
   205         // file1 rather than the other way around.
   206 
   207         for (var i = 0; i < patch.length; i++) {
   208             var chunk = patch[i];
   209             var tmp = chunk.file1;
   210             chunk.file1 = chunk.file2;
   211             chunk.file2 = tmp;
   212         }
   213     },
   214 
   215     patch: function (file, patch) {
   216         // Applies a patch to a file.
   217         //
   218         // Given file1 and file2, Diff.patch(file1,
   219         // Diff.diff_patch(file1, file2)) should give file2.
   220 
   221         var result = [];
   222         var commonOffset = 0;
   223 
   224         function copyCommon(targetOffset) {
   225             while (commonOffset < targetOffset) {
   226                 result.push(file[commonOffset]);
   227                 commonOffset++;
   228             }
   229         }
   230 
   231         for (var chunkIndex = 0; chunkIndex < patch.length; chunkIndex++) {
   232             var chunk = patch[chunkIndex];
   233             copyCommon(chunk.file1.offset);
   234             for (var lineIndex = 0; lineIndex < chunk.file2.chunk.length; lineIndex++) {
   235                 result.push(chunk.file2.chunk[lineIndex]);
   236             }
   237             commonOffset += chunk.file1.length;
   238         }
   239 
   240         copyCommon(file.length);
   241         return result;
   242     },
   243 
   244     diff_indices: function(file1, file2) {
   245         // We apply the LCS to give a simple representation of the
   246         // offsets and lengths of mismatched chunks in the input
   247         // files. This is used by diff3_merge_indices below.
   248 
   249         var result = [];
   250         var tail1 = file1.length;
   251         var tail2 = file2.length;
   252 
   253         for (var candidate = Diff.longest_common_subsequence(file1, file2);
   254              candidate !== null;
   255              candidate = candidate.chain)
   256         {
   257             var mismatchLength1 = tail1 - candidate.file1index - 1;
   258             var mismatchLength2 = tail2 - candidate.file2index - 1;
   259             tail1 = candidate.file1index;
   260             tail2 = candidate.file2index;
   261 
   262             if (mismatchLength1 || mismatchLength2) {
   263                 result.push({file1: [tail1 + 1, mismatchLength1],
   264                              file2: [tail2 + 1, mismatchLength2]});
   265             }
   266         }
   267 
   268         result.reverse();
   269         return result;
   270     },
   271 
   272     diff3_merge_indices: function (a, o, b) {
   273         // Given three files, A, O, and B, where both A and B are
   274         // independently derived from O, returns a fairly complicated
   275         // internal representation of merge decisions it's taken. The
   276         // interested reader may wish to consult
   277         //
   278         // Sanjeev Khanna, Keshav Kunal, and Benjamin C. Pierce. "A
   279         // Formal Investigation of Diff3." In Arvind and Prasad,
   280         // editors, Foundations of Software Technology and Theoretical
   281         // Computer Science (FSTTCS), December 2007.
   282         //
   283         // (http://www.cis.upenn.edu/~bcpierce/papers/diff3-short.pdf)
   284         var i;
   285 
   286         var m1 = Diff.diff_indices(o, a);
   287         var m2 = Diff.diff_indices(o, b);
   288 
   289         var hunks = [];
   290         function addHunk(h, side) {
   291             hunks.push([h.file1[0], side, h.file1[1], h.file2[0], h.file2[1]]);
   292         }
   293         for (i = 0; i < m1.length; i++) { addHunk(m1[i], 0); }
   294         for (i = 0; i < m2.length; i++) { addHunk(m2[i], 2); }
   295         hunks.sort();
   296 
   297         var result = [];
   298         var commonOffset = 0;
   299         function copyCommon(targetOffset) {
   300             if (targetOffset > commonOffset) {
   301                 result.push([1, commonOffset, targetOffset - commonOffset]);
   302                 commonOffset = targetOffset;
   303             }
   304         }
   305 
   306         for (var hunkIndex = 0; hunkIndex < hunks.length; hunkIndex++) {
   307             var firstHunkIndex = hunkIndex;
   308             var hunk = hunks[hunkIndex];
   309             var regionLhs = hunk[0];
   310             var regionRhs = regionLhs + hunk[2];
   311             while (hunkIndex < hunks.length - 1) {
   312                 var maybeOverlapping = hunks[hunkIndex + 1];
   313                 var maybeLhs = maybeOverlapping[0];
   314                 if (maybeLhs > regionRhs) break;
   315                 regionRhs = maybeLhs + maybeOverlapping[2];
   316                 hunkIndex++;
   317             }
   318 
   319             copyCommon(regionLhs);
   320             if (firstHunkIndex == hunkIndex) {
   321 		// The "overlap" was only one hunk long, meaning that
   322 		// there's no conflict here. Either a and o were the
   323 		// same, or b and o were the same.
   324                 if (hunk[4] > 0) {
   325                     result.push([hunk[1], hunk[3], hunk[4]]);
   326                 }
   327             } else {
   328 		// A proper conflict. Determine the extents of the
   329 		// regions involved from a, o and b. Effectively merge
   330 		// all the hunks on the left into one giant hunk, and
   331 		// do the same for the right; then, correct for skew
   332 		// in the regions of o that each side changed, and
   333 		// report appropriate spans for the three sides.
   334 		var regions = {
   335 		    0: [a.length, -1, o.length, -1],
   336 		    2: [b.length, -1, o.length, -1]
   337 		};
   338                 for (i = firstHunkIndex; i <= hunkIndex; i++) {
   339 		    hunk = hunks[i];
   340                     var side = hunk[1];
   341 		    var r = regions[side];
   342 		    var oLhs = hunk[0];
   343 		    var oRhs = oLhs + hunk[2];
   344                     var abLhs = hunk[3];
   345                     var abRhs = abLhs + hunk[4];
   346 		    r[0] = Math.min(abLhs, r[0]);
   347 		    r[1] = Math.max(abRhs, r[1]);
   348 		    r[2] = Math.min(oLhs, r[2]);
   349 		    r[3] = Math.max(oRhs, r[3]);
   350                 }
   351 		var aLhs = regions[0][0] + (regionLhs - regions[0][2]);
   352 		var aRhs = regions[0][1] + (regionRhs - regions[0][3]);
   353 		var bLhs = regions[2][0] + (regionLhs - regions[2][2]);
   354 		var bRhs = regions[2][1] + (regionRhs - regions[2][3]);
   355                 result.push([-1,
   356 			     aLhs,      aRhs      - aLhs,
   357 			     regionLhs, regionRhs - regionLhs,
   358 			     bLhs,      bRhs      - bLhs]);
   359             }
   360             commonOffset = regionRhs;
   361         }
   362 
   363         copyCommon(o.length);
   364         return result;
   365     },
   366 
   367     diff3_merge: function (a, o, b, excludeFalseConflicts) {
   368         // Applies the output of Diff.diff3_merge_indices to actually
   369         // construct the merged file; the returned result alternates
   370         // between "ok" and "conflict" blocks.
   371 
   372         var result = [];
   373         var files = [a, o, b];
   374         var indices = Diff.diff3_merge_indices(a, o, b);
   375 
   376         var okLines = [];
   377         function flushOk() {
   378             if (okLines.length) {
   379                 result.push({ok: okLines});
   380             }
   381             okLines = [];
   382         }
   383         function pushOk(xs) {
   384             for (var j = 0; j < xs.length; j++) {
   385                 okLines.push(xs[j]);
   386             }
   387         }
   388 
   389         function isTrueConflict(rec) {
   390             if (rec[2] != rec[6]) return true;
   391             var aoff = rec[1];
   392             var boff = rec[5];
   393             for (var j = 0; j < rec[2]; j++) {
   394                 if (a[j + aoff] != b[j + boff]) return true;
   395             }
   396             return false;
   397         }
   398 
   399         for (var i = 0; i < indices.length; i++) {
   400             var x = indices[i];
   401             var side = x[0];
   402             if (side == -1) {
   403                 if (excludeFalseConflicts && !isTrueConflict(x)) {
   404                     pushOk(files[0].slice(x[1], x[1] + x[2]));
   405                 } else {
   406                     flushOk();
   407                     result.push({conflict: {a: a.slice(x[1], x[1] + x[2]),
   408                                             aIndex: x[1],
   409                                             o: o.slice(x[3], x[3] + x[4]),
   410                                             oIndex: x[3],
   411                                             b: b.slice(x[5], x[5] + x[6]),
   412                                             bIndex: x[5]}});
   413                 }
   414             } else {
   415                 pushOk(files[side].slice(x[1], x[1] + x[2]));
   416             }
   417         }
   418 
   419         flushOk();
   420         return result;
   421     }
   422 };