286 lines
9.1 KiB
JavaScript
286 lines
9.1 KiB
JavaScript
/**
|
|
* This diff utility is taken from:
|
|
* https://github.com/Slava/diff.js
|
|
*
|
|
* The MIT License (MIT)
|
|
*
|
|
* Copyright (c) 2014 Slava
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*/
|
|
|
|
/**
|
|
* USAGE:
|
|
* diff(text1, text2);
|
|
*/
|
|
|
|
/**
|
|
* Longest Common Subsequence
|
|
*
|
|
* @param A - sequence of atoms - Array
|
|
* @param B - sequence of atoms - Array
|
|
* @param equals - optional comparator of atoms - returns true or false,
|
|
* if not specified, triple equals operator is used
|
|
* @returns Array - sequence of atoms, one of LCSs, edit script from A to B
|
|
*/
|
|
var LCS = function (A, B, /* optional */ equals) {
|
|
// We just compare atoms with default equals operator by default
|
|
if (equals === undefined)
|
|
equals = function (a, b) { return a === b; };
|
|
|
|
// NOTE: all intervals from now on are both sides inclusive
|
|
// Get the points in Edit Graph, one of the LCS paths goes through.
|
|
// The points are located on the same diagonal and represent the middle
|
|
// snake ([D/2] out of D+1) in the optimal edit path in edit graph.
|
|
// @param startA, endA - substring of A we are working on
|
|
// @param startB, endB - substring of B we are working on
|
|
// @returns Array - [
|
|
// [x, y], - beginning of the middle snake
|
|
// [u, v], - end of the middle snake
|
|
// D, - optimal edit distance
|
|
// LCS ] - length of LCS
|
|
var findMidSnake = function (startA, endA, startB, endB) {
|
|
var N = endA - startA + 1;
|
|
var M = endB - startB + 1;
|
|
var Max = N + M;
|
|
var Delta = N - M;
|
|
var halfMaxCeil = (Max + 1) / 2 | 0;
|
|
|
|
var foundOverlap = false;
|
|
var overlap = null;
|
|
|
|
// Maps -Max .. 0 .. +Max, diagonal index to endpoints for furthest reaching
|
|
// D-path on current iteration.
|
|
var V = {};
|
|
// Same but for reversed paths.
|
|
var U = {};
|
|
|
|
// Special case for the base case, D = 0, k = 0, x = y = 0
|
|
V[1] = 0;
|
|
// Special case for the base case reversed, D = 0, k = 0, x = N, y = M
|
|
U[Delta - 1] = N;
|
|
|
|
// Iterate over each possible length of edit script
|
|
for (var D = 0; D <= halfMaxCeil; D++) {
|
|
// Iterate over each diagonal
|
|
for (var k = -D; k <= D && !overlap; k += 2) {
|
|
// Positions in sequences A and B of furthest going D-path on diagonal k.
|
|
var x, y;
|
|
|
|
// Choose from each diagonal we extend
|
|
if (k === -D || (k !== D && V[k - 1] < V[k + 1]))
|
|
// Extending path one point down, that's why x doesn't change, y
|
|
// increases implicitly
|
|
x = V[k + 1];
|
|
else
|
|
// Extending path one point to the right, x increases
|
|
x = V[k - 1] + 1;
|
|
|
|
// We can calculate the y out of x and diagonal index.
|
|
y = x - k;
|
|
|
|
if (isNaN(y) || x > N || y > M)
|
|
continue;
|
|
|
|
var xx = x;
|
|
// Try to extend the D-path with diagonal paths. Possible only if atoms
|
|
// A_x match B_y
|
|
while (x < N && y < M // if there are atoms to compare
|
|
&& equals(A[startA + x], B[startB + y])) {
|
|
x++; y++;
|
|
}
|
|
|
|
// We can safely update diagonal k, since on every iteration we consider
|
|
// only even or only odd diagonals and the result of one depends only on
|
|
// diagonals of different iteration.
|
|
V[k] = x;
|
|
|
|
// Check feasibility, Delta is checked for being odd.
|
|
if ((Delta & 1) === 1 && inRange(k, Delta - (D - 1), Delta + (D - 1)))
|
|
// Forward D-path can overlap with reversed D-1-path
|
|
if (V[k] >= U[k])
|
|
// Found an overlap, the middle snake, convert X-components to dots
|
|
overlap = [xx, x].map(toPoint, k); // XXX ES5
|
|
}
|
|
|
|
if (overlap)
|
|
var SES = D * 2 - 1;
|
|
|
|
// Iterate over each diagonal for reversed case
|
|
for (var k = -D; k <= D && !overlap; k += 2) {
|
|
// The real diagonal we are looking for is k + Delta
|
|
var K = k + Delta;
|
|
var x, y;
|
|
if (k === D || (k !== -D && U[K - 1] < U[K + 1]))
|
|
x = U[K - 1];
|
|
else
|
|
x = U[K + 1] - 1;
|
|
|
|
y = x - K;
|
|
if (isNaN(y) || x < 0 || y < 0)
|
|
continue;
|
|
var xx = x;
|
|
while (x > 0 && y > 0 && equals(A[startA + x - 1], B[startB + y - 1])) {
|
|
x--; y--;
|
|
}
|
|
U[K] = x;
|
|
|
|
if (Delta % 2 === 0 && inRange(K, -D, D))
|
|
if (U[K] <= V[K])
|
|
overlap = [x, xx].map(toPoint, K); // XXX ES5
|
|
}
|
|
|
|
if (overlap) {
|
|
SES = SES || D * 2;
|
|
// Remember we had offset of each sequence?
|
|
for (var i = 0; i < 2; i++) for (var j = 0; j < 2; j++)
|
|
overlap[i][j] += [startA, startB][j] - i;
|
|
return overlap.concat([ SES, (Max - SES) / 2 ]);
|
|
}
|
|
}
|
|
};
|
|
|
|
var lcsAtoms = [];
|
|
var lcs = function (startA, endA, startB, endB) {
|
|
var N = endA - startA + 1;
|
|
var M = endB - startB + 1;
|
|
|
|
if (N > 0 && M > 0) {
|
|
var middleSnake = findMidSnake(startA, endA, startB, endB);
|
|
// A[x;u] == B[y,v] and is part of LCS
|
|
var x = middleSnake[0][0], y = middleSnake[0][1];
|
|
var u = middleSnake[1][0], v = middleSnake[1][1];
|
|
var D = middleSnake[2];
|
|
|
|
if (D > 1) {
|
|
lcs(startA, x - 1, startB, y - 1);
|
|
if (x <= u) {
|
|
[].push.apply(lcsAtoms, A.slice(x, u + 1));
|
|
}
|
|
lcs(u + 1, endA, v + 1, endB);
|
|
} else if (M > N)
|
|
[].push.apply(lcsAtoms, A.slice(startA, endA + 1));
|
|
else
|
|
[].push.apply(lcsAtoms, B.slice(startB, endB + 1));
|
|
}
|
|
};
|
|
|
|
lcs(0, A.length - 1, 0, B.length - 1);
|
|
return lcsAtoms;
|
|
};
|
|
|
|
// Helpers
|
|
var inRange = function (x, l, r) {
|
|
return (l <= x && x <= r) || (r <= x && x <= l);
|
|
};
|
|
|
|
// Takes X-component as argument, diagonal as context,
|
|
// returns array-pair of form x, y
|
|
var toPoint = function (x) {
|
|
return [x, x - this]; // XXX context is not the best way to pass diagonal
|
|
};
|
|
|
|
// Wrappers
|
|
LCS.StringLCS = function (A, B) {
|
|
return LCS(A.split(''), B.split('')).join('');
|
|
};
|
|
|
|
/**
|
|
* Diff sequence
|
|
*
|
|
* @param A - sequence of atoms - Array
|
|
* @param B - sequence of atoms - Array
|
|
* @param equals - optional comparator of atoms - returns true or false,
|
|
* if not specified, triple equals operator is used
|
|
* @returns Array - sequence of objects in a form of:
|
|
* - operation: one of "none", "add", "delete"
|
|
* - atom: the atom found in either A or B
|
|
* Applying operations from diff sequence you should be able to transform A to B
|
|
*/
|
|
function diff(A, B, equals) {
|
|
// We just compare atoms with default equals operator by default
|
|
if (equals === undefined)
|
|
equals = function (a, b) { return a === b; };
|
|
|
|
var diff = [];
|
|
var i = 0, j = 0;
|
|
var N = A.length, M = B.length, K = 0;
|
|
|
|
while (i < N && j < M && equals(A[i], B[j]))
|
|
i++, j++;
|
|
|
|
while (i < N && j < M && equals(A[N-1], B[M-1]))
|
|
N--, M--, K++;
|
|
|
|
[].push.apply(diff, A.slice(0, i).map(function (atom) {
|
|
return { operation: "none", atom: atom }; }));
|
|
|
|
var lcs = LCS(A.slice(i, N), B.slice(j, M), equals);
|
|
|
|
for (var k = 0; k < lcs.length; k++) {
|
|
var atom = lcs[k];
|
|
var ni = customIndexOf.call(A, atom, i, equals);
|
|
var nj = customIndexOf.call(B, atom, j, equals);
|
|
|
|
// XXX ES5 map
|
|
// Delete unmatched atoms from A
|
|
[].push.apply(diff, A.slice(i, ni).map(function (atom) {
|
|
return { operation: "delete", atom: atom };
|
|
}));
|
|
|
|
// Add unmatched atoms from B
|
|
[].push.apply(diff, B.slice(j, nj).map(function (atom) {
|
|
return { operation: "add", atom: atom };
|
|
}));
|
|
|
|
// Add the atom found in both sequences
|
|
diff.push({ operation: "none", atom: atom });
|
|
|
|
i = ni + 1;
|
|
j = nj + 1;
|
|
}
|
|
|
|
// Don't forget about the rest
|
|
|
|
[].push.apply(diff, A.slice(i, N).map(function (atom) {
|
|
return { operation: "delete", atom: atom };
|
|
}));
|
|
|
|
[].push.apply(diff, B.slice(j, M).map(function (atom) {
|
|
return { operation: "add", atom: atom };
|
|
}));
|
|
|
|
[].push.apply(diff, A.slice(N, N + K).map(function (atom) {
|
|
return { operation: "none", atom: atom }; }));
|
|
|
|
return diff;
|
|
};
|
|
|
|
// Accepts custom comparator
|
|
var customIndexOf = function(item, start, equals){
|
|
var arr = this;
|
|
for (var i = start; i < arr.length; i++)
|
|
if (equals(item, arr[i]))
|
|
return i;
|
|
return -1;
|
|
};
|
|
|
|
function textDiff(text1, text2) {
|
|
return diff(text1.split("\n"), text2.split("\n"));
|
|
}
|