mirror of
https://github.com/samuelclay/NewsBlur.git
synced 2025-08-05 16:58:59 +00:00
1479 lines
49 KiB
JavaScript
1479 lines
49 KiB
JavaScript
// Generated by CoffeeScript 1.3.1
|
|
|
|
/*
|
|
Module difflib -- helpers for computing deltas between objects.
|
|
|
|
Function getCloseMatches(word, possibilities, n=3, cutoff=0.6):
|
|
Use SequenceMatcher to return list of the best "good enough" matches.
|
|
|
|
Function contextDiff(a, b):
|
|
For two lists of strings, return a delta in context diff format.
|
|
|
|
Function ndiff(a, b):
|
|
Return a delta: the difference between `a` and `b` (lists of strings).
|
|
|
|
Function restore(delta, which):
|
|
Return one of the two sequences that generated an ndiff delta.
|
|
|
|
Function unifiedDiff(a, b):
|
|
For two lists of strings, return a delta in unified diff format.
|
|
|
|
Class SequenceMatcher:
|
|
A flexible class for comparing pairs of sequences of any type.
|
|
|
|
Class Differ:
|
|
For producing human-readable deltas from sequences of lines of text.
|
|
*/
|
|
|
|
|
|
(function() {
|
|
var Differ, Heap, IS_CHARACTER_JUNK, IS_LINE_JUNK, SequenceMatcher, contextDiff, floor, getCloseMatches, max, min, ndiff, restore, unifiedDiff, _any, _arrayCmp, _calculateRatio, _countLeading, _formatRangeContext, _formatRangeUnified, _has,
|
|
__indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i } return -1 }
|
|
|
|
floor = Math.floor, max = Math.max, min = Math.min
|
|
|
|
Heap = require('heap')
|
|
|
|
_calculateRatio = function(matches, length) {
|
|
if (length) {
|
|
return 2.0 * matches / length
|
|
} else {
|
|
return 1.0
|
|
}
|
|
}
|
|
|
|
_arrayCmp = function(a, b) {
|
|
var i, la, lb, _i, _ref, _ref1
|
|
_ref = [a.length, b.length], la = _ref[0], lb = _ref[1]
|
|
for (i = _i = 0, _ref1 = min(la, lb); 0 <= _ref1 ? _i < _ref1 : _i > _ref1; i = 0 <= _ref1 ? ++_i : --_i) {
|
|
if (a[i] < b[i]) {
|
|
return -1
|
|
}
|
|
if (a[i] > b[i]) {
|
|
return 1
|
|
}
|
|
}
|
|
return la - lb
|
|
}
|
|
|
|
_has = function(obj, key) {
|
|
return Object.prototype.hasOwnProperty.call(obj, key)
|
|
}
|
|
|
|
_any = function(items) {
|
|
var item, _i, _len
|
|
for (_i = 0, _len = items.length; _i < _len; _i++) {
|
|
item = items[_i]
|
|
if (item) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
SequenceMatcher = (function() {
|
|
|
|
/*
|
|
SequenceMatcher is a flexible class for comparing pairs of sequences of
|
|
any type, so long as the sequence elements are hashable. The basic
|
|
algorithm predates, and is a little fancier than, an algorithm
|
|
published in the late 1980's by Ratcliff and Obershelp under the
|
|
hyperbolic name "gestalt pattern matching". The basic idea is to find
|
|
the longest contiguous matching subsequence that contains no "junk"
|
|
elements (R-O doesn't address junk). The same idea is then applied
|
|
recursively to the pieces of the sequences to the left and to the right
|
|
of the matching subsequence. This does not yield minimal edit
|
|
sequences, but does tend to yield matches that "look right" to people.
|
|
|
|
SequenceMatcher tries to compute a "human-friendly diff" between two
|
|
sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the
|
|
longest *contiguous* & junk-free matching subsequence. That's what
|
|
catches peoples' eyes. The Windows(tm) windiff has another interesting
|
|
notion, pairing up elements that appear uniquely in each sequence.
|
|
That, and the method here, appear to yield more intuitive difference
|
|
reports than does diff. This method appears to be the least vulnerable
|
|
to synching up on blocks of "junk lines", though (like blank lines in
|
|
ordinary text files, or maybe "<P>" lines in HTML files). That may be
|
|
because this is the only method of the 3 that has a *concept* of
|
|
"junk" <wink>.
|
|
|
|
Example, comparing two strings, and considering blanks to be "junk":
|
|
|
|
>>> isjunk = (c) -> c is ' '
|
|
>>> s = new SequenceMatcher(isjunk,
|
|
'private Thread currentThread;',
|
|
'private volatile Thread currentThread;')
|
|
|
|
.ratio() returns a float in [0, 1], measuring the "similarity" of the
|
|
sequences. As a rule of thumb, a .ratio() value over 0.6 means the
|
|
sequences are close matches:
|
|
|
|
>>> s.ratio().toPrecision(3)
|
|
'0.866'
|
|
|
|
If you're only interested in where the sequences match,
|
|
.getMatchingBlocks() is handy:
|
|
|
|
>>> for [a, b, size] in s.getMatchingBlocks()
|
|
... console.log("a[#{a}] and b[#{b}] match for #{size} elements");
|
|
a[0] and b[0] match for 8 elements
|
|
a[8] and b[17] match for 21 elements
|
|
a[29] and b[38] match for 0 elements
|
|
|
|
Note that the last tuple returned by .get_matching_blocks() is always a
|
|
dummy, (len(a), len(b), 0), and this is the only case in which the last
|
|
tuple element (number of elements matched) is 0.
|
|
|
|
If you want to know how to change the first sequence into the second,
|
|
use .get_opcodes():
|
|
|
|
>>> for [op, a1, a2, b1, b2] in s.getOpcodes()
|
|
... console.log "#{op} a[#{a1}:#{a2}] b[#{b1}:#{b2}]"
|
|
equal a[0:8] b[0:8]
|
|
insert a[8:8] b[8:17]
|
|
equal a[8:29] b[17:38]
|
|
|
|
See the Differ class for a fancy human-friendly file differencer, which
|
|
uses SequenceMatcher both to compare sequences of lines, and to compare
|
|
sequences of characters within similar (near-matching) lines.
|
|
|
|
See also function getCloseMatches() in this module, which shows how
|
|
simple code building on SequenceMatcher can be used to do useful work.
|
|
|
|
Timing: Basic R-O is cubic time worst case and quadratic time expected
|
|
case. SequenceMatcher is quadratic time for the worst case and has
|
|
expected-case behavior dependent in a complicated way on how many
|
|
elements the sequences have in common; best case time is linear.
|
|
|
|
Methods:
|
|
|
|
constructor(isjunk=null, a='', b='')
|
|
Construct a SequenceMatcher.
|
|
|
|
setSeqs(a, b)
|
|
Set the two sequences to be compared.
|
|
|
|
setSeq1(a)
|
|
Set the first sequence to be compared.
|
|
|
|
setSeq2(b)
|
|
Set the second sequence to be compared.
|
|
|
|
findLongestMatch(alo, ahi, blo, bhi)
|
|
Find longest matching block in a[alo:ahi] and b[blo:bhi].
|
|
|
|
getMatchingBlocks()
|
|
Return list of triples describing matching subsequences.
|
|
|
|
getOpcodes()
|
|
Return list of 5-tuples describing how to turn a into b.
|
|
|
|
ratio()
|
|
Return a measure of the sequences' similarity (float in [0,1]).
|
|
|
|
quickRatio()
|
|
Return an upper bound on .ratio() relatively quickly.
|
|
|
|
realQuickRatio()
|
|
Return an upper bound on ratio() very quickly.
|
|
*/
|
|
|
|
|
|
function SequenceMatcher(isjunk, a, b, autojunk) {
|
|
this.isjunk = isjunk
|
|
if (a == null) {
|
|
a = ''
|
|
}
|
|
if (b == null) {
|
|
b = ''
|
|
}
|
|
this.autojunk = autojunk != null ? autojunk : true
|
|
/*
|
|
Construct a SequenceMatcher.
|
|
|
|
Optional arg isjunk is null (the default), or a one-argument
|
|
function that takes a sequence element and returns true iff the
|
|
element is junk. Null is equivalent to passing "(x) -> 0", i.e.
|
|
no elements are considered to be junk. For example, pass
|
|
(x) -> x in ' \t'
|
|
if you're comparing lines as sequences of characters, and don't
|
|
want to synch up on blanks or hard tabs.
|
|
|
|
Optional arg a is the first of two sequences to be compared. By
|
|
default, an empty string. The elements of a must be hashable. See
|
|
also .setSeqs() and .setSeq1().
|
|
|
|
Optional arg b is the second of two sequences to be compared. By
|
|
default, an empty string. The elements of b must be hashable. See
|
|
also .setSeqs() and .setSeq2().
|
|
|
|
Optional arg autojunk should be set to false to disable the
|
|
"automatic junk heuristic" that treats popular elements as junk
|
|
(see module documentation for more information).
|
|
*/
|
|
|
|
this.a = this.b = null
|
|
this.setSeqs(a, b)
|
|
}
|
|
|
|
SequenceMatcher.prototype.setSeqs = function(a, b) {
|
|
/*
|
|
Set the two sequences to be compared.
|
|
|
|
>>> s = new SequenceMatcher()
|
|
>>> s.setSeqs('abcd', 'bcde')
|
|
>>> s.ratio()
|
|
0.75
|
|
*/
|
|
this.setSeq1(a)
|
|
return this.setSeq2(b)
|
|
}
|
|
|
|
SequenceMatcher.prototype.setSeq1 = function(a) {
|
|
/*
|
|
Set the first sequence to be compared.
|
|
|
|
The second sequence to be compared is not changed.
|
|
|
|
>>> s = new SequenceMatcher(null, 'abcd', 'bcde')
|
|
>>> s.ratio()
|
|
0.75
|
|
>>> s.setSeq1('bcde')
|
|
>>> s.ratio()
|
|
1.0
|
|
|
|
SequenceMatcher computes and caches detailed information about the
|
|
second sequence, so if you want to compare one sequence S against
|
|
many sequences, use .setSeq2(S) once and call .setSeq1(x)
|
|
repeatedly for each of the other sequences.
|
|
|
|
See also setSeqs() and setSeq2().
|
|
*/
|
|
if (a === this.a) {
|
|
return
|
|
}
|
|
this.a = a
|
|
return this.matchingBlocks = this.opcodes = null
|
|
}
|
|
|
|
SequenceMatcher.prototype.setSeq2 = function(b) {
|
|
/*
|
|
Set the second sequence to be compared.
|
|
|
|
The first sequence to be compared is not changed.
|
|
|
|
>>> s = new SequenceMatcher(null, 'abcd', 'bcde')
|
|
>>> s.ratio()
|
|
0.75
|
|
>>> s.setSeq2('abcd')
|
|
>>> s.ratio()
|
|
1.0
|
|
|
|
SequenceMatcher computes and caches detailed information about the
|
|
second sequence, so if you want to compare one sequence S against
|
|
many sequences, use .setSeq2(S) once and call .setSeq1(x)
|
|
repeatedly for each of the other sequences.
|
|
|
|
See also setSeqs() and setSeq1().
|
|
*/
|
|
if (b === this.b) {
|
|
return
|
|
}
|
|
this.b = b
|
|
this.matchingBlocks = this.opcodes = null
|
|
this.fullbcount = null
|
|
return this._chainB()
|
|
}
|
|
|
|
SequenceMatcher.prototype._chainB = function() {
|
|
var b, b2j, elt, i, idxs, indices, isjunk, junk, n, ntest, popular, _i, _j, _len, _len1, _ref
|
|
b = this.b
|
|
this.b2j = b2j = {}
|
|
for (i = _i = 0, _len = b.length; _i < _len; i = ++_i) {
|
|
elt = b[i]
|
|
indices = _has(b2j, elt) ? b2j[elt] : b2j[elt] = []
|
|
indices.push(i)
|
|
}
|
|
junk = {}
|
|
isjunk = this.isjunk
|
|
if (isjunk) {
|
|
_ref = Object.keys(b2j)
|
|
for (_j = 0, _len1 = _ref.length; _j < _len1; _j++) {
|
|
elt = _ref[_j]
|
|
if (isjunk(elt)) {
|
|
junk[elt] = true
|
|
delete b2j[elt]
|
|
}
|
|
}
|
|
}
|
|
popular = {}
|
|
n = b.length
|
|
if (this.autojunk && n >= 200) {
|
|
ntest = floor(n / 100) + 1
|
|
for (elt in b2j) {
|
|
idxs = b2j[elt]
|
|
if (idxs.length > ntest) {
|
|
popular[elt] = true
|
|
delete b2j[elt]
|
|
}
|
|
}
|
|
}
|
|
this.isbjunk = function(b) {
|
|
return _has(junk, b)
|
|
}
|
|
return this.isbpopular = function(b) {
|
|
return _has(popular, b)
|
|
}
|
|
}
|
|
|
|
SequenceMatcher.prototype.findLongestMatch = function(alo, ahi, blo, bhi) {
|
|
/*
|
|
Find longest matching block in a[alo...ahi] and b[blo...bhi].
|
|
|
|
If isjunk is not defined:
|
|
|
|
Return [i,j,k] such that a[i...i+k] is equal to b[j...j+k], where
|
|
alo <= i <= i+k <= ahi
|
|
blo <= j <= j+k <= bhi
|
|
and for all [i',j',k'] meeting those conditions,
|
|
k >= k'
|
|
i <= i'
|
|
and if i == i', j <= j'
|
|
|
|
In other words, of all maximal matching blocks, return one that
|
|
starts earliest in a, and of all those maximal matching blocks that
|
|
start earliest in a, return the one that starts earliest in b.
|
|
|
|
>>> isjunk = (x) -> x is ' '
|
|
>>> s = new SequenceMatcher(isjunk, ' abcd', 'abcd abcd')
|
|
>>> s.findLongestMatch(0, 5, 0, 9)
|
|
[1, 0, 4]
|
|
|
|
>>> s = new SequenceMatcher(null, 'ab', 'c')
|
|
>>> s.findLongestMatch(0, 2, 0, 1)
|
|
[0, 0, 0]
|
|
*/
|
|
|
|
var a, b, b2j, besti, bestj, bestsize, i, isbjunk, j, j2len, k, newj2len, _i, _j, _len, _ref, _ref1, _ref2, _ref3, _ref4, _ref5
|
|
_ref = [this.a, this.b, this.b2j, this.isbjunk], a = _ref[0], b = _ref[1], b2j = _ref[2], isbjunk = _ref[3]
|
|
_ref1 = [alo, blo, 0], besti = _ref1[0], bestj = _ref1[1], bestsize = _ref1[2]
|
|
j2len = {}
|
|
for (i = _i = alo; alo <= ahi ? _i < ahi : _i > ahi; i = alo <= ahi ? ++_i : --_i) {
|
|
newj2len = {}
|
|
_ref2 = (_has(b2j, a[i]) ? b2j[a[i]] : [])
|
|
for (_j = 0, _len = _ref2.length; _j < _len; _j++) {
|
|
j = _ref2[_j]
|
|
if (j < blo) {
|
|
continue
|
|
}
|
|
if (j >= bhi) {
|
|
break
|
|
}
|
|
k = newj2len[j] = (j2len[j - 1] || 0) + 1
|
|
if (k > bestsize) {
|
|
_ref3 = [i - k + 1, j - k + 1, k], besti = _ref3[0], bestj = _ref3[1], bestsize = _ref3[2]
|
|
}
|
|
}
|
|
j2len = newj2len
|
|
}
|
|
while (besti > alo && bestj > blo && !isbjunk(b[bestj - 1]) && a[besti - 1] === b[bestj - 1]) {
|
|
_ref4 = [besti - 1, bestj - 1, bestsize + 1], besti = _ref4[0], bestj = _ref4[1], bestsize = _ref4[2]
|
|
}
|
|
while (besti + bestsize < ahi && bestj + bestsize < bhi && !isbjunk(b[bestj + bestsize]) && a[besti + bestsize] === b[bestj + bestsize]) {
|
|
bestsize++
|
|
}
|
|
while (besti > alo && bestj > blo && isbjunk(b[bestj - 1]) && a[besti - 1] === b[bestj - 1]) {
|
|
_ref5 = [besti - 1, bestj - 1, bestsize + 1], besti = _ref5[0], bestj = _ref5[1], bestsize = _ref5[2]
|
|
}
|
|
while (besti + bestsize < ahi && bestj + bestsize < bhi && isbjunk(b[bestj + bestsize]) && a[besti + bestsize] === b[bestj + bestsize]) {
|
|
bestsize++
|
|
}
|
|
return [besti, bestj, bestsize]
|
|
}
|
|
|
|
SequenceMatcher.prototype.getMatchingBlocks = function() {
|
|
/*
|
|
Return list of triples describing matching subsequences.
|
|
|
|
Each triple is of the form [i, j, n], and means that
|
|
a[i...i+n] == b[j...j+n]. The triples are monotonically increasing in
|
|
i and in j. it's also guaranteed that if
|
|
[i, j, n] and [i', j', n'] are adjacent triples in the list, and
|
|
the second is not the last triple in the list, then i+n != i' or
|
|
j+n != j'. IOW, adjacent triples never describe adjacent equal
|
|
blocks.
|
|
|
|
The last triple is a dummy, [a.length, b.length, 0], and is the only
|
|
triple with n==0.
|
|
|
|
>>> s = new SequenceMatcher(null, 'abxcd', 'abcd')
|
|
>>> s.getMatchingBlocks()
|
|
[[0, 0, 2], [3, 2, 2], [5, 4, 0]]
|
|
*/
|
|
|
|
var ahi, alo, bhi, blo, i, i1, i2, j, j1, j2, k, k1, k2, la, lb, matchingBlocks, nonAdjacent, queue, x, _i, _len, _ref, _ref1, _ref2, _ref3, _ref4
|
|
if (this.matchingBlocks) {
|
|
return this.matchingBlocks
|
|
}
|
|
_ref = [this.a.length, this.b.length], la = _ref[0], lb = _ref[1]
|
|
queue = [[0, la, 0, lb]]
|
|
matchingBlocks = []
|
|
while (queue.length) {
|
|
_ref1 = queue.pop(), alo = _ref1[0], ahi = _ref1[1], blo = _ref1[2], bhi = _ref1[3]
|
|
_ref2 = x = this.findLongestMatch(alo, ahi, blo, bhi), i = _ref2[0], j = _ref2[1], k = _ref2[2]
|
|
if (k) {
|
|
matchingBlocks.push(x)
|
|
if (alo < i && blo < j) {
|
|
queue.push([alo, i, blo, j])
|
|
}
|
|
if (i + k < ahi && j + k < bhi) {
|
|
queue.push([i + k, ahi, j + k, bhi])
|
|
}
|
|
}
|
|
}
|
|
matchingBlocks.sort(_arrayCmp)
|
|
i1 = j1 = k1 = 0
|
|
nonAdjacent = []
|
|
for (_i = 0, _len = matchingBlocks.length; _i < _len; _i++) {
|
|
_ref3 = matchingBlocks[_i], i2 = _ref3[0], j2 = _ref3[1], k2 = _ref3[2]
|
|
if (i1 + k1 === i2 && j1 + k1 === j2) {
|
|
k1 += k2
|
|
} else {
|
|
if (k1) {
|
|
nonAdjacent.push([i1, j1, k1])
|
|
}
|
|
_ref4 = [i2, j2, k2], i1 = _ref4[0], j1 = _ref4[1], k1 = _ref4[2]
|
|
}
|
|
}
|
|
if (k1) {
|
|
nonAdjacent.push([i1, j1, k1])
|
|
}
|
|
nonAdjacent.push([la, lb, 0])
|
|
return this.matchingBlocks = nonAdjacent
|
|
}
|
|
|
|
SequenceMatcher.prototype.getOpcodes = function() {
|
|
/*
|
|
Return list of 5-tuples describing how to turn a into b.
|
|
|
|
Each tuple is of the form [tag, i1, i2, j1, j2]. The first tuple
|
|
has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
|
|
tuple preceding it, and likewise for j1 == the previous j2.
|
|
|
|
The tags are strings, with these meanings:
|
|
|
|
'replace': a[i1...i2] should be replaced by b[j1...j2]
|
|
'delete': a[i1...i2] should be deleted.
|
|
Note that j1==j2 in this case.
|
|
'insert': b[j1...j2] should be inserted at a[i1...i1].
|
|
Note that i1==i2 in this case.
|
|
'equal': a[i1...i2] == b[j1...j2]
|
|
|
|
>>> s = new SequenceMatcher(null, 'qabxcd', 'abycdf')
|
|
>>> s.getOpcodes()
|
|
[ [ 'delete' , 0 , 1 , 0 , 0 ] ,
|
|
[ 'equal' , 1 , 3 , 0 , 2 ] ,
|
|
[ 'replace' , 3 , 4 , 2 , 3 ] ,
|
|
[ 'equal' , 4 , 6 , 3 , 5 ] ,
|
|
[ 'insert' , 6 , 6 , 5 , 6 ] ]
|
|
*/
|
|
|
|
var ai, answer, bj, i, j, size, tag, _i, _len, _ref, _ref1, _ref2
|
|
if (this.opcodes) {
|
|
return this.opcodes
|
|
}
|
|
i = j = 0
|
|
this.opcodes = answer = []
|
|
_ref = this.getMatchingBlocks()
|
|
for (_i = 0, _len = _ref.length; _i < _len; _i++) {
|
|
_ref1 = _ref[_i], ai = _ref1[0], bj = _ref1[1], size = _ref1[2]
|
|
tag = ''
|
|
if (i < ai && j < bj) {
|
|
tag = 'replace'
|
|
} else if (i < ai) {
|
|
tag = 'delete'
|
|
} else if (j < bj) {
|
|
tag = 'insert'
|
|
}
|
|
if (tag) {
|
|
answer.push([tag, i, ai, j, bj])
|
|
}
|
|
_ref2 = [ai + size, bj + size], i = _ref2[0], j = _ref2[1]
|
|
if (size) {
|
|
answer.push(['equal', ai, i, bj, j])
|
|
}
|
|
}
|
|
return answer
|
|
}
|
|
|
|
SequenceMatcher.prototype.getGroupedOpcodes = function(n) {
|
|
var codes, group, groups, i1, i2, j1, j2, nn, tag, _i, _len, _ref, _ref1, _ref2, _ref3
|
|
if (n == null) {
|
|
n = 3
|
|
}
|
|
/*
|
|
Isolate change clusters by eliminating ranges with no changes.
|
|
|
|
Return a list groups with upto n lines of context.
|
|
Each group is in the same format as returned by get_opcodes().
|
|
|
|
>>> a = [1...40].map(String)
|
|
>>> b = a.slice()
|
|
>>> b[8...8] = 'i'
|
|
>>> b[20] += 'x'
|
|
>>> b[23...28] = []
|
|
>>> b[30] += 'y'
|
|
>>> s = new SequenceMatcher(null, a, b)
|
|
>>> s.getGroupedOpcodes()
|
|
[ [ [ 'equal' , 5 , 8 , 5 , 8 ],
|
|
[ 'insert' , 8 , 8 , 8 , 9 ],
|
|
[ 'equal' , 8 , 11 , 9 , 12 ] ],
|
|
[ [ 'equal' , 16 , 19 , 17 , 20 ],
|
|
[ 'replace' , 19 , 20 , 20 , 21 ],
|
|
[ 'equal' , 20 , 22 , 21 , 23 ],
|
|
[ 'delete' , 22 , 27 , 23 , 23 ],
|
|
[ 'equal' , 27 , 30 , 23 , 26 ] ],
|
|
[ [ 'equal' , 31 , 34 , 27 , 30 ],
|
|
[ 'replace' , 34 , 35 , 30 , 31 ],
|
|
[ 'equal' , 35 , 38 , 31 , 34 ] ] ]
|
|
*/
|
|
|
|
codes = this.getOpcodes()
|
|
if (!codes.length) {
|
|
codes = [['equal', 0, 1, 0, 1]]
|
|
}
|
|
if (codes[0][0] === 'equal') {
|
|
_ref = codes[0], tag = _ref[0], i1 = _ref[1], i2 = _ref[2], j1 = _ref[3], j2 = _ref[4]
|
|
codes[0] = [tag, max(i1, i2 - n), i2, max(j1, j2 - n), j2]
|
|
}
|
|
if (codes[codes.length - 1][0] === 'equal') {
|
|
_ref1 = codes[codes.length - 1], tag = _ref1[0], i1 = _ref1[1], i2 = _ref1[2], j1 = _ref1[3], j2 = _ref1[4]
|
|
codes[codes.length - 1] = [tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)]
|
|
}
|
|
nn = n + n
|
|
groups = []
|
|
group = []
|
|
for (_i = 0, _len = codes.length; _i < _len; _i++) {
|
|
_ref2 = codes[_i], tag = _ref2[0], i1 = _ref2[1], i2 = _ref2[2], j1 = _ref2[3], j2 = _ref2[4]
|
|
if (tag === 'equal' && i2 - i1 > nn) {
|
|
group.push([tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)])
|
|
groups.push(group)
|
|
group = []
|
|
_ref3 = [max(i1, i2 - n), max(j1, j2 - n)], i1 = _ref3[0], j1 = _ref3[1]
|
|
}
|
|
group.push([tag, i1, i2, j1, j2])
|
|
}
|
|
if (group.length && !(group.length === 1 && group[0][0] === 'equal')) {
|
|
groups.push(group)
|
|
}
|
|
return groups
|
|
}
|
|
|
|
SequenceMatcher.prototype.ratio = function() {
|
|
/*
|
|
Return a measure of the sequences' similarity (float in [0,1]).
|
|
|
|
Where T is the total number of elements in both sequences, and
|
|
M is the number of matches, this is 2.0*M / T.
|
|
Note that this is 1 if the sequences are identical, and 0 if
|
|
they have nothing in common.
|
|
|
|
.ratio() is expensive to compute if you haven't already computed
|
|
.getMatchingBlocks() or .getOpcodes(), in which case you may
|
|
want to try .quickRatio() or .realQuickRatio() first to get an
|
|
upper bound.
|
|
|
|
>>> s = new SequenceMatcher(null, 'abcd', 'bcde')
|
|
>>> s.ratio()
|
|
0.75
|
|
>>> s.quickRatio()
|
|
0.75
|
|
>>> s.realQuickRatio()
|
|
1.0
|
|
*/
|
|
|
|
var match, matches, _i, _len, _ref
|
|
matches = 0
|
|
_ref = this.getMatchingBlocks()
|
|
for (_i = 0, _len = _ref.length; _i < _len; _i++) {
|
|
match = _ref[_i]
|
|
matches += match[2]
|
|
}
|
|
return _calculateRatio(matches, this.a.length + this.b.length)
|
|
}
|
|
|
|
SequenceMatcher.prototype.quickRatio = function() {
|
|
/*
|
|
Return an upper bound on ratio() relatively quickly.
|
|
|
|
This isn't defined beyond that it is an upper bound on .ratio(), and
|
|
is faster to compute.
|
|
*/
|
|
|
|
var avail, elt, fullbcount, matches, numb, _i, _j, _len, _len1, _ref, _ref1
|
|
if (!this.fullbcount) {
|
|
this.fullbcount = fullbcount = {}
|
|
_ref = this.b
|
|
for (_i = 0, _len = _ref.length; _i < _len; _i++) {
|
|
elt = _ref[_i]
|
|
fullbcount[elt] = (fullbcount[elt] || 0) + 1
|
|
}
|
|
}
|
|
fullbcount = this.fullbcount
|
|
avail = {}
|
|
matches = 0
|
|
_ref1 = this.a
|
|
for (_j = 0, _len1 = _ref1.length; _j < _len1; _j++) {
|
|
elt = _ref1[_j]
|
|
if (_has(avail, elt)) {
|
|
numb = avail[elt]
|
|
} else {
|
|
numb = fullbcount[elt] || 0
|
|
}
|
|
avail[elt] = numb - 1
|
|
if (numb > 0) {
|
|
matches++
|
|
}
|
|
}
|
|
return _calculateRatio(matches, this.a.length + this.b.length)
|
|
}
|
|
|
|
SequenceMatcher.prototype.realQuickRatio = function() {
|
|
/*
|
|
Return an upper bound on ratio() very quickly.
|
|
|
|
This isn't defined beyond that it is an upper bound on .ratio(), and
|
|
is faster to compute than either .ratio() or .quickRatio().
|
|
*/
|
|
|
|
var la, lb, _ref
|
|
_ref = [this.a.length, this.b.length], la = _ref[0], lb = _ref[1]
|
|
return _calculateRatio(min(la, lb), la + lb)
|
|
}
|
|
|
|
return SequenceMatcher
|
|
|
|
})()
|
|
|
|
getCloseMatches = function(word, possibilities, n, cutoff) {
|
|
var result, s, score, x, _i, _j, _len, _len1, _ref, _results
|
|
if (n == null) {
|
|
n = 3
|
|
}
|
|
if (cutoff == null) {
|
|
cutoff = 0.6
|
|
}
|
|
/*
|
|
Use SequenceMatcher to return list of the best "good enough" matches.
|
|
|
|
word is a sequence for which close matches are desired (typically a
|
|
string).
|
|
|
|
possibilities is a list of sequences against which to match word
|
|
(typically a list of strings).
|
|
|
|
Optional arg n (default 3) is the maximum number of close matches to
|
|
return. n must be > 0.
|
|
|
|
Optional arg cutoff (default 0.6) is a float in [0, 1]. Possibilities
|
|
that don't score at least that similar to word are ignored.
|
|
|
|
The best (no more than n) matches among the possibilities are returned
|
|
in a list, sorted by similarity score, most similar first.
|
|
|
|
>>> getCloseMatches('appel', ['ape', 'apple', 'peach', 'puppy'])
|
|
['apple', 'ape']
|
|
>>> KEYWORDS = require('coffee-script').RESERVED
|
|
>>> getCloseMatches('wheel', KEYWORDS)
|
|
['when', 'while']
|
|
>>> getCloseMatches('accost', KEYWORDS)
|
|
['const']
|
|
*/
|
|
|
|
if (!(n > 0)) {
|
|
throw new Error("n must be > 0: (" + n + ")")
|
|
}
|
|
if (!((0.0 <= cutoff && cutoff <= 1.0))) {
|
|
throw new Error("cutoff must be in [0.0, 1.0]: (" + cutoff + ")")
|
|
}
|
|
result = []
|
|
s = new SequenceMatcher()
|
|
s.setSeq2(word)
|
|
for (_i = 0, _len = possibilities.length; _i < _len; _i++) {
|
|
x = possibilities[_i]
|
|
s.setSeq1(x)
|
|
if (s.realQuickRatio() >= cutoff && s.quickRatio() >= cutoff && s.ratio() >= cutoff) {
|
|
result.push([s.ratio(), x])
|
|
}
|
|
}
|
|
result = Heap.nlargest(result, n, _arrayCmp)
|
|
_results = []
|
|
for (_j = 0, _len1 = result.length; _j < _len1; _j++) {
|
|
_ref = result[_j], score = _ref[0], x = _ref[1]
|
|
_results.push(x)
|
|
}
|
|
return _results
|
|
}
|
|
|
|
_countLeading = function(line, ch) {
|
|
/*
|
|
Return number of `ch` characters at the start of `line`.
|
|
|
|
>>> _countLeading(' abc', ' ')
|
|
3
|
|
*/
|
|
|
|
var i, n, _ref
|
|
_ref = [0, line.length], i = _ref[0], n = _ref[1]
|
|
while (i < n && line[i] === ch) {
|
|
i++
|
|
}
|
|
return i
|
|
}
|
|
|
|
Differ = (function() {
|
|
|
|
/*
|
|
Differ is a class for comparing sequences of lines of text, and
|
|
producing human-readable differences or deltas. Differ uses
|
|
SequenceMatcher both to compare sequences of lines, and to compare
|
|
sequences of characters within similar (near-matching) lines.
|
|
|
|
Each line of a Differ delta begins with a two-letter code:
|
|
|
|
'- ' line unique to sequence 1
|
|
'+ ' line unique to sequence 2
|
|
' ' line common to both sequences
|
|
'? ' line not present in either input sequence
|
|
|
|
Lines beginning with '? ' attempt to guide the eye to intraline
|
|
differences, and were not present in either input sequence. These lines
|
|
can be confusing if the sequences contain tab characters.
|
|
|
|
Note that Differ makes no claim to produce a *minimal* diff. To the
|
|
contrary, minimal diffs are often counter-intuitive, because they synch
|
|
up anywhere possible, sometimes accidental matches 100 pages apart.
|
|
Restricting synch points to contiguous matches preserves some notion of
|
|
locality, at the occasional cost of producing a longer diff.
|
|
|
|
Example: Comparing two texts.
|
|
|
|
>>> text1 = ['1. Beautiful is better than ugly.\n',
|
|
... '2. Explicit is better than implicit.\n',
|
|
... '3. Simple is better than complex.\n',
|
|
... '4. Complex is better than complicated.\n']
|
|
>>> text1.length
|
|
4
|
|
>>> text2 = ['1. Beautiful is better than ugly.\n',
|
|
... '3. Simple is better than complex.\n',
|
|
... '4. Complicated is better than complex.\n',
|
|
... '5. Flat is better than nested.\n']
|
|
|
|
Next we instantiate a Differ object:
|
|
|
|
>>> d = new Differ()
|
|
|
|
Note that when instantiating a Differ object we may pass functions to
|
|
filter out line and character 'junk'.
|
|
|
|
Finally, we compare the two:
|
|
|
|
>>> result = d.compare(text1, text2)
|
|
[ ' 1. Beautiful is better than ugly.\n',
|
|
'- 2. Explicit is better than implicit.\n',
|
|
'- 3. Simple is better than complex.\n',
|
|
'+ 3. Simple is better than complex.\n',
|
|
'? ++\n',
|
|
'- 4. Complex is better than complicated.\n',
|
|
'? ^ ---- ^\n',
|
|
'+ 4. Complicated is better than complex.\n',
|
|
'? ++++ ^ ^\n',
|
|
'+ 5. Flat is better than nested.\n' ]
|
|
|
|
Methods:
|
|
|
|
constructor(linejunk=null, charjunk=null)
|
|
Construct a text differencer, with optional filters.
|
|
compare(a, b)
|
|
Compare two sequences of lines; generate the resulting delta.
|
|
*/
|
|
|
|
|
|
function Differ(linejunk, charjunk) {
|
|
this.linejunk = linejunk
|
|
this.charjunk = charjunk
|
|
/*
|
|
Construct a text differencer, with optional filters.
|
|
|
|
The two optional keyword parameters are for filter functions:
|
|
|
|
- `linejunk`: A function that should accept a single string argument,
|
|
and return true iff the string is junk. The module-level function
|
|
`IS_LINE_JUNK` may be used to filter out lines without visible
|
|
characters, except for at most one splat ('#'). It is recommended
|
|
to leave linejunk null.
|
|
|
|
- `charjunk`: A function that should accept a string of length 1. The
|
|
module-level function `IS_CHARACTER_JUNK` may be used to filter out
|
|
whitespace characters (a blank or tab; **note**: bad idea to include
|
|
newline in this!). Use of IS_CHARACTER_JUNK is recommended.
|
|
*/
|
|
|
|
}
|
|
|
|
Differ.prototype.compare = function(a, b) {
|
|
/*
|
|
Compare two sequences of lines; generate the resulting delta.
|
|
|
|
Each sequence must contain individual single-line strings ending with
|
|
newlines. Such sequences can be obtained from the `readlines()` method
|
|
of file-like objects. The delta generated also consists of newline-
|
|
terminated strings, ready to be printed as-is via the writeline()
|
|
method of a file-like object.
|
|
|
|
Example:
|
|
|
|
>>> d = new Differ
|
|
>>> d.compare(['one\n', 'two\n', 'three\n'],
|
|
... ['ore\n', 'tree\n', 'emu\n'])
|
|
[ '- one\n',
|
|
'? ^\n',
|
|
'+ ore\n',
|
|
'? ^\n',
|
|
'- two\n',
|
|
'- three\n',
|
|
'? -\n',
|
|
'+ tree\n',
|
|
'+ emu\n' ]
|
|
*/
|
|
|
|
var ahi, alo, bhi, blo, cruncher, g, line, lines, tag, _i, _j, _len, _len1, _ref, _ref1
|
|
cruncher = new SequenceMatcher(this.linejunk, a, b)
|
|
lines = []
|
|
_ref = cruncher.getOpcodes()
|
|
for (_i = 0, _len = _ref.length; _i < _len; _i++) {
|
|
_ref1 = _ref[_i], tag = _ref1[0], alo = _ref1[1], ahi = _ref1[2], blo = _ref1[3], bhi = _ref1[4]
|
|
switch (tag) {
|
|
case 'replace':
|
|
g = this._fancyReplace(a, alo, ahi, b, blo, bhi)
|
|
break
|
|
case 'delete':
|
|
g = this._dump('-', a, alo, ahi)
|
|
break
|
|
case 'insert':
|
|
g = this._dump('+', b, blo, bhi)
|
|
break
|
|
case 'equal':
|
|
g = this._dump(' ', a, alo, ahi)
|
|
break
|
|
default:
|
|
throw new Error("unknow tag (" + tag + ")")
|
|
}
|
|
for (_j = 0, _len1 = g.length; _j < _len1; _j++) {
|
|
line = g[_j]
|
|
lines.push(line)
|
|
}
|
|
}
|
|
return lines
|
|
}
|
|
|
|
Differ.prototype._dump = function(tag, x, lo, hi) {
|
|
/*
|
|
Generate comparison results for a same-tagged range.
|
|
*/
|
|
|
|
var i, _i, _results
|
|
_results = []
|
|
for (i = _i = lo; lo <= hi ? _i < hi : _i > hi; i = lo <= hi ? ++_i : --_i) {
|
|
_results.push("" + tag + " " + x[i])
|
|
}
|
|
return _results
|
|
}
|
|
|
|
Differ.prototype._plainReplace = function(a, alo, ahi, b, blo, bhi) {
|
|
var first, g, line, lines, second, _i, _j, _len, _len1, _ref
|
|
if (bhi - blo < ahi - alo) {
|
|
first = this._dump('+', b, blo, bhi)
|
|
second = this._dump('-', a, alo, ahi)
|
|
} else {
|
|
first = this._dump('-', a, alo, ahi)
|
|
second = this._dump('+', b, blo, bhi)
|
|
}
|
|
lines = []
|
|
_ref = [first, second]
|
|
for (_i = 0, _len = _ref.length; _i < _len; _i++) {
|
|
g = _ref[_i]
|
|
for (_j = 0, _len1 = g.length; _j < _len1; _j++) {
|
|
line = g[_j]
|
|
lines.push(line)
|
|
}
|
|
}
|
|
return lines
|
|
}
|
|
|
|
Differ.prototype._fancyReplace = function(a, alo, ahi, b, blo, bhi) {
|
|
/*
|
|
When replacing one block of lines with another, search the blocks
|
|
for *similar* lines; the best-matching pair (if any) is used as a
|
|
synch point, and intraline difference marking is done on the
|
|
similar pair. Lots of work, but often worth it.
|
|
|
|
Example:
|
|
>>> d = new Differ
|
|
>>> d._fancyReplace(['abcDefghiJkl\n'], 0, 1,
|
|
... ['abcdefGhijkl\n'], 0, 1)
|
|
[ '- abcDefghiJkl\n',
|
|
'? ^ ^ ^\n',
|
|
'+ abcdefGhijkl\n',
|
|
'? ^ ^ ^\n' ]
|
|
*/
|
|
|
|
var aelt, ai, ai1, ai2, atags, belt, bestRatio, besti, bestj, bj, bj1, bj2, btags, cruncher, cutoff, eqi, eqj, i, j, la, lb, line, lines, tag, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _n, _o, _ref, _ref1, _ref10, _ref11, _ref12, _ref2, _ref3, _ref4, _ref5, _ref6, _ref7, _ref8, _ref9
|
|
_ref = [0.74, 0.75], bestRatio = _ref[0], cutoff = _ref[1]
|
|
cruncher = new SequenceMatcher(this.charjunk)
|
|
_ref1 = [null, null], eqi = _ref1[0], eqj = _ref1[1]
|
|
lines = []
|
|
for (j = _i = blo; blo <= bhi ? _i < bhi : _i > bhi; j = blo <= bhi ? ++_i : --_i) {
|
|
bj = b[j]
|
|
cruncher.setSeq2(bj)
|
|
for (i = _j = alo; alo <= ahi ? _j < ahi : _j > ahi; i = alo <= ahi ? ++_j : --_j) {
|
|
ai = a[i]
|
|
if (ai === bj) {
|
|
if (eqi === null) {
|
|
_ref2 = [i, j], eqi = _ref2[0], eqj = _ref2[1]
|
|
}
|
|
continue
|
|
}
|
|
cruncher.setSeq1(ai)
|
|
if (cruncher.realQuickRatio() > bestRatio && cruncher.quickRatio() > bestRatio && cruncher.ratio() > bestRatio) {
|
|
_ref3 = [cruncher.ratio(), i, j], bestRatio = _ref3[0], besti = _ref3[1], bestj = _ref3[2]
|
|
}
|
|
}
|
|
}
|
|
if (bestRatio < cutoff) {
|
|
if (eqi === null) {
|
|
_ref4 = this._plainReplace(a, alo, ahi, b, blo, bhi)
|
|
for (_k = 0, _len = _ref4.length; _k < _len; _k++) {
|
|
line = _ref4[_k]
|
|
lines.push(line)
|
|
}
|
|
return lines
|
|
}
|
|
_ref5 = [eqi, eqj, 1.0], besti = _ref5[0], bestj = _ref5[1], bestRatio = _ref5[2]
|
|
} else {
|
|
eqi = null
|
|
}
|
|
_ref6 = this._fancyHelper(a, alo, besti, b, blo, bestj)
|
|
for (_l = 0, _len1 = _ref6.length; _l < _len1; _l++) {
|
|
line = _ref6[_l]
|
|
lines.push(line)
|
|
}
|
|
_ref7 = [a[besti], b[bestj]], aelt = _ref7[0], belt = _ref7[1]
|
|
if (eqi === null) {
|
|
atags = btags = ''
|
|
cruncher.setSeqs(aelt, belt)
|
|
_ref8 = cruncher.getOpcodes()
|
|
for (_m = 0, _len2 = _ref8.length; _m < _len2; _m++) {
|
|
_ref9 = _ref8[_m], tag = _ref9[0], ai1 = _ref9[1], ai2 = _ref9[2], bj1 = _ref9[3], bj2 = _ref9[4]
|
|
_ref10 = [ai2 - ai1, bj2 - bj1], la = _ref10[0], lb = _ref10[1]
|
|
switch (tag) {
|
|
case 'replace':
|
|
atags += Array(la + 1).join('^')
|
|
btags += Array(lb + 1).join('^')
|
|
break
|
|
case 'delete':
|
|
atags += Array(la + 1).join('-')
|
|
break
|
|
case 'insert':
|
|
btags += Array(lb + 1).join('+')
|
|
break
|
|
case 'equal':
|
|
atags += Array(la + 1).join(' ')
|
|
btags += Array(lb + 1).join(' ')
|
|
break
|
|
default:
|
|
throw new Error("unknow tag (" + tag + ")")
|
|
}
|
|
}
|
|
_ref11 = this._qformat(aelt, belt, atags, btags)
|
|
for (_n = 0, _len3 = _ref11.length; _n < _len3; _n++) {
|
|
line = _ref11[_n]
|
|
lines.push(line)
|
|
}
|
|
} else {
|
|
lines.push(' ' + aelt)
|
|
}
|
|
_ref12 = this._fancyHelper(a, besti + 1, ahi, b, bestj + 1, bhi)
|
|
for (_o = 0, _len4 = _ref12.length; _o < _len4; _o++) {
|
|
line = _ref12[_o]
|
|
lines.push(line)
|
|
}
|
|
return lines
|
|
}
|
|
|
|
Differ.prototype._fancyHelper = function(a, alo, ahi, b, blo, bhi) {
|
|
var g
|
|
g = []
|
|
if (alo < ahi) {
|
|
if (blo < bhi) {
|
|
g = this._fancyReplace(a, alo, ahi, b, blo, bhi)
|
|
} else {
|
|
g = this._dump('-', a, alo, ahi)
|
|
}
|
|
} else if (blo < bhi) {
|
|
g = this._dump('+', b, blo, bhi)
|
|
}
|
|
return g
|
|
}
|
|
|
|
Differ.prototype._qformat = function(aline, bline, atags, btags) {
|
|
/*
|
|
Format "?" output and deal with leading tabs.
|
|
|
|
Example:
|
|
|
|
>>> d = new Differ
|
|
>>> d._qformat('\tabcDefghiJkl\n', '\tabcdefGhijkl\n',
|
|
[ '- \tabcDefghiJkl\n',
|
|
'? \t ^ ^ ^\n',
|
|
'+ \tabcdefGhijkl\n',
|
|
'? \t ^ ^ ^\n' ]
|
|
*/
|
|
|
|
var common, lines
|
|
lines = []
|
|
common = min(_countLeading(aline, '\t'), _countLeading(bline, '\t'))
|
|
common = min(common, _countLeading(atags.slice(0, common), ' '))
|
|
common = min(common, _countLeading(btags.slice(0, common), ' '))
|
|
atags = atags.slice(common).replace(/\s+$/, '')
|
|
btags = btags.slice(common).replace(/\s+$/, '')
|
|
lines.push('- ' + aline)
|
|
if (atags.length) {
|
|
lines.push("? " + (Array(common + 1).join('\t')) + atags + "\n")
|
|
}
|
|
lines.push('+ ' + bline)
|
|
if (btags.length) {
|
|
lines.push("? " + (Array(common + 1).join('\t')) + btags + "\n")
|
|
}
|
|
return lines
|
|
}
|
|
|
|
return Differ
|
|
|
|
})()
|
|
|
|
IS_LINE_JUNK = function(line, pat) {
|
|
if (pat == null) {
|
|
pat = /^\s*#?\s*$/
|
|
}
|
|
/*
|
|
Return 1 for ignorable line: iff `line` is blank or contains a single '#'.
|
|
|
|
Examples:
|
|
|
|
>>> IS_LINE_JUNK('\n')
|
|
true
|
|
>>> IS_LINE_JUNK(' # \n')
|
|
true
|
|
>>> IS_LINE_JUNK('hello\n')
|
|
false
|
|
*/
|
|
|
|
return pat.test(line)
|
|
}
|
|
|
|
IS_CHARACTER_JUNK = function(ch, ws) {
|
|
if (ws == null) {
|
|
ws = ' \t'
|
|
}
|
|
/*
|
|
Return 1 for ignorable character: iff `ch` is a space or tab.
|
|
|
|
Examples:
|
|
>>> IS_CHARACTER_JUNK(' ').should.be.true
|
|
true
|
|
>>> IS_CHARACTER_JUNK('\t').should.be.true
|
|
true
|
|
>>> IS_CHARACTER_JUNK('\n').should.be.false
|
|
false
|
|
>>> IS_CHARACTER_JUNK('x').should.be.false
|
|
false
|
|
*/
|
|
|
|
return __indexOf.call(ws, ch) >= 0
|
|
}
|
|
|
|
_formatRangeUnified = function(start, stop) {
|
|
/*
|
|
Convert range to the "ed" format'
|
|
*/
|
|
|
|
var beginning, length
|
|
beginning = start + 1
|
|
length = stop - start
|
|
if (length === 1) {
|
|
return "" + beginning
|
|
}
|
|
if (!length) {
|
|
beginning--
|
|
}
|
|
return "" + beginning + "," + length
|
|
}
|
|
|
|
unifiedDiff = function(a, b, _arg) {
|
|
var file1Range, file2Range, first, fromdate, fromfile, fromfiledate, group, i1, i2, j1, j2, last, line, lines, lineterm, n, started, tag, todate, tofile, tofiledate, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _ref, _ref1, _ref2, _ref3, _ref4, _ref5, _ref6
|
|
_ref = _arg != null ? _arg : {}, fromfile = _ref.fromfile, tofile = _ref.tofile, fromfiledate = _ref.fromfiledate, tofiledate = _ref.tofiledate, n = _ref.n, lineterm = _ref.lineterm
|
|
/*
|
|
Compare two sequences of lines; generate the delta as a unified diff.
|
|
|
|
Unified diffs are a compact way of showing line changes and a few
|
|
lines of context. The number of context lines is set by 'n' which
|
|
defaults to three.
|
|
|
|
By default, the diff control lines (those with ---, +++, or @@) are
|
|
created with a trailing newline.
|
|
|
|
For inputs that do not have trailing newlines, set the lineterm
|
|
argument to "" so that the output will be uniformly newline free.
|
|
|
|
The unidiff format normally has a header for filenames and modification
|
|
times. Any or all of these may be specified using strings for
|
|
'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
|
|
The modification times are normally expressed in the ISO 8601 format.
|
|
|
|
Example:
|
|
|
|
>>> unifiedDiff('one two three four'.split(' '),
|
|
... 'zero one tree four'.split(' '), {
|
|
... fromfile: 'Original'
|
|
... tofile: 'Current',
|
|
... fromfiledate: '2005-01-26 23:30:50',
|
|
... tofiledate: '2010-04-02 10:20:52',
|
|
... lineterm: ''
|
|
... })
|
|
[ '--- Original\t2005-01-26 23:30:50',
|
|
'+++ Current\t2010-04-02 10:20:52',
|
|
'@@ -1,4 +1,4 @@',
|
|
'+zero',
|
|
' one',
|
|
'-two',
|
|
'-three',
|
|
'+tree',
|
|
' four' ]
|
|
*/
|
|
|
|
if (fromfile == null) {
|
|
fromfile = ''
|
|
}
|
|
if (tofile == null) {
|
|
tofile = ''
|
|
}
|
|
if (fromfiledate == null) {
|
|
fromfiledate = ''
|
|
}
|
|
if (tofiledate == null) {
|
|
tofiledate = ''
|
|
}
|
|
if (n == null) {
|
|
n = 3
|
|
}
|
|
if (lineterm == null) {
|
|
lineterm = '\n'
|
|
}
|
|
lines = []
|
|
started = false
|
|
_ref1 = (new SequenceMatcher(null, a, b)).getGroupedOpcodes()
|
|
for (_i = 0, _len = _ref1.length; _i < _len; _i++) {
|
|
group = _ref1[_i]
|
|
if (!started) {
|
|
started = true
|
|
fromdate = fromfiledate ? "\t" + fromfiledate : ''
|
|
todate = tofiledate ? "\t" + tofiledate : ''
|
|
lines.push("--- " + fromfile + fromdate + lineterm)
|
|
lines.push("+++ " + tofile + todate + lineterm)
|
|
}
|
|
_ref2 = [group[0], group[group.length - 1]], first = _ref2[0], last = _ref2[1]
|
|
file1Range = _formatRangeUnified(first[1], last[2])
|
|
file2Range = _formatRangeUnified(first[3], last[4])
|
|
lines.push("@@ -" + file1Range + " +" + file2Range + " @@" + lineterm)
|
|
for (_j = 0, _len1 = group.length; _j < _len1; _j++) {
|
|
_ref3 = group[_j], tag = _ref3[0], i1 = _ref3[1], i2 = _ref3[2], j1 = _ref3[3], j2 = _ref3[4]
|
|
if (tag === 'equal') {
|
|
_ref4 = a.slice(i1, i2)
|
|
for (_k = 0, _len2 = _ref4.length; _k < _len2; _k++) {
|
|
line = _ref4[_k]
|
|
lines.push(' ' + line)
|
|
}
|
|
continue
|
|
}
|
|
if (tag === 'replace' || tag === 'delete') {
|
|
_ref5 = a.slice(i1, i2)
|
|
for (_l = 0, _len3 = _ref5.length; _l < _len3; _l++) {
|
|
line = _ref5[_l]
|
|
lines.push('-' + line)
|
|
}
|
|
}
|
|
if (tag === 'replace' || tag === 'insert') {
|
|
_ref6 = b.slice(j1, j2)
|
|
for (_m = 0, _len4 = _ref6.length; _m < _len4; _m++) {
|
|
line = _ref6[_m]
|
|
lines.push('+' + line)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return lines
|
|
}
|
|
|
|
_formatRangeContext = function(start, stop) {
|
|
/*
|
|
Convert range to the "ed" format'
|
|
*/
|
|
|
|
var beginning, length
|
|
beginning = start + 1
|
|
length = stop - start
|
|
if (!length) {
|
|
beginning--
|
|
}
|
|
if (length <= 1) {
|
|
return "" + beginning
|
|
}
|
|
return "" + beginning + "," + (beginning + length - 1)
|
|
}
|
|
|
|
contextDiff = function(a, b, _arg) {
|
|
var file1Range, file2Range, first, fromdate, fromfile, fromfiledate, group, i1, i2, j1, j2, last, line, lines, lineterm, n, prefix, started, tag, todate, tofile, tofiledate, _, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _ref, _ref1, _ref2, _ref3, _ref4, _ref5, _ref6
|
|
_ref = _arg != null ? _arg : {}, fromfile = _ref.fromfile, tofile = _ref.tofile, fromfiledate = _ref.fromfiledate, tofiledate = _ref.tofiledate, n = _ref.n, lineterm = _ref.lineterm
|
|
/*
|
|
Compare two sequences of lines; generate the delta as a context diff.
|
|
|
|
Context diffs are a compact way of showing line changes and a few
|
|
lines of context. The number of context lines is set by 'n' which
|
|
defaults to three.
|
|
|
|
By default, the diff control lines (those with *** or ---) are
|
|
created with a trailing newline. This is helpful so that inputs
|
|
created from file.readlines() result in diffs that are suitable for
|
|
file.writelines() since both the inputs and outputs have trailing
|
|
newlines.
|
|
|
|
For inputs that do not have trailing newlines, set the lineterm
|
|
argument to "" so that the output will be uniformly newline free.
|
|
|
|
The context diff format normally has a header for filenames and
|
|
modification times. Any or all of these may be specified using
|
|
strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
|
|
The modification times are normally expressed in the ISO 8601 format.
|
|
If not specified, the strings default to blanks.
|
|
|
|
Example:
|
|
>>> a = ['one\n', 'two\n', 'three\n', 'four\n']
|
|
>>> b = ['zero\n', 'one\n', 'tree\n', 'four\n']
|
|
>>> contextDiff(a, b, {fromfile: 'Original', tofile: 'Current'})
|
|
[ '*** Original\n',
|
|
'--- Current\n',
|
|
'***************\n',
|
|
'*** 1,4 ****\n',
|
|
' one\n',
|
|
'! two\n',
|
|
'! three\n',
|
|
' four\n',
|
|
'--- 1,4 ----\n',
|
|
'+ zero\n',
|
|
' one\n',
|
|
'! tree\n',
|
|
' four\n' ]
|
|
*/
|
|
|
|
if (fromfile == null) {
|
|
fromfile = ''
|
|
}
|
|
if (tofile == null) {
|
|
tofile = ''
|
|
}
|
|
if (fromfiledate == null) {
|
|
fromfiledate = ''
|
|
}
|
|
if (tofiledate == null) {
|
|
tofiledate = ''
|
|
}
|
|
if (n == null) {
|
|
n = 3
|
|
}
|
|
if (lineterm == null) {
|
|
lineterm = '\n'
|
|
}
|
|
prefix = {
|
|
insert: '+ ',
|
|
"delete": '- ',
|
|
replace: '! ',
|
|
equal: ' '
|
|
}
|
|
started = false
|
|
lines = []
|
|
_ref1 = (new SequenceMatcher(null, a, b)).getGroupedOpcodes()
|
|
for (_i = 0, _len = _ref1.length; _i < _len; _i++) {
|
|
group = _ref1[_i]
|
|
if (!started) {
|
|
started = true
|
|
fromdate = fromfiledate ? "\t" + fromfiledate : ''
|
|
todate = tofiledate ? "\t" + tofiledate : ''
|
|
lines.push("*** " + fromfile + fromdate + lineterm)
|
|
lines.push("--- " + tofile + todate + lineterm)
|
|
_ref2 = [group[0], group[group.length - 1]], first = _ref2[0], last = _ref2[1]
|
|
lines.push('***************' + lineterm)
|
|
file1Range = _formatRangeContext(first[1], last[2])
|
|
lines.push("*** " + file1Range + " ****" + lineterm)
|
|
if (_any((function() {
|
|
var _j, _len1, _ref3, _results
|
|
_results = []
|
|
for (_j = 0, _len1 = group.length; _j < _len1; _j++) {
|
|
_ref3 = group[_j], tag = _ref3[0], _ = _ref3[1], _ = _ref3[2], _ = _ref3[3], _ = _ref3[4]
|
|
_results.push(tag === 'replace' || tag === 'delete')
|
|
}
|
|
return _results
|
|
})())) {
|
|
for (_j = 0, _len1 = group.length; _j < _len1; _j++) {
|
|
_ref3 = group[_j], tag = _ref3[0], i1 = _ref3[1], i2 = _ref3[2], _ = _ref3[3], _ = _ref3[4]
|
|
if (tag !== 'insert') {
|
|
_ref4 = a.slice(i1, i2)
|
|
for (_k = 0, _len2 = _ref4.length; _k < _len2; _k++) {
|
|
line = _ref4[_k]
|
|
lines.push(prefix[tag] + line)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
file2Range = _formatRangeContext(first[3], last[4])
|
|
lines.push("--- " + file2Range + " ----" + lineterm)
|
|
if (_any((function() {
|
|
var _l, _len3, _ref5, _results
|
|
_results = []
|
|
for (_l = 0, _len3 = group.length; _l < _len3; _l++) {
|
|
_ref5 = group[_l], tag = _ref5[0], _ = _ref5[1], _ = _ref5[2], _ = _ref5[3], _ = _ref5[4]
|
|
_results.push(tag === 'replace' || tag === 'insert')
|
|
}
|
|
return _results
|
|
})())) {
|
|
for (_l = 0, _len3 = group.length; _l < _len3; _l++) {
|
|
_ref5 = group[_l], tag = _ref5[0], _ = _ref5[1], _ = _ref5[2], j1 = _ref5[3], j2 = _ref5[4]
|
|
if (tag !== 'delete') {
|
|
_ref6 = b.slice(j1, j2)
|
|
for (_m = 0, _len4 = _ref6.length; _m < _len4; _m++) {
|
|
line = _ref6[_m]
|
|
lines.push(prefix[tag] + line)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return lines
|
|
}
|
|
|
|
ndiff = function(a, b, linejunk, charjunk) {
|
|
if (charjunk == null) {
|
|
charjunk = IS_CHARACTER_JUNK
|
|
}
|
|
/*
|
|
Compare `a` and `b` (lists of strings); return a `Differ`-style delta.
|
|
|
|
Optional keyword parameters `linejunk` and `charjunk` are for filter
|
|
functions (or None):
|
|
|
|
- linejunk: A function that should accept a single string argument, and
|
|
return true iff the string is junk. The default is null, and is
|
|
recommended;
|
|
|
|
- charjunk: A function that should accept a string of length 1. The
|
|
default is module-level function IS_CHARACTER_JUNK, which filters out
|
|
whitespace characters (a blank or tab; note: bad idea to include newline
|
|
in this!).
|
|
|
|
Example:
|
|
>>> a = ['one\n', 'two\n', 'three\n']
|
|
>>> b = ['ore\n', 'tree\n', 'emu\n']
|
|
>>> ndiff(a, b)
|
|
[ '- one\n',
|
|
'? ^\n',
|
|
'+ ore\n',
|
|
'? ^\n',
|
|
'- two\n',
|
|
'- three\n',
|
|
'? -\n',
|
|
'+ tree\n',
|
|
'+ emu\n' ]
|
|
*/
|
|
|
|
return (new Differ(linejunk, charjunk)).compare(a, b)
|
|
}
|
|
|
|
restore = function(delta, which) {
|
|
/*
|
|
Generate one of the two sequences that generated a delta.
|
|
|
|
Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract
|
|
lines originating from file 1 or 2 (parameter `which`), stripping off line
|
|
prefixes.
|
|
|
|
Examples:
|
|
>>> a = ['one\n', 'two\n', 'three\n']
|
|
>>> b = ['ore\n', 'tree\n', 'emu\n']
|
|
>>> diff = ndiff(a, b)
|
|
>>> restore(diff, 1)
|
|
[ 'one\n',
|
|
'two\n',
|
|
'three\n' ]
|
|
>>> restore(diff, 2)
|
|
[ 'ore\n',
|
|
'tree\n',
|
|
'emu\n' ]
|
|
*/
|
|
|
|
var line, lines, prefixes, tag, _i, _len, _ref
|
|
tag = {
|
|
1: '- ',
|
|
2: '+ '
|
|
}[which]
|
|
if (!tag) {
|
|
throw new Error("unknow delta choice (must be 1 or 2): " + which)
|
|
}
|
|
prefixes = [' ', tag]
|
|
lines = []
|
|
for (_i = 0, _len = delta.length; _i < _len; _i++) {
|
|
line = delta[_i]
|
|
if (_ref = line.slice(0, 2), __indexOf.call(prefixes, _ref) >= 0) {
|
|
lines.push(line.slice(2))
|
|
}
|
|
}
|
|
return lines
|
|
}
|
|
|
|
exports._arrayCmp = _arrayCmp
|
|
|
|
exports.SequenceMatcher = SequenceMatcher
|
|
|
|
exports.getCloseMatches = getCloseMatches
|
|
|
|
exports._countLeading = _countLeading
|
|
|
|
exports.Differ = Differ
|
|
|
|
exports.IS_LINE_JUNK = IS_LINE_JUNK
|
|
|
|
exports.IS_CHARACTER_JUNK = IS_CHARACTER_JUNK
|
|
|
|
exports._formatRangeUnified = _formatRangeUnified
|
|
|
|
exports.unifiedDiff = unifiedDiff
|
|
|
|
exports._formatRangeContext = _formatRangeContext
|
|
|
|
exports.contextDiff = contextDiff
|
|
|
|
exports.ndiff = ndiff
|
|
|
|
exports.restore = restore
|
|
|
|
}).call(this)
|