levenshtein.js
3.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
(function() {
'use strict';
var collator;
try {
collator = (typeof Intl !== "undefined" && typeof Intl.Collator !== "undefined") ? Intl.Collator("generic", { sensitivity: "base" }) : null;
} catch (err){
console.log("Collator could not be initialized and wouldn't be used");
}
// arrays to re-use
var prevRow = [],
str2Char = [];
/**
* Based on the algorithm at http://en.wikipedia.org/wiki/Levenshtein_distance.
*/
var Levenshtein = {
/**
* Calculate levenshtein distance of the two strings.
*
* @param str1 String the first string.
* @param str2 String the second string.
* @param [options] Additional options.
* @param [options.useCollator] Use `Intl.Collator` for locale-sensitive string comparison.
* @return Integer the levenshtein distance (0 and above).
*/
get: function(str1, str2, options) {
var useCollator = (options && collator && options.useCollator);
var str1Len = str1.length,
str2Len = str2.length;
// base cases
if (str1Len === 0) return str2Len;
if (str2Len === 0) return str1Len;
// two rows
var curCol, nextCol, i, j, tmp;
// initialise previous row
for (i=0; i<str2Len; ++i) {
prevRow[i] = i;
str2Char[i] = str2.charCodeAt(i);
}
prevRow[str2Len] = str2Len;
var strCmp;
if (useCollator) {
// calculate current row distance from previous row using collator
for (i = 0; i < str1Len; ++i) {
nextCol = i + 1;
for (j = 0; j < str2Len; ++j) {
curCol = nextCol;
// substution
strCmp = 0 === collator.compare(str1.charAt(i), String.fromCharCode(str2Char[j]));
nextCol = prevRow[j] + (strCmp ? 0 : 1);
// insertion
tmp = curCol + 1;
if (nextCol > tmp) {
nextCol = tmp;
}
// deletion
tmp = prevRow[j + 1] + 1;
if (nextCol > tmp) {
nextCol = tmp;
}
// copy current col value into previous (in preparation for next iteration)
prevRow[j] = curCol;
}
// copy last col value into previous (in preparation for next iteration)
prevRow[j] = nextCol;
}
}
else {
// calculate current row distance from previous row without collator
for (i = 0; i < str1Len; ++i) {
nextCol = i + 1;
for (j = 0; j < str2Len; ++j) {
curCol = nextCol;
// substution
strCmp = str1.charCodeAt(i) === str2Char[j];
nextCol = prevRow[j] + (strCmp ? 0 : 1);
// insertion
tmp = curCol + 1;
if (nextCol > tmp) {
nextCol = tmp;
}
// deletion
tmp = prevRow[j + 1] + 1;
if (nextCol > tmp) {
nextCol = tmp;
}
// copy current col value into previous (in preparation for next iteration)
prevRow[j] = curCol;
}
// copy last col value into previous (in preparation for next iteration)
prevRow[j] = nextCol;
}
}
return nextCol;
}
};
// amd
if (typeof define !== "undefined" && define !== null && define.amd) {
define(function() {
return Levenshtein;
});
}
// commonjs
else if (typeof module !== "undefined" && module !== null && typeof exports !== "undefined" && module.exports === exports) {
module.exports = Levenshtein;
}
// web worker
else if (typeof self !== "undefined" && typeof self.postMessage === 'function' && typeof self.importScripts === 'function') {
self.Levenshtein = Levenshtein;
}
// browser main thread
else if (typeof window !== "undefined" && window !== null) {
window.Levenshtein = Levenshtein;
}
}());