-
Notifications
You must be signed in to change notification settings - Fork 2
/
fuzzybear.js
163 lines (144 loc) · 5.72 KB
/
fuzzybear.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
/**
* Fuzzy string search across a list of elements with support for multiple algorithms,
* short string search and scoring.
*
* @author Itay Grudev
* @copyright (c) 2021 Clustermarket Ltd.
* @license MIT
* @export fuzzybear
*/
import jaccard from './methods/jaccard'
import jaro_winkler from './methods/jaro_winkler'
const METHODS = {
jaccard: jaccard,
jaro_winkler: jaro_winkler,
}
/**
* Perform a fuzzy string search across a list of elements.
* @param {String} searchTerm
* @param {Object[]|String[]} elements
* @param {String} elements[].label - Value to be searched against
* @param {Object} options
* @param {Number} options.results - Number of results to return. Defaults to 0 - all elements distanced
* @param {String} options.labelField - Field to search against. Defaults to "label"
* @param {Boolean} options.caseSensitive - Whether to perform a case sensitive match. Defaults to false
* @param {Number} options.minScore - Minimum score of matches to be included in the results
* @param {Object[]} options.methods - Which methods to use when scoring matches
* @param {String} options.methods[].name - Search algorithm name
* @param {Number} options.methods[].weight - Search algorithm weight in scoring
* @param {Object} options.methods[].params - Search algorithm parameters
* @raises {Error} if the search method is not supported or if element are invalid
*/
export function search( searchTerm, elements, options = {}){
options = optionsDefaults( options )
validateOptions( options )
loadMethods( options )
let resultSet = []
elements.forEach( ( element ) => {
let value
if( typeof element === 'string' ){
value = element
} else if( typeof element === 'object' && typeof element[options.labelField] === 'string' ){
value = element[options.labelField]
} else {
throw new Error( 'Element without label is not searchable' )
}
// Convert the string only values to object with value and distance
if( typeof element === 'string' ) {
element = {
[options.labelField]: element
}
}
let searchScore = score( searchTerm, value, options, true )
if( searchScore >= options.minScore )
resultSet.push( Object.assign({}, element, { _score: searchScore }))
})
// Sort ascending based on index
resultSet.sort(( left, right ) => { return right._score - left._score })
if( options.results > 0 )
resultSet = resultSet.slice( 0, options.results )
return resultSet
}
/**
* Perform a fuzzy string distance of two strings.
* @param {String} searchTerm
* @param {String} testString
* @param {Object} options
* @param {Number} options.results - Number of results to return. Defaults to 0 - all elements distanced
* @param {String} options.labelField - Field to search against. Defaults to "label"
* @param {Boolean} options.caseSensitive - Whether to perform a case sensitive match. Defaults to false
* @param {Number} options.minScore - Minimum score of matches to be included in the results
* @param {Object[]} options.methods - Which methods to use when scoring matches
* @param {String} options.methods[].name - Search algorithm name
* @param {Object} options.methods[].function - A custom search algorithm function. The function takes
* @param {Number} options.methods[].weight - Search algorithm weight in scoring
* @param {Object} options.methods[].params - Search algorithm parameters
* @raises {Error} if the search method is not supported or if element are invalid
*/
export function score( searchTerm, testString, options = {}, _skipOptionsPrep = false ){
if( ! _skipOptionsPrep ){
options = optionsDefaults( options )
validateOptions( options )
loadMethods( options )
}
if( ! options.caseSensitive ) searchTerm = searchTerm.toLowerCase()
if( ! options.caseSensitive ) testString = testString.toLowerCase()
let score = 0, weight = 0
options.methods.forEach( ( method ) => {
score += (1 - method.function( searchTerm, testString, method.params )) * method.weight
weight += method.weight
})
return score / weight
}
function optionsDefaults( options ){
options = Object.assign({}, {
results: 0,
minScore: - Number.MAX_VALUE,
caseSensitive: false,
labelField: 'label',
methods: [
{
name: 'jaro_winkler',
params: { p: 0.1 },
weight: 1.5
},
{
name: 'jaccard',
params: { n: 2 },
weight: 1
},
],
}, options )
options.methods = options.methods.map(( method ) => {
if( typeof method === 'string' ){
method = {
name: method,
weight: 1
}
} else if( typeof method === 'function' ){
method = {
function: method,
weight: 1
}
} else if( typeof method === 'object' ){
if( method.weight === undefined ) method.weight = 1
} else {
throw new Error( 'Invalid method definition' )
}
return method
})
return options
}
function validateOptions( options ){
// Raise an error if there are no methods specified
if( options.methods.length === 0 )
throw new Error( 'No search methods specified.' )
}
function loadMethods( options ){
// Load specified methods
options.methods.forEach( (method, index) => {
if( method.function ) return // Skip if a function is already defined
if( method.name in METHODS )
method.function = METHODS[method.name]
})
}