This repository has been archived by the owner on Jul 22, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 11
/
index.js
149 lines (122 loc) · 5.8 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
// The scores are arranged so that a continuous match of characters will
// result in a total score of 1.
//
// The best case, this character is a match, and either this is the start
// of the string, or the previous character was also a match.
var SCORE_CONTINUE_MATCH = 1,
// A new match at the start of a word scores better than a new match
// elsewhere as it's more likely that the user will type the starts
// of fragments.
// NOTE: We score word jumps between spaces slightly higher than slashes, brackets
// hyphens, etc.
SCORE_SPACE_WORD_JUMP = 0.9,
SCORE_NON_SPACE_WORD_JUMP = 0.8,
// Any other match isn't ideal, but we include it for completeness.
SCORE_CHARACTER_JUMP = 0.3,
// If the user transposed two letters, it should be signficantly penalized.
//
// i.e. "ouch" is more likely than "curtain" when "uc" is typed.
SCORE_TRANSPOSITION = 0.1,
// The goodness of a match should decay slightly with each missing
// character.
//
// i.e. "bad" is more likely than "bard" when "bd" is typed.
//
// This will not change the order of suggestions based on SCORE_* until
// 100 characters are inserted between matches.
PENALTY_SKIPPED = 0.999,
// The goodness of an exact-case match should be higher than a
// case-insensitive match by a small amount.
//
// i.e. "HTML" is more likely than "haml" when "HM" is typed.
//
// This will not change the order of suggestions based on SCORE_* until
// 1000 characters are inserted between matches.
PENALTY_CASE_MISMATCH = 0.9999,
// Match higher for letters closer to the beginning of the word
PENALTY_DISTANCE_FROM_START = 0.9,
// If the word has more characters than the user typed, it should
// be penalised slightly.
//
// i.e. "html" is more likely than "html5" if I type "html".
//
// However, it may well be the case that there's a sensible secondary
// ordering (like alphabetical) that it makes sense to rely on when
// there are many prefix matches, so we don't make the penalty increase
// with the number of tokens.
PENALTY_NOT_COMPLETE = 0.99;
var IS_GAP_REGEXP = /[\\\/_+.#"@\[\(\{&]/,
COUNT_GAPS_REGEXP = /[\\\/_+.#"@\[\(\{&]/g,
IS_SPACE_REGEXP = /[\s-]/,
COUNT_SPACE_REGEXP = /[\s-]/g;
function commandScoreInner(string, abbreviation, lowerString, lowerAbbreviation, stringIndex, abbreviationIndex, memoizedResults) {
if (abbreviationIndex === abbreviation.length) {
if (stringIndex === string.length) {
return SCORE_CONTINUE_MATCH;
}
return PENALTY_NOT_COMPLETE;
}
var memoizeKey = `${stringIndex},${abbreviationIndex}`;
if (memoizedResults[memoizeKey] !== undefined) {
return memoizedResults[memoizeKey];
}
var abbreviationChar = lowerAbbreviation.charAt(abbreviationIndex);
var index = lowerString.indexOf(abbreviationChar, stringIndex);
var highScore = 0;
var score, transposedScore, wordBreaks, spaceBreaks;
while (index >= 0) {
score = commandScoreInner(string, abbreviation, lowerString, lowerAbbreviation, index + 1, abbreviationIndex + 1, memoizedResults);
if (score > highScore) {
if (index === stringIndex) {
score *= SCORE_CONTINUE_MATCH;
} else if (IS_GAP_REGEXP.test(string.charAt(index - 1))) {
score *= SCORE_NON_SPACE_WORD_JUMP;
wordBreaks = string.slice(stringIndex, index - 1).match(COUNT_GAPS_REGEXP);
if (wordBreaks && stringIndex > 0) {
score *= Math.pow(PENALTY_SKIPPED, wordBreaks.length);
}
} else if (IS_SPACE_REGEXP.test(string.charAt(index - 1))) {
score *= SCORE_SPACE_WORD_JUMP;
spaceBreaks = string.slice(stringIndex, index - 1).match(COUNT_SPACE_REGEXP);
if (spaceBreaks && stringIndex > 0) {
score *= Math.pow(PENALTY_SKIPPED, spaceBreaks.length);
}
} else {
score *= SCORE_CHARACTER_JUMP;
if (stringIndex > 0) {
score *= Math.pow(PENALTY_SKIPPED, index - stringIndex);
}
}
if (string.charAt(index) !== abbreviation.charAt(abbreviationIndex)) {
score *= PENALTY_CASE_MISMATCH;
}
}
if (score < SCORE_TRANSPOSITION &&
(lowerString.charAt(index - 1) === lowerAbbreviation.charAt(abbreviationIndex + 1)) ||
lowerAbbreviation.charAt(abbreviationIndex + 1) === lowerAbbreviation.charAt(abbreviationIndex) // allow duplicate letters. Ref #7428
&& lowerString.charAt(index - 1) !== lowerAbbreviation.charAt(abbreviationIndex)) {
transposedScore = commandScoreInner(string, abbreviation, lowerString, lowerAbbreviation, index + 1, abbreviationIndex + 2, memoizedResults);
if (transposedScore * SCORE_TRANSPOSITION > score) {
score = transposedScore * SCORE_TRANSPOSITION;
}
}
if (score > highScore) {
highScore = score;
}
index = lowerString.indexOf(abbreviationChar, index + 1);
}
memoizedResults[memoizeKey] = highScore;
return highScore;
}
function formatInput (string) {
// convert all valid space characters to space so they match each other
return string.toLowerCase().replace(COUNT_SPACE_REGEXP, ' ')
}
function commandScore(string, abbreviation) {
/* NOTE:
* in the original, we used to do the lower-casing on each recursive call, but this meant that toLowerCase()
* was the dominating cost in the algorithm, passing both is a little ugly, but considerably faster.
*/
return commandScoreInner(string, abbreviation, formatInput(string), formatInput(abbreviation), 0, 0, {});
}
module.exports = commandScore;