forked from trekhleb/javascript-algorithms
-
Notifications
You must be signed in to change notification settings - Fork 1
/
zAlgorithm.js
132 lines (112 loc) · 4.94 KB
/
zAlgorithm.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// The string separator that is being used for "word" and "text" concatenation.
const SEPARATOR = '$';
/**
* @param {string} zString
* @return {number[]}
*/
function buildZArray(zString) {
// Initiate zArray and fill it with zeros.
const zArray = new Array(zString.length).fill(null).map(() => 0);
// Z box boundaries.
let zBoxLeftIndex = 0;
let zBoxRightIndex = 0;
// Position of current zBox character that is also a position of
// the same character in prefix.
// For example:
// Z string: ab$xxabxx
// Indices: 012345678
// Prefix: ab.......
// Z box: .....ab..
// Z box shift for 'a' would be 0 (0-position in prefix and 0-position in Z box)
// Z box shift for 'b' would be 1 (1-position in prefix and 1-position in Z box)
let zBoxShift = 0;
// Go through all characters of the zString.
for (let charIndex = 1; charIndex < zString.length; charIndex += 1) {
if (charIndex > zBoxRightIndex) {
// We're OUTSIDE of Z box. In other words this is a case when we're
// starting from Z box of size 1.
// In this case let's make current character to be a Z box of length 1.
zBoxLeftIndex = charIndex;
zBoxRightIndex = charIndex;
// Now let's go and check current and the following characters to see if
// they are the same as a prefix. By doing this we will also expand our
// Z box. For example if starting from current position we will find 3
// more characters that are equal to the ones in the prefix we will expand
// right Z box boundary by 3.
while (
zBoxRightIndex < zString.length
&& zString[zBoxRightIndex - zBoxLeftIndex] === zString[zBoxRightIndex]
) {
// Expanding Z box right boundary.
zBoxRightIndex += 1;
}
// Now we may calculate how many characters starting from current position
// are are the same as the prefix. We may calculate it by difference between
// right and left Z box boundaries.
zArray[charIndex] = zBoxRightIndex - zBoxLeftIndex;
// Move right Z box boundary left by one position just because we've used
// [zBoxRightIndex - zBoxLeftIndex] index calculation above.
zBoxRightIndex -= 1;
} else {
// We're INSIDE of Z box.
// Calculate corresponding Z box shift. Because we want to copy the values
// from zArray that have been calculated before.
zBoxShift = charIndex - zBoxLeftIndex;
// Check if the value that has been already calculated before
// leaves us inside of Z box or it goes beyond the checkbox
// right boundary.
if (zArray[zBoxShift] < (zBoxRightIndex - charIndex) + 1) {
// If calculated value don't force us to go outside Z box
// then we're safe and we may simply use previously calculated value.
zArray[charIndex] = zArray[zBoxShift];
} else {
// In case if previously calculated values forces us to go outside of Z box
// we can't safely copy previously calculated zArray value. It is because
// we are sure that there is no further prefix matches outside of Z box.
// Thus such values must be re-calculated and reduced to certain point.
// To do so we need to shift left boundary of Z box to current position.
zBoxLeftIndex = charIndex;
// And start comparing characters one by one as we normally do for the case
// when we are outside of checkbox.
while (
zBoxRightIndex < zString.length
&& zString[zBoxRightIndex - zBoxLeftIndex] === zString[zBoxRightIndex]
) {
zBoxRightIndex += 1;
}
zArray[charIndex] = zBoxRightIndex - zBoxLeftIndex;
zBoxRightIndex -= 1;
}
}
}
// Return generated zArray.
return zArray;
}
/**
* @param {string} text
* @param {string} word
* @return {number[]}
*/
export default function zAlgorithm(text, word) {
// The list of word's positions in text. Word may be found in the same text
// in several different positions. Thus it is an array.
const wordPositions = [];
// Concatenate word and string. Word will be a prefix to a string.
const zString = `${word}${SEPARATOR}${text}`;
// Generate Z-array for concatenated string.
const zArray = buildZArray(zString);
// Based on Z-array properties each cell will tell us the length of the match between
// the string prefix and current sub-text. Thus we're may find all positions in zArray
// with the number that equals to the length of the word (zString prefix) and based on
// that positions we'll be able to calculate word positions in text.
for (let charIndex = 1; charIndex < zArray.length; charIndex += 1) {
if (zArray[charIndex] === word.length) {
// Since we did concatenation to form zString we need to subtract prefix
// and separator lengths.
const wordPosition = charIndex - word.length - SEPARATOR.length;
wordPositions.push(wordPosition);
}
}
// Return the list of word positions.
return wordPositions;
}