generated from neoncitylights/node
-
Notifications
You must be signed in to change notification settings - Fork 0
/
strings.ts
146 lines (130 loc) · 3.91 KB
/
strings.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import { isAsciiWhitespace, isSurrogate } from './codePoints';
/**
 * Collects a sequence of code points that pass a given predicate function,
 * starting at a given position.
 *
 * `position` and the returned position are UTF-16 code unit indices into
 * `value` (the same unit used by `value.length` and `value.slice()`), so the
 * returned position can be fed straight back in to continue scanning.
 *
 * @see https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
 * @param value - The string to scan.
 * @param position - Index in `value` at which collecting starts.
 * @param predicate - Called with each code point (as a string); collecting
 * stops at the first code point for which it returns `false`.
 * @returns A 2-tuple of the collected string and the new position. When
 * `position` is at or past the end of `value`, returns `['', position]`.
 */
export function collectCodepoints(
	value: string,
	position: number,
	predicate: (codePoint: string) => boolean,
): [string, number] {
	// Also covers the empty string, whose length is 0.
	if(position >= value.length) {
		return ['', position];
	}

	let newPosition = position;
	let result = '';
	// Iterating a string with for...of yields whole code points, so a
	// surrogate pair arrives as a single two-code-unit string.
	for(const codePoint of value.slice(position)) {
		if(!predicate(codePoint)) {
			break;
		}
		result += codePoint;
		// Advance by the number of code units consumed (1 or 2) so that the
		// position stays a valid index into `value`.
		newPosition += codePoint.length;
	}

	return [result, newPosition];
}
/**
 * Converts a string into a scalar value string, i.e. a string made up only
 * of Unicode scalar values (non-surrogate code points).
 *
 * The input is walked one code point at a time; every code point that
 * `isSurrogate` reports as a surrogate is replaced with `U+FFFD`
 * REPLACEMENT CHARACTER, and all other code points are copied unchanged.
 *
 * @see https://unicode.org/glossary/#unicode_scalar_value
 * @see https://infra.spec.whatwg.org/#scalar-value-string
 * @see https://infra.spec.whatwg.org/#javascript-string-convert
 * @param value - The string to convert.
 * @returns The converted string.
 */
export const convertStringToScalarValue = (value: string): string =>
	Array.from(
		value,
		(codePoint) => (isSurrogate(codePoint) ? '\u{FFFD}' : codePoint),
	).join('');
/**
 * Removes every `U+000A` LF and `U+000D` CR code point from a string,
 * leaving all other code points in their original order.
 *
 * @see https://infra.spec.whatwg.org/#strip-newlines
 * @param value - The string to strip.
 * @returns A copy of `value` with no LF or CR code points.
 */
export const stripNewlines = (value: string): string =>
	value.replace(/[\u000A\u000D]/g, '');
/**
 * Normalizes newlines: replaces every `U+000D U+000A` (CR LF) pair with a
 * single `U+000A` LF, and every remaining `U+000D` CR with `U+000A` LF.
 *
 * @see https://infra.spec.whatwg.org/#normalize-newlines
 * @param value - The string to normalize.
 * @returns A copy of `value` that contains no `U+000D` code points.
 */
export const normalizeNewlines = (value: string): string =>
	// One global pass: a CR optionally followed by LF becomes a single LF.
	// The `g` flag matters; replace() with a string pattern would only touch
	// the first occurrence and leave later lone CRs in place.
	value.replace(/\u000D\u000A?/g, '\u{000A}');
/**
 * An implementation of the WHATWG "strip leading and trailing ASCII
 * whitespace" algorithm. It is a narrower variant of
 * `String.prototype.trim()`: which characters count as whitespace is decided
 * solely by `isAsciiWhitespace`.
 *
 * Per the Infra Standard, ASCII whitespace is `U+0009` TAB, `U+000A` LF,
 * `U+000C` FF, `U+000D` CR and `U+0020` SPACE; `trim()` removes all of
 * those as well. (This assumes `isAsciiWhitespace` follows the Infra
 * definition; it is defined in another module.)
 *
 * Notable difference: `trim()` additionally removes `U+000B` VT, `U+00A0`
 * NBSP, `U+FEFF` ZWNBSP, the line terminators `U+2028` and `U+2029`, and
 * characters in the Unicode `Space_Separator` general category (USP).
 *
 * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace
 * @see https://tc39.es/ecma262/multipage/text-processing.html#sec-string.prototype.trim
 * @param value - The string to strip.
 * @returns `value` without its leading and trailing ASCII whitespace; the
 * empty string when `value` is empty or consists only of ASCII whitespace.
 */
export const stripTrailingLeadingAsciiWhitespace = (value: string): string => {
	// Both scans are bounded so that no out-of-range index is ever read.
	let leadingIndex = 0;
	while(leadingIndex < value.length && isAsciiWhitespace(value.charAt(leadingIndex))) {
		leadingIndex++;
	}

	// Stop at leadingIndex: for whitespace-only input the two indices meet
	// instead of crossing, so the result is '' rather than the whole string.
	let trailingIndex = value.length;
	while(trailingIndex > leadingIndex && isAsciiWhitespace(value.charAt(trailingIndex - 1))) {
		trailingIndex--;
	}

	// slice() does not swap its arguments the way substring() does.
	return value.slice(leadingIndex, trailingIndex);
};
/**
 * Collapses every run of consecutive ASCII whitespace into a single
 * `U+0020` SPACE and removes whitespace from both the start and the end.
 *
 * Which characters count as ASCII whitespace is decided by
 * `isAsciiWhitespace`. The work is done in a single pass: a space is only
 * written once a non-whitespace character follows it, so leading and
 * trailing runs never reach the output and whitespace-only input yields
 * the empty string.
 *
 * @see https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace
 * @param value - The string to process.
 * @returns The stripped and collapsed string.
 */
export const stripCollapseAsciiWhitespace = (value: string): string => {
	let result = '';
	// True while a whitespace run has been seen after some emitted content
	// and its replacement space has not been written yet.
	let pendingSpace = false;

	for(let i = 0; i < value.length; i++) {
		const codeUnit = value.charAt(i);
		if(isAsciiWhitespace(codeUnit)) {
			// Whitespace before any content (leading whitespace) is dropped.
			pendingSpace = result !== '';
			continue;
		}

		if(pendingSpace) {
			result += '\u{0020}';
			pendingSpace = false;
		}
		result += codeUnit;
	}

	// A still-pending space here belongs to a trailing run and is discarded.
	return result;
};