// sob.js — added as a new file by this diff.

/**
 * Sob: a small collection of string helpers for splitting text into
 * words / characters / code units / bytes, escaping text for display,
 * and rendering lists.
 */
const Sob = {
  /**
   * Split a string into its words (maximal runs of `\w` characters).
   *
   * @param {string} str - text to split
   * @returns {string[]} the words in order; an empty array when there are none
   */
  stringToWords(str) {
    // String.prototype.match returns null (not []) when nothing matches;
    // normalise so callers can safely read `.length` or iterate.
    return str.match(/\b(\w+)\b/g) ?? [];
  },

  /**
   * Split a string into an array of characters, one entry per Unicode
   * code point (surrogate pairs stay together).
   *
   * @param {string} str - text to split
   * @returns {string[]} one string per code point
   */
  stringToChars(str) {
    return Array.from(str);
  },

  /**
   * List the UTF-16 code units of a string.
   *
   * @param {string} str - text to encode
   * @returns {number[]} one number per UTF-16 code unit
   */
  stringToUTF16(str) {
    return Array.from({ length: str.length }, (_, i) => str.charCodeAt(i));
  },

  /**
   * Encode a string as UTF-8.
   *
   * @param {string} str - text to encode
   * @returns {number[]} the UTF-8 bytes, each in the range 0..255
   */
  stringToUTF8(str) {
    const utf8 = [];
    // Iterating a string yields whole code points, so surrogate pairs
    // arrive already combined and never need manual index juggling.
    for (const ch of str) {
      let code = ch.codePointAt(0);
      if (code >= 0xd800 && code <= 0xdfff) {
        // An unpaired surrogate has no UTF-8 encoding; substitute
        // U+FFFD REPLACEMENT CHARACTER instead of emitting invalid bytes.
        code = 0xfffd;
      }
      if (code < 0x80) {
        utf8.push(code);
      } else if (code < 0x800) {
        utf8.push(0xc0 | (code >> 6), 0x80 | (code & 0x3f));
      } else if (code < 0x10000) {
        utf8.push(
          0xe0 | (code >> 12),
          0x80 | ((code >> 6) & 0x3f),
          0x80 | (code & 0x3f),
        );
      } else {
        utf8.push(
          0xf0 | (code >> 18),
          0x80 | ((code >> 12) & 0x3f),
          0x80 | ((code >> 6) & 0x3f),
          0x80 | (code & 0x3f),
        );
      }
    }
    return utf8;
  },

  /**
   * Escape the characters that are significant in HTML so that the text
   * can be inserted into markup and shown literally.
   *
   * @param {string} html - raw text
   * @returns {string} text with `&`, `<`, `>`, `"` and `'` replaced by entities
   */
  sanitiseHTML(html) {
    // Global regexes: replace() with a string pattern only touches the
    // first occurrence. `&` must go first so the entities produced by the
    // later replacements are not escaped a second time.
    return html
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#39;");
  },

  /**
   * Replace control characters, quotes and backslashes with their
   * backslash escape sequences (e.g. a newline becomes the two characters
   * `\` and `n`).
   *
   * @param {string} str - raw text
   * @returns {string} text with the listed characters written as escapes
   */
  sanitiseEscapes(str) {
    const escapes = new Map([
      // The backslash itself must be escaped, otherwise a real newline and
      // a literal backslash followed by "n" would produce the same output.
      ["\\", "\\\\"],
      ["\n", "\\n"],
      ["\r", "\\r"],
      ["\f", "\\f"],
      ["\t", "\\t"],
      ["\b", "\\b"],
      ["\"", "\\\""],
      ["'", "\\'"],
    ]);
    return Sob.stringToChars(str)
      .map((ch) => escapes.get(ch) ?? ch)
      .join("");
  },

  /**
   * Render the items of an iterable as a single string separated by ", ".
   *
   * @param {Iterable<*>} xs - items to render
   * @param {function(*): *} [each] - optional formatter applied to every
   *   item; when omitted the item itself is used
   * @returns {string} the joined text; "" for an empty iterable
   */
  showList(xs, each = undefined) {
    const parts = [];
    for (const x of xs) {
      // String() keeps null/undefined items visible as "null"/"undefined";
      // Array.prototype.join alone would turn them into empty strings.
      parts.push(String(each == null ? x : each(x)));
    }
    return parts.join(", ");
  },
};

// The diff continues with word-count.html (also a new file); the
// statements that follow belong to that file.
"
+ sb += `#words: ${words.length}
`
+ sb += `#characters: ${chars.length}
`
+ sb += `#bytes: ${bytes.length}
`
+ sb += "
"
+ sb += "[" + Sob.showList(words, x => `"${Sob.sanitiseEscapes(x)}"`) + "]";
+ sb += "
"
+ sb += "[" + Sob.showList(chars, x => `'${Sob.sanitiseEscapes(x)}'`) + "]";
+ sb += "
"
+ sb += "[" + Sob.showList(bytes) + "]";
+ sb += "