// sob.js — small string helpers: tokenising, encoding, escaping and list formatting.
const Sob = {
  /**
   * Extract the runs of word characters (`\w`) from a string.
   *
   * @param {string} str - Text to scan.
   * @returns {string[]} The matches in order of appearance; an empty array
   *   when the text contains no word characters.
   */
  stringToWords(str) {
    // String.prototype.match yields null (not []) when a global regex finds
    // nothing, which would break callers that read `.length`.
    return str.match(/\b\w+\b/g) ?? [];
  },

  /**
   * Split a string into code points (surrogate pairs stay together).
   *
   * @param {string} str - Text to split.
   * @returns {string[]} One string per code point.
   */
  stringToChars(str) {
    return Array.from(str);
  },

  /**
   * List the UTF-16 code units of a string.
   *
   * @param {string} str - Text to encode.
   * @returns {number[]} One number per code unit, as given by `charCodeAt`.
   */
  stringToUTF16(str) {
    return Array.from({ length: str.length }, (_, i) => str.charCodeAt(i));
  },

  /**
   * Encode a string as UTF-8.
   *
   * Unpaired surrogates cannot be represented in UTF-8; TextEncoder replaces
   * each with U+FFFD (bytes EF BF BD) instead of emitting an invalid sequence.
   *
   * @param {string} str - Text to encode.
   * @returns {number[]} The encoded bytes as a plain array.
   */
  stringToUTF8(str) {
    return Array.from(new TextEncoder().encode(str));
  },

  /**
   * Escape the characters that are significant in HTML text and attributes.
   *
   * NOTE(review): the entity strings were not legible in the reviewed copy of
   * this method (each replacement looked identical to its input). The usual
   * entities are used here — confirm, in particular `&#39;` for the apostrophe.
   *
   * @param {string} html - Untrusted text.
   * @returns {string} The text with `& < > " '` replaced by entities.
   */
  sanitiseHTML(html) {
    const entities = {
      "&": "&amp;",
      "<": "&lt;",
      ">": "&gt;",
      "\"": "&quot;",
      "'": "&#39;",
    };
    // One global pass: replaces every occurrence (a string pattern would stop
    // after the first) and never re-escapes the "&" of an entity just written.
    return html.replace(/[&<>"']/g, (ch) => entities[ch]);
  },

  /**
   * Rewrite control characters, quotes and backslashes as backslash escapes
   * so that the text can be shown inside a quoted literal.
   *
   * @param {string} str - Text to escape.
   * @returns {string} The text with each listed character replaced by its
   *   two-character escape; all other characters are unchanged.
   */
  sanitiseEscapes(str) {
    const escapes = {
      "\n": "\\n",
      "\r": "\\r",
      "\f": "\\f",
      "\t": "\\t",
      "\b": "\\b",
      "\"": "\\\"",
      "'": "\\'",
      // Without this a literal backslash followed by "n" would be
      // indistinguishable from an escaped newline.
      "\\": "\\\\",
    };
    return Array.from(str, (ch) => escapes[ch] ?? ch).join("");
  },

  /**
   * Join the items of an iterable with ", ".
   *
   * @param {Iterable<*>} xs - Items to show.
   * @param {(function(*): *)=} each - Optional formatter applied to every
   *   item; when omitted (or null) the item itself is used.
   * @returns {string} The formatted items separated by ", "; "" when empty.
   */
  showList(xs, each = undefined) {
    // String() keeps null/undefined items visible; Array.prototype.join alone
    // would render them as empty strings.
    return Array.from(xs, (x) => String(each == null ? x : each(x))).join(", ");
  },
};
+ +
// word-count.js — reads the text in #src and renders word, character and byte
// statistics into #dest. Uses the global `Sob` helpers from sob.js.

/**
 * Read the text to analyse.
 *
 * @returns {string} The `value` of the element with id "src".
 */
function getTextContent() {
  return document.getElementById("src").value;
}

/**
 * Replace the contents of the result container.
 *
 * @param {string} html - Markup assigned to `innerHTML` of the element with
 *   id "dest". It is inserted verbatim, so any untrusted text inside it must
 *   already be escaped by the caller.
 */
function setResultHTML(html) {
  document.getElementById("dest").innerHTML = html;
}

/**
 * Count the words, characters (code points) and UTF-8 bytes of the current
 * text and render the totals plus three expandable lists into the result
 * container.
 *
 * NOTE(review): the HTML tags inside this function's string literals were not
 * legible in the reviewed copy of the file. The `<p>`, `<br>` and
 * `<details>/<summary>/<pre>` tags below are a reconstruction — confirm them
 * against the intended layout.
 */
function getCounts() {
  const text = getTextContent();
  // String.prototype.match (behind stringToWords) returns null when the text
  // has no word characters; treat that as an empty list instead of crashing
  // on `.length`.
  const words = Sob.stringToWords(text) ?? [];
  const chars = Sob.stringToChars(text);
  const bytes = Sob.stringToUTF8(text);

  // The items come from user input and end up in innerHTML, so each one is
  // HTML-escaped after its backslash escapes have been applied.
  const quoted = (quote) => (x) =>
    `${quote}${Sob.sanitiseHTML(Sob.sanitiseEscapes(x))}${quote}`;

  // One collapsible section holding a bracketed, preformatted list.
  const section = (title, list) =>
    `<details><summary>${title}</summary><pre>[${list}]</pre></details>`;

  const html = [
    "<p>",
    `#words: ${words.length}<br>`,
    `#characters: ${chars.length}<br>`,
    `#bytes: ${bytes.length}`,
    "</p>",
    section("expand word list", Sob.showList(words, quoted("\""))),
    section("expand character list", Sob.showList(chars, quoted("'"))),
    section("expand byte list", Sob.showList(bytes)),
  ].join("");

  setResultHTML(html);
  console.log("got counts");
}