Skip to content

Commit

Permalink
word counter
Browse files Browse the repository at this point in the history
  • Loading branch information
katsaii committed Oct 3, 2023
0 parents commit 60d0138
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 0 deletions.
70 changes: 70 additions & 0 deletions sob.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/**
 * Sob: a small bag of string helpers (splitting, encoding, escaping, and
 * list formatting) used by the counter pages.
 */
const Sob = {
  /**
   * Splits a string into its words.
   *
   * A word is a maximal run of letters, combining marks, digits or
   * underscores. For ASCII input this is the same as `\w+`, but non-ASCII
   * words such as "café" are kept whole instead of being cut at the first
   * non-ASCII letter.
   *
   * @param {string} str - text to split
   * @returns {string[]} the words in order; an empty array when there are none
   */
  stringToWords(str) {
    // `match` with the `g` flag yields null (not []) when nothing matches.
    return str.match(/[\p{L}\p{M}\p{N}_]+/gu) ?? [];
  },

  /**
   * Splits a string into Unicode code points (so a surrogate pair becomes a
   * single element).
   *
   * @param {string} str - text to split
   * @returns {string[]} one string per code point
   */
  stringToChars(str) {
    return Array.from(str);
  },

  /**
   * Lists the UTF-16 code units of a string.
   *
   * @param {string} str - text to encode
   * @returns {number[]} one number per UTF-16 code unit
   */
  stringToUTF16(str) {
    return Array.from({ length: str.length }, (_, i) => str.charCodeAt(i));
  },

  /**
   * Encodes a string as UTF-8.
   *
   * Unpaired surrogates cannot be represented in UTF-8; they are encoded as
   * U+FFFD (the replacement character) rather than consuming the following
   * code unit.
   *
   * @param {string} str - text to encode
   * @returns {number[]} the UTF-8 bytes
   */
  stringToUTF8(str) {
    const utf8 = [];
    // String iteration walks code points; a lone surrogate is yielded alone.
    for (const ch of str) {
      let code = ch.codePointAt(0);
      if (code >= 0xd800 && code <= 0xdfff) {
        code = 0xfffd;
      }
      if (code < 0x80) {
        utf8.push(code);
      } else if (code < 0x800) {
        utf8.push(0xc0 | (code >> 6), 0x80 | (code & 0x3f));
      } else if (code < 0x10000) {
        utf8.push(
          0xe0 | (code >> 12),
          0x80 | ((code >> 6) & 0x3f),
          0x80 | (code & 0x3f),
        );
      } else {
        utf8.push(
          0xf0 | (code >> 18),
          0x80 | ((code >> 12) & 0x3f),
          0x80 | ((code >> 6) & 0x3f),
          0x80 | (code & 0x3f),
        );
      }
    }
    return utf8;
  },

  /**
   * Escapes the characters `& < > " '` as HTML entities.
   *
   * @param {string} html - raw text
   * @returns {string} text with every occurrence of those characters escaped
   */
  sanitiseHTML(html) {
    // Global regexes: a string pattern would replace only the first match.
    // `&` must be handled first so the entities added below stay intact.
    return html
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&#039;");
  },

  /**
   * Rewrites control characters, quotes and backslashes as backslash escape
   * sequences so they can be displayed inside a quoted literal.
   *
   * @param {string} str - raw text
   * @returns {string} text with the special characters spelled as escapes
   */
  sanitiseEscapes(str) {
    const escapes = {
      // Backslash is escaped too, otherwise a real newline and the two
      // characters `\` + `n` would render identically.
      "\\": "\\\\",
      "\n": "\\n",
      "\r": "\\r",
      "\f": "\\f",
      "\t": "\\t",
      "\b": "\\b",
      "\v": "\\v",
      "\"": "\\\"",
      "'": "\\'",
    };
    return Sob.stringToChars(str).map((ch) => escapes[ch] ?? ch).join("");
  },

  /**
   * Joins the items of an iterable with ", ".
   *
   * @param {Iterable<*>} xs - items to show
   * @param {function(*): *} [each] - optional formatter applied to every item
   * @returns {string} the joined text; "" for an empty iterable
   */
  showList(xs, each = undefined) {
    return Array.from(xs, (x) => `${each == null ? x : each(x)}`).join(", ");
  },
};
6 changes: 6 additions & 0 deletions word-count.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Word counter page: paste text into the textarea, press the button, and the counts are rendered into #dest. -->
<!-- sob.js provides the Sob helpers; word-count.js defines getCounts(), which the button below calls. -->
<script src="./sob.js"></script>
<script src="./word-count.js"></script>
<textarea id="src" rows="10" style="width : 100%">paste text here</textarea>
<hr>
<button onclick="getCounts()">word count</button>
<div id="dest"></div>
31 changes: 31 additions & 0 deletions word-count.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/**
 * Reads the current text of the input element with id "src".
 *
 * @returns {string} the element's `value`
 */
function getTextContent() {
  const source = document.getElementById("src");
  return source.value;
}

/**
 * Replaces the contents of the element with id "dest" with the given markup.
 *
 * @param {string} html - markup assigned to the element's `innerHTML`
 */
function setResultHTML(html) {
  const target = document.getElementById("dest");
  target.innerHTML = html;
}

/**
 * Counts the words, characters and UTF-8 bytes of the input text and renders
 * the totals, plus expandable lists of each, into the result element.
 *
 * Reads the text via getTextContent(), writes the markup via setResultHTML(),
 * and logs "got counts" when done.
 */
function getCounts() {
  const text = getTextContent();
  // stringToWords may hand back null when the text holds no words at all
  // (empty or punctuation-only input); treat that as "no words".
  const words = Sob.stringToWords(text) ?? [];
  const chars = Sob.stringToChars(text);
  const bytes = Sob.stringToUTF8(text);

  // The items come straight from user input and end up in innerHTML, so they
  // are HTML-escaped after the backslash escapes are applied.
  const show = (x) => Sob.sanitiseHTML(Sob.sanitiseEscapes(x));
  const listOpen = (title) =>
    `<details><summary>expand ${title} list</summary><pre><code style="text-wrap : wrap">`;
  const listClose = "</code></pre></details>";

  const html = [
    "<p>",
    `<b>#words</b>: <code>${words.length}</code><br>`,
    `<b>#characters</b>: <code>${chars.length}</code><br>`,
    `<b>#bytes</b>: <code>${bytes.length}</code>`,
    "</p>",
    listOpen("word"),
    `[${Sob.showList(words, (x) => `"${show(x)}"`)}]`,
    listClose,
    listOpen("character"),
    `[${Sob.showList(chars, (x) => `'${show(x)}'`)}]`,
    listClose,
    listOpen("byte"),
    `[${Sob.showList(bytes)}]`,
    listClose,
  ].join("");

  setResultHTML(html);
  console.log("got counts");
}

0 comments on commit 60d0138

Please sign in to comment.