-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 60d0138
Showing
3 changed files
with
107 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/**
 * Sob: a small collection of string helpers (splitting, encoding, escaping
 * and list formatting).
 */
const Sob = {
  /**
   * Splits a string into its words, where a word is a maximal run of
   * ASCII word characters (`[A-Za-z0-9_]`).
   *
   * @param {string} str - Text to split.
   * @returns {string[]} The words in order of appearance; an empty array
   *   when the text contains no word at all.
   */
  stringToWords(str) {
    // String.prototype.match yields null (not []) when nothing matches,
    // which would make callers reading `.length` throw.
    return str.match(/\b(\w+)\b/g) ?? [];
  },

  /**
   * Splits a string into its Unicode code points (one array entry per code
   * point, so an astral character such as an emoji is a single entry).
   *
   * @param {string} str - Text to split.
   * @returns {string[]} One string per code point.
   */
  stringToChars(str) {
    return Array.from(str);
  },

  /**
   * Returns the UTF-16 code units of a string.
   *
   * @param {string} str - Text to encode.
   * @returns {number[]} One number (0..0xFFFF) per UTF-16 code unit.
   */
  stringToUTF16(str) {
    return Array.from({ length: str.length }, (_, i) => str.charCodeAt(i));
  },

  /**
   * Encodes a string as UTF-8.
   *
   * Unpaired surrogates cannot be represented in UTF-8; each one is encoded
   * as U+FFFD (REPLACEMENT CHARACTER), the same result `TextEncoder` gives.
   *
   * @param {string} str - Text to encode.
   * @returns {number[]} The UTF-8 bytes (each 0..255).
   */
  stringToUTF8(str) {
    const utf8 = [];
    // String iteration walks code points: valid surrogate pairs arrive
    // combined, and only unpaired surrogates arrive as a lone code unit.
    for (const ch of str) {
      let code = ch.codePointAt(0);
      if (code >= 0xd800 && code <= 0xdfff) {
        code = 0xfffd;
      }
      if (code < 0x80) {
        utf8.push(code);
      } else if (code < 0x800) {
        utf8.push(0xc0 | (code >> 6), 0x80 | (code & 0x3f));
      } else if (code < 0x10000) {
        utf8.push(
          0xe0 | (code >> 12),
          0x80 | ((code >> 6) & 0x3f),
          0x80 | (code & 0x3f),
        );
      } else {
        utf8.push(
          0xf0 | (code >> 18),
          0x80 | ((code >> 12) & 0x3f),
          0x80 | ((code >> 6) & 0x3f),
          0x80 | (code & 0x3f),
        );
      }
    }
    return utf8;
  },

  /**
   * Escapes the characters that are significant in HTML text and attribute
   * values (`&`, `<`, `>`, `"`, `'`) as character references.
   *
   * @param {string} html - Raw text to make safe for insertion into HTML.
   * @returns {string} The text with every occurrence escaped.
   */
  sanitiseHTML(html) {
    const entities = {
      "&": "&amp;",
      "<": "&lt;",
      ">": "&gt;",
      "\"": "&quot;",
      "'": "&#39;",
    };
    // A global regex is required: replace() with a string pattern only
    // rewrites the first occurrence. Doing it in a single pass also means
    // the "&" of an entity produced here is never escaped a second time.
    return html.replace(/[&<>"']/g, (ch) => entities[ch]);
  },

  /**
   * Rewrites control characters, quotes and backslashes as their
   * backslash escape sequences so they are visible when displayed.
   *
   * @param {string} str - Text to escape.
   * @returns {string} The text with each special character replaced by
   *   its two-character escape (for example a newline becomes `\n`).
   */
  sanitiseEscapes(str) {
    const escapes = {
      "\n": "\\n",
      "\r": "\\r",
      "\f": "\\f",
      "\t": "\\t",
      "\b": "\\b",
      "\"": "\\\"",
      "'": "\\'",
      // Without this a literal backslash followed by "n" would be
      // indistinguishable from an escaped newline in the output.
      "\\": "\\\\",
    };
    return Sob.stringToChars(str).map((ch) => escapes[ch] ?? ch).join("");
  },

  /**
   * Formats the items of an iterable as a single string separated by ", ".
   *
   * @param {Iterable<*>} xs - Items to format.
   * @param {function(*): *} [each] - Optional formatter applied to every
   *   item; when omitted (or null) the item itself is used.
   * @returns {string} The joined text; "" for an empty iterable.
   */
  showList(xs, each = undefined) {
    // String() keeps null/undefined items visible; join() alone would
    // turn them into empty strings.
    return Array.from(xs, (x) => String(each == null ? x : each(x))).join(", ");
  },
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
<!-- Counter page: sob.js provides the Sob helpers used by char-count.js, so it is loaded first. -->
<script src="./sob.js"></script>
<script src="./char-count.js"></script>
<!-- aria-label gives the textarea an accessible name (there is no visible label). -->
<textarea id="src" rows="10" style="width : 100%" aria-label="text to count">paste text here</textarea>
<hr>
<!-- type="button" keeps this from acting as a submit button if the markup is ever placed inside a form. -->
<button type="button" onclick="getCounts()">word count</button>
<!-- getCounts() writes its result markup into this element. -->
<div id="dest"></div>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/**
 * Reads the current text of the input element with id "src".
 *
 * @returns {string} The element's `value`.
 */
function getTextContent() {
  const sourceBox = document.getElementById("src");
  return sourceBox.value;
}
|
||
/**
 * Replaces the contents of the element with id "dest" with the given markup.
 *
 * @param {string} html - Markup assigned to the element's `innerHTML`;
 *   it is inserted as-is, so the caller is responsible for escaping.
 */
function setResultHTML(html) {
  const resultPane = document.getElementById("dest");
  resultPane.innerHTML = html;
}
|
||
/**
 * Counts the words, characters (code points) and UTF-8 bytes of the text
 * returned by getTextContent() and renders the totals, followed by three
 * collapsible lists of the individual items, through setResultHTML().
 */
function getCounts() {
  const text = getTextContent();
  // The word splitter is built on String.prototype.match, which returns
  // null when the text has no words; treat that as an empty list.
  const words = Sob.stringToWords(text) ?? [];
  const chars = Sob.stringToChars(text);
  const bytes = Sob.stringToUTF8(text);

  // The text comes from the user and ends up in innerHTML, so every item
  // is HTML-escaped after its control characters are made visible.
  const display = (x) => Sob.sanitiseHTML(Sob.sanitiseEscapes(x));

  // One collapsible block showing `items` as a bracketed list.
  const section = (title, items) =>
    `<details><summary>${title}</summary><pre><code style="text-wrap : wrap">` +
    `[${items}]` +
    "</code></pre></details>";

  const html = [
    "<p>",
    `<b>#words</b>: <code>${words.length}</code><br>`,
    `<b>#characters</b>: <code>${chars.length}</code><br>`,
    `<b>#bytes</b>: <code>${bytes.length}</code>`,
    "</p>",
    section("expand word list", Sob.showList(words, (x) => `"${display(x)}"`)),
    section("expand character list", Sob.showList(chars, (x) => `'${display(x)}'`)),
    section("expand byte list", Sob.showList(bytes)),
  ].join("");

  setResultHTML(html);
  console.log("got counts");
}