Skip to content

Commit

Permalink
Merge pull request #204 from hyperaudio/202-implement-clientside-whisper
Browse files Browse the repository at this point in the history
202 implement clientside whisper
  • Loading branch information
maboa authored Nov 22, 2023
2 parents aa41ecc + 180b8da commit 49c6f60
Show file tree
Hide file tree
Showing 6 changed files with 36,541 additions and 21 deletions.
40 changes: 40 additions & 0 deletions hyperaudio-client-whisper-template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<!DOCTYPE html>
<!--
  Form template for client-side Whisper transcription.

  Contents of #whisper-client-template:
    * #media            : media URL field (currently disabled)
    * #file-input       : local audio/video file picker
    * #model-name-input : Whisper model selector; English-only variants carry
                          the ".en" suffix in their value
    * #form-submit-btn  : a <label> bound to the #transcribe-modal checkbox in
                          index.html, so clicking it also toggles the modal

  The element ids are presumably looked up by js/hyperaudio-lite-editor-whisper.js
  (not reviewed here); keep them stable.

  NOTE(review): id="media" is also used in hyperaudio-deepgram-modal.html, and the
  Deepgram script queries '#media' on the whole document. If both templates end up
  in the same DOM tree, confirm each script resolves the element it intends.
-->
<html>
<body>
<div id="whisper-client-template">
  <form>
    <div class="mb-3">
      <input id="media" type="text" placeholder="Link to media" aria-label="Link to media" class="input input-bordered w-full max-w-xs" disabled />
      <span style="display:block; padding:16px" class="label-text">or</span>
      <input id="file-input" name="file" type="file" aria-label="Media file to transcribe" class="file-input w-full max-w-xs" />
      <hr class="my-2 h-0 border border-t-0 border-solid border-neutral-700 opacity-50 dark:border-neutral-200" />

      <!--<label for="file-input" class="form-label">Which video/audio file should be transcribed?</label>
      <input class="form-control" type="file" id="file-input" accept=".mp3,.wav,.mp4,.mov,.avi,.flv,.wmv,.mpeg,.mpg,.webm,.opus">
      <div class="form-text">We only support audio and video files.</div>-->
    </div>
    <div class="mb-3">
      <label for="model-name-input" class="form-label label-text">Which model should be used?</label>
      <div>
        <!-- Named by the <label> above; no aria-label, which would override it. -->
        <select class="form-select select select-bordered w-full max-w-xs" id="model-name-input">
          <option selected value="whisper-tiny.en">Whisper (Tiny) English</option>
          <option value="whisper-tiny">Whisper (Tiny)</option>
          <!-- Value must be the ".en" variant; it previously duplicated "whisper-base". -->
          <option value="whisper-base.en">Whisper (Base) English</option>
          <option value="whisper-base">Whisper (Base)</option>
          <option value="whisper-small.en">Whisper (Small) English</option>
          <option value="whisper-small">Whisper (Small)</option>
        </select>
      </div>
      <div class="form-text" style="font-size: 90%;">
        <p style="padding-top:16px">The models are listed in order of size. The larger the model, the more accurate it is – and slower to process.</p>
        <p>The English models are slightly more accurate (for the English language only).</p>
      </div>
    </div>
    <div class="modal-action">
      <label id="form-submit-btn" for="transcribe-modal" class="btn btn-primary">TRANSCRIBE</label>
    </div>
    <!--<button id="form-submit-btn" class="btn btn-primary" disabled="">Submit</button>-->
  </form>
</div>
</body>
</html>
8 changes: 4 additions & 4 deletions hyperaudio-deepgram-modal.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
<div id="deepgram-modal-template">
<form id="deepgram-form" name="deepgram-form">
<div class="flex flex-col gap-4 w-full">
<label id="close-modal" for="transcribe-modal" class="btn btn-sm btn-circle absolute right-2 top-2"></label>
<h3 class="font-bold text-lg">Transcribe</h3>
<!--<label id="close-modal" for="transcribe-modal" class="btn btn-sm btn-circle absolute right-2 top-2">✕</label>
<h3 class="font-bold text-lg">Transcribe</h3>-->
<input id="token" type="text" placeholder="Deepgram token" class="input input-bordered w-full max-w-xs" />
<hr class="my-2 h-0 border border-t-0 border-solid border-neutral-700 opacity-50 dark:border-neutral-200" />
<input id="media" type="text" placeholder="Link to media" class="input input-bordered w-full max-w-xs" />
<span class="label-text">or</span>
<input id="file" name="file" type="file" class="file-input w-full max-w-xs" />
<input id="deepgram-file" name="file" type="file" class="file-input w-full max-w-xs" />
<hr class="my-2 h-0 border border-t-0 border-solid border-neutral-700 opacity-50 dark:border-neutral-200" />

<span class="label-text">Model</span>
Expand All @@ -32,7 +32,7 @@ <h3 class="font-bold text-lg">Transcribe</h3>

</div>
<div class="modal-action">
<label id="transcribe-btn" for="transcribe-modal" class="btn btn-primary">Transcribe</label>
<label id="transcribe-btn" for="transcribe-modal" class="btn btn-primary">TRANSCRIBE</label>
</div>
</form>
</div>
Expand Down
44 changes: 36 additions & 8 deletions index.html
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<!-- (C) The Hyperaudio Project. AGPL 3.0 @license: https://www.gnu.org/licenses/agpl-3.0.en.html -->
<!-- Hyperaudio Lite Editor - Version 0.2.17 -->
<!-- Hyperaudio Lite Editor - Version 0.3 -->

<!-- Hyperaudio Lite Editor's source code is provided under a dual license model.
<!-- Hyperaudio Lite Editor's source code is provided under a dual license model.
Commercial license
------------------
Expand All @@ -24,10 +24,10 @@
<link rel="stylesheet" href="css/hyperaudio-lite-player.css">

<script src="js/hyperaudio-lite-editor-deepgram.js"></script>
<!--<script src="js/hyperaudio-lite-editor-captions.js"></script>-->
<script src="js/hyperaudio-lite-editor-whisper.js"></script>

<!-- DaisyUI / Tailwind -->
<link href="https://cdn.jsdelivr.net/npm/daisyui@2.51.5/dist/full.css" rel="stylesheet" type="text/css" />
<link href="https://cdn.jsdelivr.net/npm/daisyui@4.4.2/dist/full.css" rel="stylesheet" type="text/css" />
<script src="https://cdn.tailwindcss.com"></script>

<!-- Meta Tags required for
Expand Down Expand Up @@ -146,7 +146,7 @@
</button>
<div class="dropdown menu-compact">
<label tabindex="0" class="btn m-1 btn-outline gap-2">
<span id="file-dropdown-text">File</span>
<span id="file-dropdown-text">FILE</span>
<svg id="file-dropdown-symbol" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-chevron-down"><polyline points="6 9 12 15 18 9"></polyline></svg>
<svg id="file-dropdown-symbol-mobile" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-menu"><line x1="4" x2="20" y1="12" y2="12"/><line x1="4" x2="20" y1="6" y2="6"/><line x1="4" x2="20" y1="18" y2="18"/></svg>
</label>
Expand Down Expand Up @@ -208,14 +208,28 @@ <h3 class="text-lg font-bold">Topics</h3>
</div>
</div>
<label for="transcribe-modal" class="btn btn-primary gap-2">
<span id="transcribe-label">Transcribe</span>
<span id="transcribe-label-mobile">New</span>
<span id="transcribe-label">TRANSCRIBE</span>
<span id="transcribe-label-mobile">NEW</span>
<svg id="transcribe-logo" xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-sticker"><path d="M15.5 3H5a2 2 0 0 0-2 2v14c0 1.1.9 2 2 2h14a2 2 0 0 0 2-2V8.5L15.5 3Z"></path><path d="M15 3v6h6"></path><path d="M10 16s.8 1 2 1c1.3 0 2-1 2-1"></path><path d="M8 13h0"></path><path d="M16 13h0"></path></svg>
</label>
<input type="checkbox" id="transcribe-modal" class="modal-toggle" />
<div class="modal">
<div class="modal-box">
<deepgram-service templateUrl="hyperaudio-deepgram-modal.html"></deepgram-service>
<label id="close-modal" for="transcribe-modal" class="btn btn-sm btn-circle absolute right-2 top-2"></label>
<h3 class="font-bold text-lg" style="margin-bottom:16px">Transcribe</h3>
<div role="tablist" class="tabs tabs-lifted">

<input type="radio" name="my_tabs_2" role="tab" class="tab" style="width:160px" aria-label="Whisper (Local)" checked />
<div role="tabpanel" class="tab-content bg-base-100 border-base-300 rounded-box p-10">
<client-whisper-service></client-whisper-service>
</div>

<input type="radio" name="my_tabs_2" role="tab" class="tab" style="width:160px" aria-label="Deepgram (Cloud)" />
<div role="tabpanel" class="tab-content bg-base-100 border-base-300 rounded-box p-10">
<deepgram-service templateUrl="hyperaudio-deepgram-modal.html"></deepgram-service>
</div>

</div>
</div>
</div>
</div>
Expand Down Expand Up @@ -402,6 +416,12 @@ <h3 class="text-lg font-bold">Topics</h3>
</section>
</article>
</div>

<!--<div id="transcription-progress-bar-container">
<div class="form-text">Transcription progress:</div><div class="progress">
<div id="transcription-progress-bar" class="progress-bar" role="progressbar" style="width:0%" aria-valuenow="25" aria-valuemin="0" aria-valuemax="100">0%</div>
</div>
</div>-->
</div>
<div id="captionsource-alert" class="alert alert-info shadow-lg" style="visibility:hidden; z-index:2; position:absolute; top:50%; left:50%; width:480px; height:140px; margin: -240px 0 0 -240px;">
<div>
Expand Down Expand Up @@ -1118,5 +1138,13 @@ <h3 class="font-bold text-lg">Load from Local Storage</h3>
}

</script>
<script>
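// Status icons stored as URL-encoded SVG data URIs (usable directly as an image src).
// They are declared in this page-level script, rather than at the end of
// js/hyperaudio-lite-editor-deepgram.js, so every script on the page can reference them.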

const transcribingSvg = "data:image/svg+xml,%3Csvg width='45' height='45' viewBox='0 0 45 45' xmlns='http://www.w3.org/2000/svg' stroke='%23000'%3E%3Cg fill='none' fill-rule='evenodd' transform='translate(1 1)' stroke-width='2'%3E%3Ccircle cx='22' cy='22' r='6' stroke-opacity='0'%3E%3Canimate attributeName='r' begin='1.5s' dur='3s' values='6;22' calcMode='linear' repeatCount='indefinite' /%3E%3Canimate attributeName='stroke-opacity' begin='1.5s' dur='3s' values='1;0' calcMode='linear' repeatCount='indefinite' /%3E%3Canimate attributeName='stroke-width' begin='1.5s' dur='3s' values='2;0' calcMode='linear' repeatCount='indefinite' /%3E%3C/circle%3E%3Ccircle cx='22' cy='22' r='6' stroke-opacity='0'%3E%3Canimate attributeName='r' begin='3s' dur='3s' values='6;22' calcMode='linear' repeatCount='indefinite' /%3E%3Canimate attributeName='stroke-opacity' begin='3s' dur='3s' values='1;0' calcMode='linear' repeatCount='indefinite' /%3E%3Canimate attributeName='stroke-width' begin='3s' dur='3s' values='2;0' calcMode='linear' repeatCount='indefinite' /%3E%3C/circle%3E%3Ccircle cx='22' cy='22' r='8'%3E%3Canimate attributeName='r' begin='0s' dur='1.5s' values='6;1;2;3;4;5;6' calcMode='linear' repeatCount='indefinite' /%3E%3C/circle%3E%3C/g%3E%3C/svg%3E";

const errorSvg = "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' width='256' height='256' viewBox='0 0 256 256' xml:space='preserve'%3E%3Cdefs%3E%3C/defs%3E%3Cg style='stroke: none; stroke-width: 0; stroke-dasharray: none; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 10; fill: none; fill-rule: nonzero; opacity: 1;' transform='translate(1.4065934065934016 1.4065934065934016) scale(2.81 2.81)' %3E%3Cpath d='M 85.429 85.078 H 4.571 c -1.832 0 -3.471 -0.947 -4.387 -2.533 c -0.916 -1.586 -0.916 -3.479 0 -5.065 L 40.613 7.455 C 41.529 5.869 43.169 4.922 45 4.922 c 0 0 0 0 0 0 c 1.832 0 3.471 0.947 4.386 2.533 l 40.429 70.025 c 0.916 1.586 0.916 3.479 0.001 5.065 C 88.901 84.131 87.261 85.078 85.429 85.078 z M 45 7.922 c -0.747 0 -1.416 0.386 -1.79 1.033 L 2.782 78.979 c -0.373 0.646 -0.373 1.419 0 2.065 c 0.374 0.647 1.042 1.033 1.789 1.033 h 80.858 c 0.747 0 1.416 -0.387 1.789 -1.033 s 0.373 -1.419 0 -2.065 L 46.789 8.955 C 46.416 8.308 45.747 7.922 45 7.922 L 45 7.922 z M 45 75.325 c -4.105 0 -7.446 -3.34 -7.446 -7.445 s 3.34 -7.445 7.446 -7.445 s 7.445 3.34 7.445 7.445 S 49.106 75.325 45 75.325 z M 45 63.435 c -2.451 0 -4.446 1.994 -4.446 4.445 s 1.995 4.445 4.446 4.445 s 4.445 -1.994 4.445 -4.445 S 47.451 63.435 45 63.435 z M 45 57.146 c -3.794 0 -6.882 -3.087 -6.882 -6.882 V 34.121 c 0 -3.794 3.087 -6.882 6.882 -6.882 c 3.794 0 6.881 3.087 6.881 6.882 v 16.144 C 51.881 54.06 48.794 57.146 45 57.146 z M 45 30.239 c -2.141 0 -3.882 1.741 -3.882 3.882 v 16.144 c 0 2.141 1.741 3.882 3.882 3.882 c 2.14 0 3.881 -1.741 3.881 -3.882 V 34.121 C 48.881 31.98 47.14 30.239 45 30.239 z' style='stroke: none; stroke-width: 1; stroke-dasharray: none; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 10; fill: rgb(0,0,0); fill-rule: nonzero; opacity: 1;' transform=' matrix(1 0 0 1 0 0) ' stroke-linecap='round' /%3E%3C/g%3E%3C/svg%3E";
</script>

</body>
</html>
14 changes: 5 additions & 9 deletions js/hyperaudio-lite-editor-deepgram.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ class DeepgramService extends HTMLElement {

clearFilePicker(event) {
event.preventDefault();
document.querySelector('#file').value = "";
document.querySelector('#deepgram-file').value = "";
}

updatePlayerWithLocalFile(event) {
const file = document.querySelector('[name=file]').files[0];
const file = document.querySelector('#deepgram-file').files[0];
// Create a new FileReader instance
const reader = new FileReader();

Expand Down Expand Up @@ -153,7 +153,7 @@ class DeepgramService extends HTMLElement {
const model = document.querySelector('#language-model').value;
let media = document.querySelector('#media').value;
const token = document.querySelector('#token').value;
const file = document.querySelector('[name=file]').files[0];
const file = document.querySelector('#deepgram-file').files[0];
let tier = document.querySelector('#tier').value;

if (media.toLowerCase().startsWith("https://") === false && media.toLowerCase().startsWith("http://") === false) {
Expand Down Expand Up @@ -217,10 +217,10 @@ class DeepgramService extends HTMLElement {
customElements.define('deepgram-service', DeepgramService);

function addModalEventListeners(modal) {
document.querySelector('#file').addEventListener('change',modal.clearMediaUrl);
document.querySelector('#deepgram-file').addEventListener('change',modal.clearMediaUrl);
document.querySelector('#media').addEventListener('change',modal.clearFilePicker);
document.querySelector('#transcribe-btn').addEventListener('click', modal.getData);
document.querySelector('#file').addEventListener('change', modal.updatePlayerWithLocalFile);
document.querySelector('#deepgram-file').addEventListener('change', modal.updatePlayerWithLocalFile);
document.querySelector('#language-model').addEventListener('change', modal.updateDropdowns);
document.querySelector('#language-model').addEventListener('change', modal.updateTierDropdown);
document.querySelector('#language').addEventListener('change', modal.updateTierDropdown);
Expand Down Expand Up @@ -678,7 +678,3 @@ function populateLanguageDeepgramRestricted() {

document.querySelector("#tier").disabled=false;
}

const transcribingSvg = "data:image/svg+xml,%3Csvg width='45' height='45' viewBox='0 0 45 45' xmlns='http://www.w3.org/2000/svg' stroke='%23000'%3E%3Cg fill='none' fill-rule='evenodd' transform='translate(1 1)' stroke-width='2'%3E%3Ccircle cx='22' cy='22' r='6' stroke-opacity='0'%3E%3Canimate attributeName='r' begin='1.5s' dur='3s' values='6;22' calcMode='linear' repeatCount='indefinite' /%3E%3Canimate attributeName='stroke-opacity' begin='1.5s' dur='3s' values='1;0' calcMode='linear' repeatCount='indefinite' /%3E%3Canimate attributeName='stroke-width' begin='1.5s' dur='3s' values='2;0' calcMode='linear' repeatCount='indefinite' /%3E%3C/circle%3E%3Ccircle cx='22' cy='22' r='6' stroke-opacity='0'%3E%3Canimate attributeName='r' begin='3s' dur='3s' values='6;22' calcMode='linear' repeatCount='indefinite' /%3E%3Canimate attributeName='stroke-opacity' begin='3s' dur='3s' values='1;0' calcMode='linear' repeatCount='indefinite' /%3E%3Canimate attributeName='stroke-width' begin='3s' dur='3s' values='2;0' calcMode='linear' repeatCount='indefinite' /%3E%3C/circle%3E%3Ccircle cx='22' cy='22' r='8'%3E%3Canimate attributeName='r' begin='0s' dur='1.5s' values='6;1;2;3;4;5;6' calcMode='linear' repeatCount='indefinite' /%3E%3C/circle%3E%3C/g%3E%3C/svg%3E";

const errorSvg = "data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' width='256' height='256' viewBox='0 0 256 256' xml:space='preserve'%3E%3Cdefs%3E%3C/defs%3E%3Cg style='stroke: none; stroke-width: 0; stroke-dasharray: none; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 10; fill: none; fill-rule: nonzero; opacity: 1;' transform='translate(1.4065934065934016 1.4065934065934016) scale(2.81 2.81)' %3E%3Cpath d='M 85.429 85.078 H 4.571 c -1.832 0 -3.471 -0.947 -4.387 -2.533 c -0.916 -1.586 -0.916 -3.479 0 -5.065 L 40.613 7.455 C 41.529 5.869 43.169 4.922 45 4.922 c 0 0 0 0 0 0 c 1.832 0 3.471 0.947 4.386 2.533 l 40.429 70.025 c 0.916 1.586 0.916 3.479 0.001 5.065 C 88.901 84.131 87.261 85.078 85.429 85.078 z M 45 7.922 c -0.747 0 -1.416 0.386 -1.79 1.033 L 2.782 78.979 c -0.373 0.646 -0.373 1.419 0 2.065 c 0.374 0.647 1.042 1.033 1.789 1.033 h 80.858 c 0.747 0 1.416 -0.387 1.789 -1.033 s 0.373 -1.419 0 -2.065 L 46.789 8.955 C 46.416 8.308 45.747 7.922 45 7.922 L 45 7.922 z M 45 75.325 c -4.105 0 -7.446 -3.34 -7.446 -7.445 s 3.34 -7.445 7.446 -7.445 s 7.445 3.34 7.445 7.445 S 49.106 75.325 45 75.325 z M 45 63.435 c -2.451 0 -4.446 1.994 -4.446 4.445 s 1.995 4.445 4.446 4.445 s 4.445 -1.994 4.445 -4.445 S 47.451 63.435 45 63.435 z M 45 57.146 c -3.794 0 -6.882 -3.087 -6.882 -6.882 V 34.121 c 0 -3.794 3.087 -6.882 6.882 -6.882 c 3.794 0 6.881 3.087 6.881 6.882 v 16.144 C 51.881 54.06 48.794 57.146 45 57.146 z M 45 30.239 c -2.141 0 -3.882 1.741 -3.882 3.882 v 16.144 c 0 2.141 1.741 3.882 3.882 3.882 c 2.14 0 3.881 -1.741 3.881 -3.882 V 34.121 C 48.881 31.98 47.14 30.239 45 30.239 z' style='stroke: none; stroke-width: 1; stroke-dasharray: none; stroke-linecap: butt; stroke-linejoin: miter; stroke-miterlimit: 10; fill: rgb(0,0,0); fill-rule: nonzero; opacity: 1;' transform=' matrix(1 0 0 1 0 0) ' stroke-linecap='round' /%3E%3C/g%3E%3C/svg%3E";
Loading

0 comments on commit 49c6f60

Please sign in to comment.