From 24254a7d6e79225126aa95ca7be29ebdc5910eda Mon Sep 17 00:00:00 2001
From: Mark Boas
Date: Fri, 11 Feb 2022 18:10:39 +0100
Subject: [PATCH 1/3] 73 added ability to download captions
---
wp-hyperaudio/hyperaudio-admin.php | 31 +-
wp-hyperaudio/js/caption.js | 698 +++++++++++++++++++----------
wp-hyperaudio/js/converter.js | 21 +
3 files changed, 517 insertions(+), 233 deletions(-)
diff --git a/wp-hyperaudio/hyperaudio-admin.php b/wp-hyperaudio/hyperaudio-admin.php
index 99128ae..50c1a58 100644
--- a/wp-hyperaudio/hyperaudio-admin.php
+++ b/wp-hyperaudio/hyperaudio-admin.php
@@ -15,6 +15,7 @@ function hyperaudio_load_admin_script($hook)
return;
}
+ wp_enqueue_script('caption', plugins_url('/js/caption.js', __FILE__), false, '1.0.0', false);
wp_enqueue_script( 'converter', plugin_dir_url( __FILE__ ) . '/js/converter.js', array( 'jquery' ), '1.0.0', true );
}
@@ -143,6 +144,21 @@ function hyperaudio_options_page()
padding: 4px;
}
+ .sub-holder {
+ font-size: 80%;
+ padding-top: 16px;
+ display: none;
+ }
+
+ .sub-btn {
+ margin-right: 16px;
+ }
+
+ .sub-download {
+ padding-right: 16px;
+ display: none;
+ }
+
@@ -266,13 +282,18 @@ function hyperaudio_options_page()
-
+
+
+
+
+
-
-
-
+
\ No newline at end of file
diff --git a/wp-hyperaudio/js/caption.js b/wp-hyperaudio/js/caption.js
index a860f12..f7aabed 100644
--- a/wp-hyperaudio/js/caption.js
+++ b/wp-hyperaudio/js/caption.js
@@ -1,290 +1,532 @@
-/*! (C) The Hyperaudio Project. MIT @license: en.wikipedia.org/wiki/MIT_License. */
-'use strict';
-
-var caption = function () {
- var cap = {};
-
- function formatSeconds(seconds) {
- if (typeof seconds == 'number') {
- return new Date(seconds.toFixed(3) * 1000).toISOString().substring(11,23);
- } else {
- console.log('warning - attempting to format the non number: ' + seconds);
- return null;
+var $ = jQuery; // needed for wordpress
+
+$(document).ready(function() {
+ var p = document.getElementById('para-split');
+ var cp = document.getElementById('current-para-split');
+ var paraSplitTime = p.value;
+ var paraPunct = $('#para-punctuation').prop('checked');
+
+ p.addEventListener(
+ 'input',
+ function() {
+ cp.innerHTML = p.value;
+ paraSplitTime = p.value;
+ },
+ false
+ );
+
+ $('#para-punctuation').change(function() {
+ if (this.checked) {
+ paraPunct = $('#para-punctuation').prop('checked');
}
- }
+ });
- function convertTimecodeToSrt(timecode) {
- //the same as VTT format but milliseconds separated by a comma
- return timecode.substring(0,8) + "," + timecode.substring(9,12);
- }
+ $('#markup-view').click(function() {
+ $('#rendered-view').addClass('inactive');
+ $(this).removeClass('inactive');
+ $('#rtranscript').hide();
- cap.init = function (transcriptId, playerId, maxLength, minLength) {
- var transcript = document.getElementById(transcriptId);
- var words = transcript.querySelectorAll('[data-m]');
- var data = {};
- data.segments = [];
- var segmentIndex = 0;
-
- function segmentMeta(speaker, start, duration, chars) {
- this.speaker = speaker;
- this.start = start;
- this.duration = duration;
- this.chars = chars;
- this.words = [];
- }
+ var regex = /\span>(.*?)\ 0) {
+ strToMatch = strToMatch.replace(""+matches[1], matches[1]+"");
+ }
}
- var thisWordMeta;
- var thisSegmentMeta = null;
+ $('#htranscript').val(strToMatch);
+ $('#htranscript').show();
+ return false;
+ });
+
+ $('#rendered-view').click(function() {
+ $('#markup-view').addClass('inactive');
+ $(this).removeClass('inactive');
+ $('#htranscript').hide();
+ $('#rtranscript').html("rendering...");
+ $('#rtranscript').show();
+
+ setTimeout(renderTranscript, 100);
+
+ return false;
+ });
+
+ function renderTranscript() {
+ $('#rtranscript').html($('#htranscript').val());
+
+ //document.getElementById("gen-subs").addEventListener("click", genSubs);
+ $('#gen-subs').click(genSubs);
+ //document.getElementById("generate-captions").style.display = 'inline';
+ $('#generate-captions').show();
+
+ function genSubs(){
+ var cap1 = caption();
+ var subs = cap1.init("rtranscript", null, null, null);
+ //console.log(subs.vtt);
+ //console.log(subs.srt);
+ //var downloadLinkVtt = document.getElementById("download-vtt");
+ //downloadLinkVtt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt));
+ //downloadLinkVtt.style.display = 'inline';
+ $('#download-vtt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt)).show();
+
+ //var downloadLinkSrt = document.getElementById("download-srt");
+ //downloadLinkSrt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.srt));
+ //downloadLinkSrt.style.display = 'inline';
+ $('#download-srt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.srt)).show();
+ };
+ }
+
+ String.prototype.replaceAll = function(search, replacement) {
+ var target = this;
+ return target.replace(new RegExp(search, 'g'), replacement);
+ };
- // defaults
- var maxLineLength = 37;
- var minLineLength = 21;
+ // From popcorn.parserSRT.js
- var captionsVtt = 'WEBVTT\n';
- var captionsSrt = '';
+ function parseSRT(data) {
- var endSentenceDelimiter = /[\.。?؟!]/g;
- var midSentenceDelimiter = /[,、–,،و:,…‥]/g;
+ document.dispatchEvent(event);
- if (!isNaN(maxLength) && maxLength != null) {
- maxLineLength = maxLength;
- }
+ var i = 0,
+ len = 0,
+ idx = 0,
+ lines,
+ time,
+ text,
+ sub;
- if (!isNaN(minLength) && minLength != null) {
- minLineLength = minLength;
- }
+ // Simple function to convert HH:MM:SS,MMM or HH:MM:SS.MMM to SS.MMM
+ // Assume valid, returns 0 on error
- var lastSpeaker = '';
+ var toSeconds = function(t_in) {
+ var t = t_in.split(':');
- words.forEach(function (word, i) {
- if (thisSegmentMeta === null) {
- // create segment meta object
- thisSegmentMeta = new segmentMeta('', null, 0, 0, 0);
- }
+ try {
+ var s = t[2].split(',');
- if (word.classList.contains('speaker')) {
- // checking that this is not a new segment AND a new empty segment wasn't already created
- if (thisSegmentMeta !== null && thisSegmentMeta.start !== null) {
- //console.log("pushing...");
- //console.log(thisSegmentMeta);
- data.segments.push(thisSegmentMeta); // push the previous segment because it's a new speaker
- thisSegmentMeta = new segmentMeta('', null, 0, 0, 0);
+ // Just in case a . is decimal seperator
+ if (s.length === 1) {
+ s = t[2].split('.');
}
- thisSegmentMeta.speaker = word.innerText;
- } else {
- var thisStart = parseInt(word.getAttribute('data-m')) / 1000;
- var thisDuration = parseInt(word.getAttribute('data-d')) / 1000;
+ return (
+ parseFloat(t[0], 10) * 3600 +
+ parseFloat(t[1], 10) * 60 +
+ parseFloat(s[0], 10) +
+ parseFloat(s[1], 10) / 1000
+ );
+ } catch (e) {
+ return 0;
+ }
+ };
- if (isNaN(thisStart)) {
- thisStart = 0;
- }
+ var outputString = '';
+ var lineBreaks = $('#line-breaks').prop('checked');
+ var ltime = 0;
+ var ltext;
- if (isNaN(thisDuration)) {
- thisDuration = 0;
- }
+ // Here is where the magic happens
+ // Split on line breaks
+ lines = data.split(/(?:\r\n|\r|\n)/gm);
+ len = lines.length;
+
+ for (i = 0; i < len; i++) {
+ sub = {};
+ text = [];
- var thisText = word.innerText;
+ sub.id = parseInt(lines[i++], 10);
+
+ // Split on '-->' delimiter, trimming spaces as well
+
+ try {
+ time = lines[i++].split(/[\t ]*-->[\t ]*/);
+ } catch (e) {
+ alert('Warning. Possible issue on line ' + i + ": '" + lines[i] + "'.");
+ break;
+ }
- thisWordMeta = new wordMeta(thisStart, thisDuration, thisText);
+ sub.start = toSeconds(time[0]);
- if (thisSegmentMeta.start === null) {
- thisSegmentMeta.start = thisStart;
- thisSegmentMeta.duration = 0;
- thisSegmentMeta.chars = 0;
+ // So as to trim positioning information from end
+ if (!time[1]) {
+ alert('Warning. Issue on line ' + i + ": '" + lines[i] + "'.");
+ return;
+ }
+
+ idx = time[1].indexOf(' ');
+ if (idx !== -1) {
+ time[1] = time[1].substr(0, idx);
+ }
+ sub.end = toSeconds(time[1]);
+
+ // Build single line of text from multi-line subtitle in file
+ while (i < len && lines[i]) {
+ text.push(lines[i++]);
+ }
+
+ // Join into 1 line, SSA-style linebreaks
+ // Strip out other SSA-style tags
+ sub.text = text.join('\\N').replace(/\{(\\[\w]+\(?([\w\d]+,?)+\)?)+\}/gi, '');
+
+ // Escape HTML entities
+ sub.text = sub.text.replace(//g, '>');
+
+ // Unescape great than and less than when it makes a valid html tag of a supported style (font, b, u, s, i)
+ // Modified version of regex from Phil Haack's blog: http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx
+ // Later modified by kev: http://kevin.deldycke.com/2007/03/ultimate-regular-expression-for-html-tag-parsing-with-php/
+ sub.text = sub.text.replace(
+ /<(\/?(font|b|u|i|s))((\s+(\w|\w[\w\-]*\w)(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)(\/?)>/gi,
+ '<$1$3$7>'
+ );
+ //sub.text = sub.text.replace( /\\N/gi, "
" );
+ sub.text = sub.text.replace(/\\N/gi, ' ');
+
+ var splitMode = 0;
+
+ var wordLengthSplit = $('#word-length').prop('checked');
+
+ // enhancements to take account of word length
+
+ var swords = sub.text.split(' ');
+ var sduration = sub.end - sub.start;
+ var stimeStep = sduration / swords.length;
+
+ // determine length of words
+
+ var swordLengths = [];
+ var swordTimes = [];
+
+ var totalLetters = 0;
+ for (var si = 0, sl = swords.length; si < sl; ++si) {
+ totalLetters = totalLetters + swords[si].length;
+ swordLengths[si] = swords[si].length;
+ }
+
+ var letterTime = sduration / totalLetters;
+ var wordStart = 0;
+
+ for (var si = 0, sl = swords.length; si < sl; ++si) {
+ var wordTime = swordLengths[si] * letterTime;
+ var stime;
+ if (wordLengthSplit) {
+ stime = Math.round((sub.start + si * stimeStep) * 1000);
+
+ document.dispatchEvent(event);
+ } else {
+ stime = Math.round((wordStart + sub.start) * 1000);
+
+ document.dispatchEvent(event);
}
- thisSegmentMeta.duration += thisDuration;
- thisSegmentMeta.chars += thisText.length;
+ wordStart = wordStart + wordTime;
+ var stext = swords[si];
- thisSegmentMeta.words.push(thisWordMeta);
+ if (stime - ltime > paraSplitTime * 1000 && paraSplitTime > 0) {
- // remove spaces first just in case
- var lastChar = thisText.replace(/\s/g, '').slice(-1);
- if (lastChar.match(endSentenceDelimiter)) {
- data.segments.push(thisSegmentMeta);
- thisSegmentMeta = null;
+ var punctPresent =
+ ltext && (ltext.indexOf('.') > 0 || ltext.indexOf('?') > 0 || ltext.indexOf('!') > 0);
+ if (!paraPunct || (paraPunct && punctPresent)) {
+ outputString += '
';
+ }
}
- }
- });
- //console.log(data.segments);
+ outputString += '' + stext + ' ';
+
+ ltime = stime;
+ ltext = stext;
- function captionMeta(start, stop, text) {
- this.start = start;
- this.stop = stop;
- this.text = text;
+ if (lineBreaks) outputString = outputString + '\n';
+ }
}
+ return outputString + '
';
+ }
- var captions = [];
- var thisCaption = null;
+ $('#transform').click(function() {
+ $('#transform-spinner').show();
+ $('#htranscript').val("converting...");
+ setTimeout(generateTranscript, 100);
+ });
- data.segments.map(function (segment) {
- // If the entire segment fits on a line, add it to the captions.
- if (segment.chars < maxLineLength) {
- thisCaption = new captionMeta(
- formatSeconds(segment.start),
- formatSeconds(segment.start + segment.duration),
- '',
- );
+ function generateTranscript() {
- segment.words.forEach(function (wordMeta) {
- thisCaption.text += wordMeta.text;
- });
+ var input = $('#subtitles').val();
- thisCaption.text += '\n';
- //console.log("0. pushing because the whole segment fits on a line!");
- //console.log(thisCaption);
- captions.push(thisCaption);
- thisCaption = null;
- } else {
- // The number of chars in this segment is longer than our single line maximum
-
- var charCount = 0;
- var lineText = '';
- var firstLine = true;
- var lastOutTime;
- var lastInTime = null;
-
- segment.words.forEach(function (wordMeta, index) {
- var lastChar = wordMeta.text.replace(/\s/g, '').slice(-1);
-
- if (lastInTime === null) {
- // if it doesn't exist yet set the caption start time to the word's start time.
- lastInTime = wordMeta.start;
- }
+ var ht;
- // Are we over the minimum length of a line and hitting a good place to split mid-sentence?
- if (charCount + wordMeta.text.length > minLineLength && lastChar.match(midSentenceDelimiter)) {
- if (firstLine === true) {
- thisCaption = new captionMeta(
- formatSeconds(lastInTime),
- formatSeconds(wordMeta.start + wordMeta.duration),
- '',
- );
- thisCaption.text += lineText + wordMeta.text + '\n';
+ var format = $('#format-select').val();
- //check for last word in segment, if it is we can push a one line caption, if not – move on to second line
+ switch (format) {
- if (index + 1 >= segment.words.length) {
- //console.log("1. pushing because we're at a good place to split, we're on the first line but it's the last word of the segment.");
- //console.log(thisCaption);
- captions.push(thisCaption);
- thisCaption = null;
- } else {
- firstLine = false;
- }
+ case 'oe':
+ var data = JSON.parse(input);
+ var items = ['\n'];
+ $.each(data.content.paragraphs, function(key, val) {
+ var paraStart = Math.round(val.start*1000);
+ items.push(
+ '\n' +
+ val.speaker +
+ ' '
+ );
+
+ var lastStart = 0;
+
+ $.each(val.words, function(k, v) {
+ if (typeof v.start !== 'undefined') {
+ items.push(
+ '\n' +
+ v.text +
+ ' '
+ );
+ lastStart = v.start;
} else {
- // We're on the second line ... we're over the minimum chars and in a good place to split – let's push the caption
-
- thisCaption.stop = formatSeconds(wordMeta.start + wordMeta.duration);
- thisCaption.text += lineText + wordMeta.text;
- //console.log("2. pushing because we're on the second line and have a good place to split");
- //console.log(thisCaption);
- captions.push(thisCaption);
- thisCaption = null;
- firstLine = true;
+
+ if (k === 0) {
+ lastStart = paraStart;
+ }
+
+ items.push(
+ '\n' +
+ v.text +
+ ' '
+ );
}
+ });
+ items.push('
');
+ });
- // whether first line or not we should reset ready for a new caption
- charCount = 0;
- lineText = '';
- lastInTime = null;
- } else {
- // we're not over the minimum length with a suitable splitting point
+ items.push('
');
+
+ ht = items.join('');
+
+ // remove empty paras
+
+ ht = ht.split("").join("");
+
+ break;
+
+ case 'google':
+ var data = JSON.parse(input);
+
+ var items = [''];
+
+ $.each(data.response.results, function(key, val) {
+ $.each(val.alternatives, function(k, v) {
+ for (var i = 0; i < v.words.length; i++) {
+ items.push(
+ '' +
+ v.words[i].word +
+ ' '
+ );
+
+
+ if (i > 0 && Math.round(parseFloat(v.words[i].startTime)) - Math.round(parseFloat(v.words[i-1].startTime)) > paraSplitTime && paraSplitTime > 0) {
+ items.push('
');
+ }
+ }
+ });
+ });
- // If we add this word are we over the maximum?
- if (charCount + wordMeta.text.length > maxLineLength) {
- if (firstLine === true) {
- if (lastOutTime === undefined) {
- lastOutTime = wordMeta.start + wordMeta.duration;
+ items.push('
');
+
+ ht = items.join('');
+ break;
+
+ case 'speechmatics':
+ var data = JSON.parse(input);
+ var items = [''];
+ $.each(data, function(key, val) {
+ if (key == 'words') {
+ for (var i = 0; i < val.length; i++) {
+ var punct = "";
+ if ((i+1) < val.length && val[i+1].name === ".") {
+ punct = ".";
+ }
+
+ if (val[i].name !== ".") {
+ items.push(
+ '' +
+ val[i].name + punct +
+ ' '
+ );
+ }
+
+ if (i > 0 && Math.round(parseFloat(val[i].time)) - Math.round(parseFloat(val[i-1].time)) > paraSplitTime && paraSplitTime > 0) {
+ if ((paraPunct && punct === ".") || (paraPunct === false)) {
+ items.push('
');
}
+ }
+ }
+ }
+ });
- thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), '');
- thisCaption.text += lineText + '\n';
+ items.push('
');
- // It's just the first line so we should only push a new caption if it's the very last word!
+ ht = items.join('');
+ break;
- if (index >= segment.words.length) {
- captions.push(thisCaption);
- thisCaption = null;
- } else {
- firstLine = false;
- }
- } else {
- // We're on the second line and since we're over the maximum with the next word we should push this caption!
+ case 'gentle':
+ var data = JSON.parse(input);
- thisCaption.stop = formatSeconds(lastOutTime);
- thisCaption.text += lineText;
+ wds = data['words'] || [];
+ transcript = data['transcript'];
- captions.push(thisCaption);
+ var trans = document.createElement('p');
- thisCaption = null;
- firstLine = true;
- }
+ trans.innerHTML = '';
+
+ var currentOffset = 0;
+ var wordCounter = 0;
+ var lastOutTime = 0;
+
+ wds.forEach(function(wd) {
+ // Add non-linked text
+
+ var newlineDetected = false;
+
+ if (wd.startOffset > currentOffset) {
+ var txt = transcript.slice(currentOffset, wd.startOffset);
+ newlineDetected = /\r|\n/.exec(txt);
- // do the stuff we need to do to start a new line
- charCount = wordMeta.text.length;
- lineText = wordMeta.text;
- lastInTime = wordMeta.start; // Why do we do this??????
+ if (trans.lastChild) {
+ trans.lastChild.text += txt + " ";
} else {
- // We're not over the maximum with this word, update the line length and add the word to the text
+ // this happens only at the beginning when offset not zero
+ var span = document.createElement('span');
+ var initialWd = document.createTextNode(txt + " ");
+ var initialDatam = document.createAttribute('data-m');
+ var initialDatad = document.createAttribute('data-d');
+
+ span.appendChild(initialWd);
+ initialDatam.value = 0;
+ initialDatad.value = 0;
+ span.setAttributeNode(initialDatam);
+ span.setAttributeNode(initialDatad);
+ trans.appendChild(span);
+ trans.appendChild(span);
+ }
- charCount += wordMeta.text.length;
- lineText += wordMeta.text;
+ if (newlineDetected) {
+ var lineBreak = document.createElement('br');
+ trans.appendChild(lineBreak);
}
+ currentOffset = wd.startOffset;
}
- // for every word update the lastOutTime
- lastOutTime = wordMeta.start + wordMeta.duration;
- });
+ var datam = document.createAttribute('data-m');
+ var datad = document.createAttribute('data-d');
+
+ var word = document.createElement('span');
+ var txt = transcript.slice(wd.startOffset, wd.endOffset+1);
+
+ if (!txt.endsWith(" ")){
+ txt = txt + " ";
+ }
+
+ var wordText = document.createTextNode(txt);
+ word.appendChild(wordText);
+
+ if (wd.start !== undefined) {
+ datam.value = Math.floor(wd.start * 1000);
+ datad.value = Math.floor((wd.end - wd.start) * 1000);
+ } else {
+ // look ahead to the next timed word
+ for (var i = wordCounter; i < wds.length - 1; i++) {
+ if (wds[i + 1].start !== undefined) {
+ datam.value = Math.floor(wds[i + 1].start * 1000);
+ break;
+ }
+ }
+ datad.value = '100'; // default duration when not known
+ }
- // we're out of words for this segment - decision time!
- if (thisCaption !== null) {
- // The caption had been started, time to add whatever text we have and add a stop point
- thisCaption.stop = formatSeconds(lastOutTime);
- thisCaption.text += lineText;
- //console.log("3. pushing at end of segment when new caption HAS BEEN created");
- //console.log(thisCaption);
- captions.push(thisCaption);
- thisCaption = null;
- } else {
- // caption hadn't been started yet - create one!
- if (lastInTime !== null) {
- thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), lineText);
- //console.log("4. pushing at end of segment when new caption has yet to be created");
- //console.log(thisCaption);
- captions.push(thisCaption);
- thisCaption = null;
+ if (datam.value < lastOutTime) {
+ datam.value = lastOutTime + 1;
}
- }
- }
- });
- captions.forEach(function (caption, i) {
- captionsVtt += '\n' + caption.start + ' --> ' + caption.stop + '\n' + caption.text + '\n';
- captionsSrt += '\n' + (i + 1) + '\n' + convertTimecodeToSrt(caption.start) + ' --> ' + convertTimecodeToSrt(caption.stop) + '\n' + caption.text + '\n';
- });
+ word.setAttributeNode(datam);
+ word.setAttributeNode(datad);
- var trackElement = document.getElementById(playerId+'-vtt');
+ lastOutTime = parseInt(datam.value) + parseInt(datad.value);
- if (trackElement !== null) {
- trackElement.setAttribute("src", 'data:text/vtt,'+encodeURIComponent(captionsVtt));
- }
+ trans.appendChild(word);
+
+ currentOffset = wd.endOffset;
+ wordCounter++;
+ });
- function captionsObj(vtt, srt) {
- this.vtt = vtt;
- this.srt = srt;
+ var txt = transcript.slice(currentOffset, transcript.length);
+ var word = document.createTextNode(txt);
+ trans.appendChild(word);
+ currentOffset = transcript.length;
+
+ article = document.createElement('article');
+ section = document.createElement('section');
+
+ section.appendChild(trans);
+ article.appendChild(section);
+
+ ht = article.outerHTML;
+
+ //newlines can cause issues within HTML tags
+ ht = ht.replace(/(?:\r\n|\r|\n)/g, '');
+
+ ht = ht.replace(new RegExp('
', 'g'), '
');
+
+ // replace all unneeded empty paras
+ ht = ht.replace(new RegExp('
', 'g'), '');
+
+ break;
+
+ case 'srt':
+ ht = parseSRT(input);
+ break;
+
+ case 'other':
+ var xmlString = input,
+ parser = new DOMParser(),
+ doc = parser.parseFromString(xmlString, 'text/xml');
+
+ var transcript = doc.getElementsByTagName('section')[0];
+
+ for (var i = 0; i < doc.getElementsByClassName('speaker').length; i++) {
+ transcript.getElementsByClassName('speaker')[i].innerHTML =
+ '[' +
+ transcript.getElementsByClassName('speaker')[i].innerHTML.replace(': ', '') +
+ '] ';
+ var datam = document.createAttribute('data-m');
+ var datad = document.createAttribute('data-d');
+ datam.value = transcript
+ .getElementsByClassName('speaker')
+ [i].nextElementSibling.getAttribute('data-m');
+ datad.value = '1';
+ transcript.getElementsByClassName('speaker')[i].setAttributeNode(datam);
+ transcript.getElementsByClassName('speaker')[i].setAttributeNode(datad);
+ }
+
+ var transcriptText = transcript.outerHTML;
+
+ ht = '' + transcriptText + '';
}
- return new captionsObj(captionsVtt, captionsSrt);
- };
+ $('#htranscript').val(ht);
+ $('#rtranscript').html(ht);
- return cap;
-};
+ $('#transform-spinner').hide();
+ return false;
+ }
+});
\ No newline at end of file
diff --git a/wp-hyperaudio/js/converter.js b/wp-hyperaudio/js/converter.js
index 4a8fd14..f7aabed 100644
--- a/wp-hyperaudio/js/converter.js
+++ b/wp-hyperaudio/js/converter.js
@@ -55,6 +55,27 @@ $(document).ready(function() {
function renderTranscript() {
$('#rtranscript').html($('#htranscript').val());
+
+ //document.getElementById("gen-subs").addEventListener("click", genSubs);
+ $('#gen-subs').click(genSubs);
+ //document.getElementById("generate-captions").style.display = 'inline';
+ $('#generate-captions').show();
+
+ function genSubs(){
+ var cap1 = caption();
+ var subs = cap1.init("rtranscript", null, null, null);
+ //console.log(subs.vtt);
+ //console.log(subs.srt);
+ //var downloadLinkVtt = document.getElementById("download-vtt");
+ //downloadLinkVtt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt));
+ //downloadLinkVtt.style.display = 'inline';
+ $('#download-vtt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt)).show();
+
+ //var downloadLinkSrt = document.getElementById("download-srt");
+ //downloadLinkSrt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.srt));
+ //downloadLinkSrt.style.display = 'inline';
+ $('#download-srt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.srt)).show();
+ };
}
String.prototype.replaceAll = function(search, replacement) {
From 4bc6cf505eedb84a794f4f82667a112c2cf0e3a2 Mon Sep 17 00:00:00 2001
From: Mark Boas
Date: Fri, 11 Feb 2022 18:26:21 +0100
Subject: [PATCH 2/3] fix for caption.js overwrite with converter.js
---
wp-hyperaudio/js/caption.js | 669 +++++++++++-------------------------
1 file changed, 207 insertions(+), 462 deletions(-)
diff --git a/wp-hyperaudio/js/caption.js b/wp-hyperaudio/js/caption.js
index f7aabed..ed580e1 100644
--- a/wp-hyperaudio/js/caption.js
+++ b/wp-hyperaudio/js/caption.js
@@ -1,532 +1,277 @@
-var $ = jQuery; // needed for wordpress
-
-$(document).ready(function() {
- var p = document.getElementById('para-split');
- var cp = document.getElementById('current-para-split');
- var paraSplitTime = p.value;
- var paraPunct = $('#para-punctuation').prop('checked');
-
- p.addEventListener(
- 'input',
- function() {
- cp.innerHTML = p.value;
- paraSplitTime = p.value;
- },
- false
- );
-
- $('#para-punctuation').change(function() {
- if (this.checked) {
- paraPunct = $('#para-punctuation').prop('checked');
- }
- });
-
- $('#markup-view').click(function() {
- $('#rendered-view').addClass('inactive');
- $(this).removeClass('inactive');
- $('#rtranscript').hide();
+'use strict';
- var regex = /\span>(.*?)\ 0) {
- strToMatch = strToMatch.replace(""+matches[1], matches[1]+"");
- }
+ function formatSeconds(seconds) {
+ if(typeof seconds == 'number'){
+ return new Date(seconds.toFixed(3) * 1000).toISOString().substr(11, 12);
+ } else {
+ console.log("warning - attempting to format the non number: "+seconds);
+ return null;
}
-
- $('#htranscript').val(strToMatch);
- $('#htranscript').show();
- return false;
- });
-
- $('#rendered-view').click(function() {
- $('#markup-view').addClass('inactive');
- $(this).removeClass('inactive');
- $('#htranscript').hide();
- $('#rtranscript').html("rendering...");
- $('#rtranscript').show();
-
- setTimeout(renderTranscript, 100);
-
- return false;
- });
-
- function renderTranscript() {
- $('#rtranscript').html($('#htranscript').val());
-
- //document.getElementById("gen-subs").addEventListener("click", genSubs);
- $('#gen-subs').click(genSubs);
- //document.getElementById("generate-captions").style.display = 'inline';
- $('#generate-captions').show();
-
- function genSubs(){
- var cap1 = caption();
- var subs = cap1.init("rtranscript", null, null, null);
- //console.log(subs.vtt);
- //console.log(subs.srt);
- //var downloadLinkVtt = document.getElementById("download-vtt");
- //downloadLinkVtt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt));
- //downloadLinkVtt.style.display = 'inline';
- $('#download-vtt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt)).show();
-
- //var downloadLinkSrt = document.getElementById("download-srt");
- //downloadLinkSrt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.srt));
- //downloadLinkSrt.style.display = 'inline';
- $('#download-srt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.srt)).show();
- };
}
- String.prototype.replaceAll = function(search, replacement) {
- var target = this;
- return target.replace(new RegExp(search, 'g'), replacement);
- };
-
- // From popcorn.parserSRT.js
-
- function parseSRT(data) {
-
- document.dispatchEvent(event);
-
- var i = 0,
- len = 0,
- idx = 0,
- lines,
- time,
- text,
- sub;
-
- // Simple function to convert HH:MM:SS,MMM or HH:MM:SS.MMM to SS.MMM
- // Assume valid, returns 0 on error
-
- var toSeconds = function(t_in) {
- var t = t_in.split(':');
-
- try {
- var s = t[2].split(',');
-
- // Just in case a . is decimal seperator
- if (s.length === 1) {
- s = t[2].split('.');
- }
-
- return (
- parseFloat(t[0], 10) * 3600 +
- parseFloat(t[1], 10) * 60 +
- parseFloat(s[0], 10) +
- parseFloat(s[1], 10) / 1000
- );
- } catch (e) {
- return 0;
- }
- };
+ cap.init = function(transcriptId, playerId, maxLength, minLength) {
+ var transcript = document.getElementById(transcriptId);
+ var words = transcript.querySelectorAll('[data-m]');
+ var data = {};
+ data.segments = [];
+ var segmentIndex = 0;
+
+ function segmentMeta(speaker, start, duration, chars) {
+ this.speaker = speaker;
+ this.start = start;
+ this.duration = duration;
+ this.chars = chars;
+ this.words = [];
+ }
- var outputString = '';
- var lineBreaks = $('#line-breaks').prop('checked');
- var ltime = 0;
- var ltext;
+ function wordMeta(start, duration, text) {
+ this.start = start;
+ this.duration = duration;
+ this.text = text;
+ }
- // Here is where the magic happens
- // Split on line breaks
- lines = data.split(/(?:\r\n|\r|\n)/gm);
- len = lines.length;
+ var thisWordMeta;
+ var thisSegmentMeta = null;
- for (i = 0; i < len; i++) {
- sub = {};
- text = [];
+ // defaults
+ var maxLineLength = 37;
+ var minLineLength = 21;
- sub.id = parseInt(lines[i++], 10);
+ var captionsVtt = "WEBVTT\n"
- // Split on '-->' delimiter, trimming spaces as well
+ var endSentenceDelimiter = /[\.。?؟!]/g;
+ var midSentenceDelimiter = /[,、–,،و:,…‥]/g;
- try {
- time = lines[i++].split(/[\t ]*-->[\t ]*/);
- } catch (e) {
- alert('Warning. Possible issue on line ' + i + ": '" + lines[i] + "'.");
- break;
- }
+ if (!isNaN(maxLength)) {
+ maxLineLength = maxLength;
+ }
- sub.start = toSeconds(time[0]);
+ if (!isNaN(minLength)) {
+ minLineLength = minLength;
+ }
- // So as to trim positioning information from end
- if (!time[1]) {
- alert('Warning. Issue on line ' + i + ": '" + lines[i] + "'.");
- return;
- }
+ var lastSpeaker = "";
+
+ words.forEach(function(word, i) {
- idx = time[1].indexOf(' ');
- if (idx !== -1) {
- time[1] = time[1].substr(0, idx);
+ if (thisSegmentMeta === null) {
+ // create segment meta object
+ thisSegmentMeta = new segmentMeta("", null, 0, 0, 0);
}
- sub.end = toSeconds(time[1]);
- // Build single line of text from multi-line subtitle in file
- while (i < len && lines[i]) {
- text.push(lines[i++]);
- }
+ if (word.classList.contains("speaker")) {
- // Join into 1 line, SSA-style linebreaks
- // Strip out other SSA-style tags
- sub.text = text.join('\\N').replace(/\{(\\[\w]+\(?([\w\d]+,?)+\)?)+\}/gi, '');
+ // checking that this is not a new segment AND a new empty segment wasn't already created
+ if (thisSegmentMeta !== null && thisSegmentMeta.start !== null) {
+ //console.log("pushing...");
+ //console.log(thisSegmentMeta);
+ data.segments.push(thisSegmentMeta); // push the previous segment because it's a new speaker
+ thisSegmentMeta = new segmentMeta("", null, 0, 0, 0);
+ }
- // Escape HTML entities
- sub.text = sub.text.replace(//g, '>');
+ thisSegmentMeta.speaker = word.innerText;
- // Unescape great than and less than when it makes a valid html tag of a supported style (font, b, u, s, i)
- // Modified version of regex from Phil Haack's blog: http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx
- // Later modified by kev: http://kevin.deldycke.com/2007/03/ultimate-regular-expression-for-html-tag-parsing-with-php/
- sub.text = sub.text.replace(
- /<(\/?(font|b|u|i|s))((\s+(\w|\w[\w\-]*\w)(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)(\/?)>/gi,
- '<$1$3$7>'
- );
- //sub.text = sub.text.replace( /\\N/gi, "
" );
- sub.text = sub.text.replace(/\\N/gi, ' ');
+ } else {
- var splitMode = 0;
+ var thisStart = parseInt(word.getAttribute("data-m"))/1000;
+ var thisDuration = parseInt(word.getAttribute("data-d"))/1000;
- var wordLengthSplit = $('#word-length').prop('checked');
+ if (isNaN(thisStart)) {
+ thisStart = 0;
+ }
+
+ if (isNaN(thisDuration)) {
+ thisDuration = 0;
+ }
- // enhancements to take account of word length
+ var thisText = word.innerText;
- var swords = sub.text.split(' ');
- var sduration = sub.end - sub.start;
- var stimeStep = sduration / swords.length;
+ thisWordMeta = new wordMeta(thisStart, thisDuration, thisText);
+
+ if (thisSegmentMeta.start === null) {
+ thisSegmentMeta.start = thisStart;
+ thisSegmentMeta.duration = 0;
+ thisSegmentMeta.chars = 0;
+ }
- // determine length of words
+ thisSegmentMeta.duration += thisDuration;
+ thisSegmentMeta.chars += thisText.length;
- var swordLengths = [];
- var swordTimes = [];
+ thisSegmentMeta.words.push(thisWordMeta);
- var totalLetters = 0;
- for (var si = 0, sl = swords.length; si < sl; ++si) {
- totalLetters = totalLetters + swords[si].length;
- swordLengths[si] = swords[si].length;
+ // remove spaces first just in case
+ var lastChar = thisText.replace(/\s/g, '').slice(-1);
+ if (lastChar.match(endSentenceDelimiter)) {
+ data.segments.push(thisSegmentMeta);
+ thisSegmentMeta = null;
+ }
}
+ });
- var letterTime = sduration / totalLetters;
- var wordStart = 0;
+ //console.log(data.segments);
- for (var si = 0, sl = swords.length; si < sl; ++si) {
- var wordTime = swordLengths[si] * letterTime;
- var stime;
- if (wordLengthSplit) {
- stime = Math.round((sub.start + si * stimeStep) * 1000);
-
- document.dispatchEvent(event);
- } else {
- stime = Math.round((wordStart + sub.start) * 1000);
+ function captionMeta(start, stop, text) {
+ this.start = start;
+ this.stop = stop;
+ this.text = text;
+ }
- document.dispatchEvent(event);
- }
+ var captions = [];
+ var thisCaption = null;
- wordStart = wordStart + wordTime;
- var stext = swords[si];
+ data.segments.map(function(segment) {
- if (stime - ltime > paraSplitTime * 1000 && paraSplitTime > 0) {
+ // If the entire segment fits on a line, add it to the captions.
+ if (segment.chars < maxLineLength) {
- var punctPresent =
- ltext && (ltext.indexOf('.') > 0 || ltext.indexOf('?') > 0 || ltext.indexOf('!') > 0);
- if (!paraPunct || (paraPunct && punctPresent)) {
- outputString += '
';
- }
- }
+ thisCaption = new captionMeta(formatSeconds(segment.start), formatSeconds(segment.start + segment.duration), "");
+
+ segment.words.forEach(function(wordMeta) {
+ thisCaption.text += wordMeta.text;
+ });
- outputString += '' + stext + ' ';
+ thisCaption.text += "\n";
+ //console.log("0. pushing because the whole segment fits on a line!");
+ //console.log(thisCaption);
+ captions.push(thisCaption);
+ thisCaption = null;
- ltime = stime;
- ltext = stext;
+ } else { // The number of chars in this segment is longer than our single line maximum
- if (lineBreaks) outputString = outputString + '\n';
- }
- }
- return outputString + '
';
- }
+ var charCount = 0;
+ var lineText = "";
+ var firstLine = true;
+ var lastOutTime;
+ var lastInTime = null;
+
+ segment.words.forEach(function(wordMeta, index) {
- $('#transform').click(function() {
- $('#transform-spinner').show();
- $('#htranscript').val("converting...");
- setTimeout(generateTranscript, 100);
- });
-
- function generateTranscript() {
-
- var input = $('#subtitles').val();
-
- var ht;
-
- var format = $('#format-select').val();
-
- switch (format) {
-
- case 'oe':
- var data = JSON.parse(input);
- var items = ['\n'];
- $.each(data.content.paragraphs, function(key, val) {
- var paraStart = Math.round(val.start*1000);
- items.push(
- '\n' +
- val.speaker +
- ' '
- );
-
- var lastStart = 0;
-
- $.each(val.words, function(k, v) {
- if (typeof v.start !== 'undefined') {
- items.push(
- '\n' +
- v.text +
- ' '
- );
- lastStart = v.start;
- } else {
-
- if (k === 0) {
- lastStart = paraStart;
- }
-
- items.push(
- '\n' +
- v.text +
- ' '
- );
- }
- });
- items.push('
');
- });
+ var lastChar = wordMeta.text.replace(/\s/g, '').slice(-1);
- items.push('
');
+ if (lastInTime === null) { // if it doesn't exist yet set the caption start time to the word's start time.
+ lastInTime = wordMeta.start;
+ }
- ht = items.join('');
+ // Are we over the minimum length of a line and hitting a good place to split mid-sentence?
+ if (charCount + wordMeta.text.length > minLineLength && lastChar.match(midSentenceDelimiter)) {
- // remove empty paras
+ if (firstLine === true) {
- ht = ht.split("").join("");
-
- break;
-
- case 'google':
- var data = JSON.parse(input);
-
- var items = [''];
-
- $.each(data.response.results, function(key, val) {
- $.each(val.alternatives, function(k, v) {
- for (var i = 0; i < v.words.length; i++) {
- items.push(
- '' +
- v.words[i].word +
- ' '
- );
-
-
- if (i > 0 && Math.round(parseFloat(v.words[i].startTime)) - Math.round(parseFloat(v.words[i-1].startTime)) > paraSplitTime && paraSplitTime > 0) {
- items.push('
');
+ thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(wordMeta.start + wordMeta.duration), "");
+ thisCaption.text += lineText + wordMeta.text + "\n";
+
+ //check for last word in segment, if it is we can push a one line caption, if not – move on to second line
+
+ if (index + 1 >= segment.words.length) {
+ //console.log("1. pushing because we're at a good place to split, we're on the first line but it's the last word of the segment.");
+ //console.log(thisCaption);
+ captions.push(thisCaption);
+ thisCaption = null;
+ } else {
+ firstLine = false;
}
- }
- });
- });
- items.push('
');
+ } else { // We're on the second line ... we're over the minimum chars and in a good place to split – let's push the caption
- ht = items.join('');
- break;
-
- case 'speechmatics':
- var data = JSON.parse(input);
- var items = [''];
- $.each(data, function(key, val) {
- if (key == 'words') {
- for (var i = 0; i < val.length; i++) {
- var punct = "";
- if ((i+1) < val.length && val[i+1].name === ".") {
- punct = ".";
- }
-
- if (val[i].name !== ".") {
- items.push(
- '' +
- val[i].name + punct +
- ' '
- );
- }
-
- if (i > 0 && Math.round(parseFloat(val[i].time)) - Math.round(parseFloat(val[i-1].time)) > paraSplitTime && paraSplitTime > 0) {
- if ((paraPunct && punct === ".") || (paraPunct === false)) {
- items.push('
');
- }
- }
+ thisCaption.stop = formatSeconds(wordMeta.start + wordMeta.duration);
+ thisCaption.text += lineText + wordMeta.text + "\n";
+ //console.log("2. pushing because we're on the second line and have a good place to split");
+ //console.log(thisCaption);
+ captions.push(thisCaption);
+ thisCaption = null;
+ firstLine = true;
}
- }
- });
- items.push('
');
+ // whether first line or not we should reset ready for a new caption
+ charCount = 0;
+ lineText = "";
+ lastInTime = null;
- ht = items.join('');
- break;
+ } else { // we're not over the minimum length with a suitable splitting point
- case 'gentle':
- var data = JSON.parse(input);
+ // If we add this word are we over the maximum?
+ if (charCount + wordMeta.text.length > maxLineLength) {
- wds = data['words'] || [];
- transcript = data['transcript'];
+ if (firstLine === true) {
+
+ if (lastOutTime === undefined) {
+ lastOutTime = wordMeta.start + wordMeta.duration;
+ }
- var trans = document.createElement('p');
+ thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), "");
+ thisCaption.text += lineText + "\n";
- trans.innerHTML = '';
+ // It's just the first line so we should only push a new caption if it's the very last word!
- var currentOffset = 0;
- var wordCounter = 0;
- var lastOutTime = 0;
+ if (index >= segment.words.length) {
+ captions.push(thisCaption);
+ thisCaption = null;
+ } else {
+ firstLine = false;
+ }
- wds.forEach(function(wd) {
- // Add non-linked text
+ } else { // We're on the second line and since we're over the maximum with the next word we should push this caption!
- var newlineDetected = false;
+ thisCaption.stop = formatSeconds(lastOutTime);
+ thisCaption.text += lineText + "\n";
+
+ captions.push(thisCaption);
- if (wd.startOffset > currentOffset) {
- var txt = transcript.slice(currentOffset, wd.startOffset);
- newlineDetected = /\r|\n/.exec(txt);
+ thisCaption = null;
+ firstLine = true;
+ }
- if (trans.lastChild) {
- trans.lastChild.text += txt + " ";
- } else {
- // this happens only at the beginning when offset not zero
- var span = document.createElement('span');
- var initialWd = document.createTextNode(txt + " ");
- var initialDatam = document.createAttribute('data-m');
- var initialDatad = document.createAttribute('data-d');
+ // do the stuff we need to do to start a new line
+ charCount = wordMeta.text.length;
+ lineText = wordMeta.text;
+ lastInTime = wordMeta.start; // Why do we do this??????
- span.appendChild(initialWd);
- initialDatam.value = 0;
- initialDatad.value = 0;
- span.setAttributeNode(initialDatam);
- span.setAttributeNode(initialDatad);
- trans.appendChild(span);
- trans.appendChild(span);
- }
+ } else { // We're not over the maximum with this word, update the line length and add the word to the text
- if (newlineDetected) {
- var lineBreak = document.createElement('br');
- trans.appendChild(lineBreak);
- }
- currentOffset = wd.startOffset;
- }
+ charCount += wordMeta.text.length;
+ lineText += wordMeta.text;
- var datam = document.createAttribute('data-m');
- var datad = document.createAttribute('data-d');
-
- var word = document.createElement('span');
- var txt = transcript.slice(wd.startOffset, wd.endOffset+1);
-
- if (!txt.endsWith(" ")){
- txt = txt + " ";
- }
-
- var wordText = document.createTextNode(txt);
- word.appendChild(wordText);
-
- if (wd.start !== undefined) {
- datam.value = Math.floor(wd.start * 1000);
- datad.value = Math.floor((wd.end - wd.start) * 1000);
- } else {
- // look ahead to the next timed word
- for (var i = wordCounter; i < wds.length - 1; i++) {
- if (wds[i + 1].start !== undefined) {
- datam.value = Math.floor(wds[i + 1].start * 1000);
- break;
- }
}
- datad.value = '100'; // default duration when not known
- }
-
- if (datam.value < lastOutTime) {
- datam.value = lastOutTime + 1;
}
- word.setAttributeNode(datam);
- word.setAttributeNode(datad);
-
- lastOutTime = parseInt(datam.value) + parseInt(datad.value);
-
- trans.appendChild(word);
-
- currentOffset = wd.endOffset;
- wordCounter++;
+ // for every word update the lastOutTime
+ lastOutTime = wordMeta.start + wordMeta.duration;
});
-
- var txt = transcript.slice(currentOffset, transcript.length);
- var word = document.createTextNode(txt);
- trans.appendChild(word);
- currentOffset = transcript.length;
-
- article = document.createElement('article');
- section = document.createElement('section');
-
- section.appendChild(trans);
- article.appendChild(section);
-
- ht = article.outerHTML;
-
- //newlines can cause issues within HTML tags
- ht = ht.replace(/(?:\r\n|\r|\n)/g, '');
-
- ht = ht.replace(new RegExp('
', 'g'), '');
-
- // replace all unneeded empty paras
- ht = ht.replace(new RegExp('
', 'g'), '');
-
- break;
-
- case 'srt':
- ht = parseSRT(input);
- break;
-
- case 'other':
- var xmlString = input,
- parser = new DOMParser(),
- doc = parser.parseFromString(xmlString, 'text/xml');
-
- var transcript = doc.getElementsByTagName('section')[0];
-
- for (var i = 0; i < doc.getElementsByClassName('speaker').length; i++) {
- transcript.getElementsByClassName('speaker')[i].innerHTML =
- '[' +
- transcript.getElementsByClassName('speaker')[i].innerHTML.replace(': ', '') +
- '] ';
- var datam = document.createAttribute('data-m');
- var datad = document.createAttribute('data-d');
- datam.value = transcript
- .getElementsByClassName('speaker')
- [i].nextElementSibling.getAttribute('data-m');
- datad.value = '1';
- transcript.getElementsByClassName('speaker')[i].setAttributeNode(datam);
- transcript.getElementsByClassName('speaker')[i].setAttributeNode(datad);
+
+ // we're out of words for this segment - decision time!
+ if (thisCaption !== null) { // The caption had been started, time to add whatever text we have and add a stop point
+ thisCaption.stop = formatSeconds(lastOutTime);
+ thisCaption.text += lineText + "\n";
+ //console.log("3. pushing at end of segment when new caption HAS BEEN created");
+ //console.log(thisCaption);
+ captions.push(thisCaption);
+ thisCaption = null;
+
+ } else { // caption hadn't been started yet - create one!
+ if (lastInTime !== null) {
+ thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), lineText);
+ //console.log("4. pushing at end of segment when new caption has yet to be created");
+ //console.log(thisCaption);
+ captions.push(thisCaption);
+ thisCaption = null;
+ }
}
+ }
+ });
- var transcriptText = transcript.outerHTML;
-
- ht = '' + transcriptText + '';
- }
+ captions.forEach(function(caption) {
+ captionsVtt += "\n" + caption.start + "-->" + caption.stop + "\n" + caption.text + "\n";
+ });
- $('#htranscript').val(ht);
- $('#rtranscript').html(ht);
+ document.getElementById(playerId+'-vtt').setAttribute("src", 'data:text/vtt,'+encodeURIComponent(captionsVtt));
+ console.log(captionsVtt);
- $('#transform-spinner').hide();
- return false;
}
+
+ return cap;
+
});
\ No newline at end of file
From cbc5816c7434560de53c3827a16b5192962a224c Mon Sep 17 00:00:00 2001
From: Mark Boas
Date: Fri, 11 Feb 2022 18:34:25 +0100
Subject: [PATCH 3/3] latest version of caption.js
---
wp-hyperaudio/js/caption.js | 159 +++++++++++++++++++-----------------
1 file changed, 86 insertions(+), 73 deletions(-)
diff --git a/wp-hyperaudio/js/caption.js b/wp-hyperaudio/js/caption.js
index ed580e1..a860f12 100644
--- a/wp-hyperaudio/js/caption.js
+++ b/wp-hyperaudio/js/caption.js
@@ -1,19 +1,24 @@
+/*! (C) The Hyperaudio Project. MIT @license: en.wikipedia.org/wiki/MIT_License. */
'use strict';
-var caption = (function () {
-
+var caption = function () {
var cap = {};
function formatSeconds(seconds) {
- if(typeof seconds == 'number'){
- return new Date(seconds.toFixed(3) * 1000).toISOString().substr(11, 12);
+ if (typeof seconds == 'number') {
+ return new Date(seconds.toFixed(3) * 1000).toISOString().substring(11,23);
} else {
- console.log("warning - attempting to format the non number: "+seconds);
+ console.log('warning - attempting to format the non number: ' + seconds);
return null;
}
}
- cap.init = function(transcriptId, playerId, maxLength, minLength) {
+ function convertTimecodeToSrt(timecode) {
+ //the same as VTT format but milliseconds separated by a comma
+ return timecode.substring(0,8) + "," + timecode.substring(9,12);
+ }
+
+ cap.init = function (transcriptId, playerId, maxLength, minLength) {
var transcript = document.getElementById(transcriptId);
var words = transcript.querySelectorAll('[data-m]');
var data = {};
@@ -41,49 +46,46 @@ var caption = (function () {
var maxLineLength = 37;
var minLineLength = 21;
- var captionsVtt = "WEBVTT\n"
+ var captionsVtt = 'WEBVTT\n';
+ var captionsSrt = '';
var endSentenceDelimiter = /[\.。?؟!]/g;
var midSentenceDelimiter = /[,、–,،و:,…‥]/g;
- if (!isNaN(maxLength)) {
+ if (!isNaN(maxLength) && maxLength != null) {
maxLineLength = maxLength;
}
- if (!isNaN(minLength)) {
+ if (!isNaN(minLength) && minLength != null) {
minLineLength = minLength;
}
- var lastSpeaker = "";
-
- words.forEach(function(word, i) {
+ var lastSpeaker = '';
+ words.forEach(function (word, i) {
if (thisSegmentMeta === null) {
// create segment meta object
- thisSegmentMeta = new segmentMeta("", null, 0, 0, 0);
+ thisSegmentMeta = new segmentMeta('', null, 0, 0, 0);
}
- if (word.classList.contains("speaker")) {
-
+ if (word.classList.contains('speaker')) {
// checking that this is not a new segment AND a new empty segment wasn't already created
- if (thisSegmentMeta !== null && thisSegmentMeta.start !== null) {
+ if (thisSegmentMeta !== null && thisSegmentMeta.start !== null) {
//console.log("pushing...");
//console.log(thisSegmentMeta);
data.segments.push(thisSegmentMeta); // push the previous segment because it's a new speaker
- thisSegmentMeta = new segmentMeta("", null, 0, 0, 0);
+ thisSegmentMeta = new segmentMeta('', null, 0, 0, 0);
}
thisSegmentMeta.speaker = word.innerText;
-
} else {
-
- var thisStart = parseInt(word.getAttribute("data-m"))/1000;
- var thisDuration = parseInt(word.getAttribute("data-d"))/1000;
+ var thisStart = parseInt(word.getAttribute('data-m')) / 1000;
+ var thisDuration = parseInt(word.getAttribute('data-d')) / 1000;
if (isNaN(thisStart)) {
thisStart = 0;
}
-
+
if (isNaN(thisDuration)) {
thisDuration = 0;
}
@@ -91,8 +93,8 @@ var caption = (function () {
var thisText = word.innerText;
thisWordMeta = new wordMeta(thisStart, thisDuration, thisText);
-
- if (thisSegmentMeta.start === null) {
+
+ if (thisSegmentMeta.start === null) {
thisSegmentMeta.start = thisStart;
thisSegmentMeta.duration = 0;
thisSegmentMeta.chars = 0;
@@ -123,47 +125,51 @@ var caption = (function () {
var captions = [];
var thisCaption = null;
- data.segments.map(function(segment) {
-
+ data.segments.map(function (segment) {
// If the entire segment fits on a line, add it to the captions.
if (segment.chars < maxLineLength) {
+ thisCaption = new captionMeta(
+ formatSeconds(segment.start),
+ formatSeconds(segment.start + segment.duration),
+ '',
+ );
- thisCaption = new captionMeta(formatSeconds(segment.start), formatSeconds(segment.start + segment.duration), "");
-
- segment.words.forEach(function(wordMeta) {
+ segment.words.forEach(function (wordMeta) {
thisCaption.text += wordMeta.text;
});
- thisCaption.text += "\n";
+ thisCaption.text += '\n';
//console.log("0. pushing because the whole segment fits on a line!");
//console.log(thisCaption);
captions.push(thisCaption);
thisCaption = null;
-
- } else { // The number of chars in this segment is longer than our single line maximum
+ } else {
+ // The number of chars in this segment is longer than our single line maximum
var charCount = 0;
- var lineText = "";
+ var lineText = '';
var firstLine = true;
var lastOutTime;
var lastInTime = null;
-
- segment.words.forEach(function(wordMeta, index) {
+ segment.words.forEach(function (wordMeta, index) {
var lastChar = wordMeta.text.replace(/\s/g, '').slice(-1);
- if (lastInTime === null) { // if it doesn't exist yet set the caption start time to the word's start time.
+ if (lastInTime === null) {
+ // if it doesn't exist yet set the caption start time to the word's start time.
lastInTime = wordMeta.start;
}
// Are we over the minimum length of a line and hitting a good place to split mid-sentence?
if (charCount + wordMeta.text.length > minLineLength && lastChar.match(midSentenceDelimiter)) {
-
if (firstLine === true) {
+ thisCaption = new captionMeta(
+ formatSeconds(lastInTime),
+ formatSeconds(wordMeta.start + wordMeta.duration),
+ '',
+ );
+ thisCaption.text += lineText + wordMeta.text + '\n';
- thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(wordMeta.start + wordMeta.duration), "");
- thisCaption.text += lineText + wordMeta.text + "\n";
-
//check for last word in segment, if it is we can push a one line caption, if not – move on to second line
if (index + 1 >= segment.words.length) {
@@ -174,11 +180,11 @@ var caption = (function () {
} else {
firstLine = false;
}
-
- } else { // We're on the second line ... we're over the minimum chars and in a good place to split – let's push the caption
+ } else {
+ // We're on the second line ... we're over the minimum chars and in a good place to split – let's push the caption
thisCaption.stop = formatSeconds(wordMeta.start + wordMeta.duration);
- thisCaption.text += lineText + wordMeta.text + "\n";
+ thisCaption.text += lineText + wordMeta.text;
//console.log("2. pushing because we're on the second line and have a good place to split");
//console.log(thisCaption);
captions.push(thisCaption);
@@ -188,22 +194,20 @@ var caption = (function () {
// whether first line or not we should reset ready for a new caption
charCount = 0;
- lineText = "";
- lastInTime = null;
-
- } else { // we're not over the minimum length with a suitable splitting point
+ lineText = '';
+ lastInTime = null;
+ } else {
+ // we're not over the minimum length with a suitable splitting point
// If we add this word are we over the maximum?
if (charCount + wordMeta.text.length > maxLineLength) {
-
if (firstLine === true) {
-
if (lastOutTime === undefined) {
lastOutTime = wordMeta.start + wordMeta.duration;
}
- thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), "");
- thisCaption.text += lineText + "\n";
+ thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), '');
+ thisCaption.text += lineText + '\n';
// It's just the first line so we should only push a new caption if it's the very last word!
@@ -213,12 +217,12 @@ var caption = (function () {
} else {
firstLine = false;
}
-
- } else { // We're on the second line and since we're over the maximum with the next word we should push this caption!
+ } else {
+ // We're on the second line and since we're over the maximum with the next word we should push this caption!
thisCaption.stop = formatSeconds(lastOutTime);
- thisCaption.text += lineText + "\n";
-
+ thisCaption.text += lineText;
+
captions.push(thisCaption);
thisCaption = null;
@@ -226,52 +230,61 @@ var caption = (function () {
}
// do the stuff we need to do to start a new line
- charCount = wordMeta.text.length;
+ charCount = wordMeta.text.length;
lineText = wordMeta.text;
lastInTime = wordMeta.start; // Why do we do this??????
-
- } else { // We're not over the maximum with this word, update the line length and add the word to the text
+ } else {
+ // We're not over the maximum with this word, update the line length and add the word to the text
charCount += wordMeta.text.length;
lineText += wordMeta.text;
-
}
}
// for every word update the lastOutTime
lastOutTime = wordMeta.start + wordMeta.duration;
});
-
+
// we're out of words for this segment - decision time!
- if (thisCaption !== null) { // The caption had been started, time to add whatever text we have and add a stop point
+ if (thisCaption !== null) {
+ // The caption had been started, time to add whatever text we have and add a stop point
thisCaption.stop = formatSeconds(lastOutTime);
- thisCaption.text += lineText + "\n";
+ thisCaption.text += lineText;
//console.log("3. pushing at end of segment when new caption HAS BEEN created");
//console.log(thisCaption);
captions.push(thisCaption);
thisCaption = null;
-
- } else { // caption hadn't been started yet - create one!
- if (lastInTime !== null) {
+ } else {
+ // caption hadn't been started yet - create one!
+ if (lastInTime !== null) {
thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), lineText);
//console.log("4. pushing at end of segment when new caption has yet to be created");
//console.log(thisCaption);
captions.push(thisCaption);
- thisCaption = null;
+ thisCaption = null;
}
}
}
});
- captions.forEach(function(caption) {
- captionsVtt += "\n" + caption.start + "-->" + caption.stop + "\n" + caption.text + "\n";
+ captions.forEach(function (caption, i) {
+ captionsVtt += '\n' + caption.start + ' --> ' + caption.stop + '\n' + caption.text + '\n';
+ captionsSrt += '\n' + (i + 1) + '\n' + convertTimecodeToSrt(caption.start) + ' --> ' + convertTimecodeToSrt(caption.stop) + '\n' + caption.text + '\n';
});
- document.getElementById(playerId+'-vtt').setAttribute("src", 'data:text/vtt,'+encodeURIComponent(captionsVtt));
- console.log(captionsVtt);
+ var trackElement = document.getElementById(playerId+'-vtt');
- }
+ if (trackElement !== null) {
+ trackElement.setAttribute("src", 'data:text/vtt,'+encodeURIComponent(captionsVtt));
+ }
- return cap;
+ function captionsObj(vtt, srt) {
+ this.vtt = vtt;
+ this.srt = srt;
+ }
+
+ return new captionsObj(captionsVtt, captionsSrt);
+ };
-});
\ No newline at end of file
+ return cap;
+};