From 24254a7d6e79225126aa95ca7be29ebdc5910eda Mon Sep 17 00:00:00 2001 From: Mark Boas Date: Fri, 11 Feb 2022 18:10:39 +0100 Subject: [PATCH 1/3] 73 added ability to download captions --- wp-hyperaudio/hyperaudio-admin.php | 31 +- wp-hyperaudio/js/caption.js | 698 +++++++++++++++++++---------- wp-hyperaudio/js/converter.js | 21 + 3 files changed, 517 insertions(+), 233 deletions(-) diff --git a/wp-hyperaudio/hyperaudio-admin.php b/wp-hyperaudio/hyperaudio-admin.php index 99128ae..50c1a58 100644 --- a/wp-hyperaudio/hyperaudio-admin.php +++ b/wp-hyperaudio/hyperaudio-admin.php @@ -15,6 +15,7 @@ function hyperaudio_load_admin_script($hook) return; } + wp_enqueue_script('caption', plugins_url('/js/caption.js', __FILE__), false, '1.0.0', false); wp_enqueue_script( 'converter', plugin_dir_url( __FILE__ ) . '/js/converter.js', array( 'jquery' ), '1.0.0', true ); } @@ -143,6 +144,21 @@ function hyperaudio_options_page() padding: 4px; } + .sub-holder { + font-size: 80%; + padding-top: 16px; + display: none; + } + + .sub-btn { + margin-right: 16px; + } + + .sub-download { + padding-right: 16px; + display: none; + } + @@ -266,13 +282,18 @@ function hyperaudio_options_page()
-
- +
+ + Download WebVTT ⬇ + Download SRT ⬇ +
+ + + + - - - + \ No newline at end of file diff --git a/wp-hyperaudio/js/caption.js b/wp-hyperaudio/js/caption.js index a860f12..f7aabed 100644 --- a/wp-hyperaudio/js/caption.js +++ b/wp-hyperaudio/js/caption.js @@ -1,290 +1,532 @@ -/*! (C) The Hyperaudio Project. MIT @license: en.wikipedia.org/wiki/MIT_License. */ -'use strict'; - -var caption = function () { - var cap = {}; - - function formatSeconds(seconds) { - if (typeof seconds == 'number') { - return new Date(seconds.toFixed(3) * 1000).toISOString().substring(11,23); - } else { - console.log('warning - attempting to format the non number: ' + seconds); - return null; +var $ = jQuery; // needed for wordpress + +$(document).ready(function() { + var p = document.getElementById('para-split'); + var cp = document.getElementById('current-para-split'); + var paraSplitTime = p.value; + var paraPunct = $('#para-punctuation').prop('checked'); + + p.addEventListener( + 'input', + function() { + cp.innerHTML = p.value; + paraSplitTime = p.value; + }, + false + ); + + $('#para-punctuation').change(function() { + if (this.checked) { + paraPunct = $('#para-punctuation').prop('checked'); } - } + }); - function convertTimecodeToSrt(timecode) { - //the same as VTT format but milliseconds separated by a comma - return timecode.substring(0,8) + "," + timecode.substring(9,12); - } + $('#markup-view').click(function() { + $('#rendered-view').addClass('inactive'); + $(this).removeClass('inactive'); + $('#rtranscript').hide(); - cap.init = function (transcriptId, playerId, maxLength, minLength) { - var transcript = document.getElementById(transcriptId); - var words = transcript.querySelectorAll('[data-m]'); - var data = {}; - data.segments = []; - var segmentIndex = 0; - - function segmentMeta(speaker, start, duration, chars) { - this.speaker = speaker; - this.start = start; - this.duration = duration; - this.chars = chars; - this.words = []; - } + var regex = /\span>(.*?)\ 0) { + strToMatch = strToMatch.replace(""+matches[1], matches[1]+""); + } } - var thisWordMeta; - var thisSegmentMeta = null; + $('#htranscript').val(strToMatch); + $('#htranscript').show(); + return false; + }); + + $('#rendered-view').click(function() { + $('#markup-view').addClass('inactive'); + $(this).removeClass('inactive'); + $('#htranscript').hide(); + $('#rtranscript').html("rendering..."); + $('#rtranscript').show(); + + setTimeout(renderTranscript, 100); + + return false; + }); + + function renderTranscript() { + $('#rtranscript').html($('#htranscript').val()); + + //document.getElementById("gen-subs").addEventListener("click", genSubs); + $('#gen-subs').click(genSubs); + //document.getElementById("generate-captions").style.display = 'inline'; + $('#generate-captions').show(); + + function genSubs(){ + var cap1 = caption(); + var subs = cap1.init("rtranscript", null, null, null); + //console.log(subs.vtt); + //console.log(subs.srt); + //var downloadLinkVtt = document.getElementById("download-vtt"); + //downloadLinkVtt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt)); + //downloadLinkVtt.style.display = 'inline'; + $('#download-vtt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt)).show(); + + //var downloadLinkSrt = document.getElementById("download-srt"); + //downloadLinkSrt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.srt)); + //downloadLinkSrt.style.display = 'inline'; + $('#download-srt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.srt)).show(); + }; + } + + String.prototype.replaceAll = function(search, replacement) { + var target = this; + return target.replace(new RegExp(search, 'g'), replacement); + }; - // defaults - var maxLineLength = 37; - var minLineLength = 21; + // From popcorn.parserSRT.js - var captionsVtt = 'WEBVTT\n'; - var captionsSrt = ''; + function parseSRT(data) { - var endSentenceDelimiter = /[\.。?؟!]/g; - var midSentenceDelimiter = /[,、–,،و:,…‥]/g; + document.dispatchEvent(event); - if (!isNaN(maxLength) && maxLength != null) { - maxLineLength = maxLength; - } + var i = 0, + len = 0, + idx = 0, + lines, + time, + text, + sub; - if (!isNaN(minLength) && minLength != null) { - minLineLength = minLength; - } + // Simple function to convert HH:MM:SS,MMM or HH:MM:SS.MMM to SS.MMM + // Assume valid, returns 0 on error - var lastSpeaker = ''; + var toSeconds = function(t_in) { + var t = t_in.split(':'); - words.forEach(function (word, i) { - if (thisSegmentMeta === null) { - // create segment meta object - thisSegmentMeta = new segmentMeta('', null, 0, 0, 0); - } + try { + var s = t[2].split(','); - if (word.classList.contains('speaker')) { - // checking that this is not a new segment AND a new empty segment wasn't already created - if (thisSegmentMeta !== null && thisSegmentMeta.start !== null) { - //console.log("pushing..."); - //console.log(thisSegmentMeta); - data.segments.push(thisSegmentMeta); // push the previous segment because it's a new speaker - thisSegmentMeta = new segmentMeta('', null, 0, 0, 0); + // Just in case a . is decimal seperator + if (s.length === 1) { + s = t[2].split('.'); } - thisSegmentMeta.speaker = word.innerText; - } else { - var thisStart = parseInt(word.getAttribute('data-m')) / 1000; - var thisDuration = parseInt(word.getAttribute('data-d')) / 1000; + return ( + parseFloat(t[0], 10) * 3600 + + parseFloat(t[1], 10) * 60 + + parseFloat(s[0], 10) + + parseFloat(s[1], 10) / 1000 + ); + } catch (e) { + return 0; + } + }; - if (isNaN(thisStart)) { - thisStart = 0; - } + var outputString = '

'; + var lineBreaks = $('#line-breaks').prop('checked'); + var ltime = 0; + var ltext; - if (isNaN(thisDuration)) { - thisDuration = 0; - } + // Here is where the magic happens + // Split on line breaks + lines = data.split(/(?:\r\n|\r|\n)/gm); + len = lines.length; + + for (i = 0; i < len; i++) { + sub = {}; + text = []; - var thisText = word.innerText; + sub.id = parseInt(lines[i++], 10); + + // Split on '-->' delimiter, trimming spaces as well + + try { + time = lines[i++].split(/[\t ]*-->[\t ]*/); + } catch (e) { + alert('Warning. Possible issue on line ' + i + ": '" + lines[i] + "'."); + break; + } - thisWordMeta = new wordMeta(thisStart, thisDuration, thisText); + sub.start = toSeconds(time[0]); - if (thisSegmentMeta.start === null) { - thisSegmentMeta.start = thisStart; - thisSegmentMeta.duration = 0; - thisSegmentMeta.chars = 0; + // So as to trim positioning information from end + if (!time[1]) { + alert('Warning. Issue on line ' + i + ": '" + lines[i] + "'."); + return; + } + + idx = time[1].indexOf(' '); + if (idx !== -1) { + time[1] = time[1].substr(0, idx); + } + sub.end = toSeconds(time[1]); + + // Build single line of text from multi-line subtitle in file + while (i < len && lines[i]) { + text.push(lines[i++]); + } + + // Join into 1 line, SSA-style linebreaks + // Strip out other SSA-style tags + sub.text = text.join('\\N').replace(/\{(\\[\w]+\(?([\w\d]+,?)+\)?)+\}/gi, ''); + + // Escape HTML entities + sub.text = sub.text.replace(//g, '>'); + + // Unescape great than and less than when it makes a valid html tag of a supported style (font, b, u, s, i) + // Modified version of regex from Phil Haack's blog: http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx + // Later modified by kev: http://kevin.deldycke.com/2007/03/ultimate-regular-expression-for-html-tag-parsing-with-php/ + sub.text = sub.text.replace( + /<(\/?(font|b|u|i|s))((\s+(\w|\w[\w\-]*\w)(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)(\/?)>/gi, + '<$1$3$7>' + ); + //sub.text = sub.text.replace( /\\N/gi, "
" ); + sub.text = sub.text.replace(/\\N/gi, ' '); + + var splitMode = 0; + + var wordLengthSplit = $('#word-length').prop('checked'); + + // enhancements to take account of word length + + var swords = sub.text.split(' '); + var sduration = sub.end - sub.start; + var stimeStep = sduration / swords.length; + + // determine length of words + + var swordLengths = []; + var swordTimes = []; + + var totalLetters = 0; + for (var si = 0, sl = swords.length; si < sl; ++si) { + totalLetters = totalLetters + swords[si].length; + swordLengths[si] = swords[si].length; + } + + var letterTime = sduration / totalLetters; + var wordStart = 0; + + for (var si = 0, sl = swords.length; si < sl; ++si) { + var wordTime = swordLengths[si] * letterTime; + var stime; + if (wordLengthSplit) { + stime = Math.round((sub.start + si * stimeStep) * 1000); + + document.dispatchEvent(event); + } else { + stime = Math.round((wordStart + sub.start) * 1000); + + document.dispatchEvent(event); } - thisSegmentMeta.duration += thisDuration; - thisSegmentMeta.chars += thisText.length; + wordStart = wordStart + wordTime; + var stext = swords[si]; - thisSegmentMeta.words.push(thisWordMeta); + if (stime - ltime > paraSplitTime * 1000 && paraSplitTime > 0) { - // remove spaces first just in case - var lastChar = thisText.replace(/\s/g, '').slice(-1); - if (lastChar.match(endSentenceDelimiter)) { - data.segments.push(thisSegmentMeta); - thisSegmentMeta = null; + var punctPresent = + ltext && (ltext.indexOf('.') > 0 || ltext.indexOf('?') > 0 || ltext.indexOf('!') > 0); + if (!paraPunct || (paraPunct && punctPresent)) { + outputString += '

'; + } } - } - }); - //console.log(data.segments); + outputString += '' + stext + ' '; + + ltime = stime; + ltext = stext; - function captionMeta(start, stop, text) { - this.start = start; - this.stop = stop; - this.text = text; + if (lineBreaks) outputString = outputString + '\n'; + } } + return outputString + '

'; + } - var captions = []; - var thisCaption = null; + $('#transform').click(function() { + $('#transform-spinner').show(); + $('#htranscript').val("converting..."); + setTimeout(generateTranscript, 100); + }); - data.segments.map(function (segment) { - // If the entire segment fits on a line, add it to the captions. - if (segment.chars < maxLineLength) { - thisCaption = new captionMeta( - formatSeconds(segment.start), - formatSeconds(segment.start + segment.duration), - '', - ); + function generateTranscript() { - segment.words.forEach(function (wordMeta) { - thisCaption.text += wordMeta.text; - }); + var input = $('#subtitles').val(); - thisCaption.text += '\n'; - //console.log("0. pushing because the whole segment fits on a line!"); - //console.log(thisCaption); - captions.push(thisCaption); - thisCaption = null; - } else { - // The number of chars in this segment is longer than our single line maximum - - var charCount = 0; - var lineText = ''; - var firstLine = true; - var lastOutTime; - var lastInTime = null; - - segment.words.forEach(function (wordMeta, index) { - var lastChar = wordMeta.text.replace(/\s/g, '').slice(-1); - - if (lastInTime === null) { - // if it doesn't exist yet set the caption start time to the word's start time. - lastInTime = wordMeta.start; - } + var ht; - // Are we over the minimum length of a line and hitting a good place to split mid-sentence? - if (charCount + wordMeta.text.length > minLineLength && lastChar.match(midSentenceDelimiter)) { - if (firstLine === true) { - thisCaption = new captionMeta( - formatSeconds(lastInTime), - formatSeconds(wordMeta.start + wordMeta.duration), - '', - ); - thisCaption.text += lineText + wordMeta.text + '\n'; + var format = $('#format-select').val(); - //check for last word in segment, if it is we can push a one line caption, if not – move on to second line + switch (format) { - if (index + 1 >= segment.words.length) { - //console.log("1. pushing because we're at a good place to split, we're on the first line but it's the last word of the segment."); - //console.log(thisCaption); - captions.push(thisCaption); - thisCaption = null; - } else { - firstLine = false; - } + case 'oe': + var data = JSON.parse(input); + var items = ['
\n

']; + $.each(data.content.paragraphs, function(key, val) { + var paraStart = Math.round(val.start*1000); + items.push( + '\n' + + val.speaker + + ' ' + ); + + var lastStart = 0; + + $.each(val.words, function(k, v) { + if (typeof v.start !== 'undefined') { + items.push( + '\n' + + v.text + + ' ' + ); + lastStart = v.start; } else { - // We're on the second line ... we're over the minimum chars and in a good place to split – let's push the caption - - thisCaption.stop = formatSeconds(wordMeta.start + wordMeta.duration); - thisCaption.text += lineText + wordMeta.text; - //console.log("2. pushing because we're on the second line and have a good place to split"); - //console.log(thisCaption); - captions.push(thisCaption); - thisCaption = null; - firstLine = true; + + if (k === 0) { + lastStart = paraStart; + } + + items.push( + '\n' + + v.text + + ' ' + ); } + }); + items.push('

'); + }); - // whether first line or not we should reset ready for a new caption - charCount = 0; - lineText = ''; - lastInTime = null; - } else { - // we're not over the minimum length with a suitable splitting point + items.push('

'); + + ht = items.join(''); + + // remove empty paras + + ht = ht.split("

").join(""); + + break; + + case 'google': + var data = JSON.parse(input); + + var items = ['

']; + + $.each(data.response.results, function(key, val) { + $.each(val.alternatives, function(k, v) { + for (var i = 0; i < v.words.length; i++) { + items.push( + '' + + v.words[i].word + + ' ' + ); + + + if (i > 0 && Math.round(parseFloat(v.words[i].startTime)) - Math.round(parseFloat(v.words[i-1].startTime)) > paraSplitTime && paraSplitTime > 0) { + items.push('

'); + } + } + }); + }); - // If we add this word are we over the maximum? - if (charCount + wordMeta.text.length > maxLineLength) { - if (firstLine === true) { - if (lastOutTime === undefined) { - lastOutTime = wordMeta.start + wordMeta.duration; + items.push('

'); + + ht = items.join(''); + break; + + case 'speechmatics': + var data = JSON.parse(input); + var items = ['

']; + $.each(data, function(key, val) { + if (key == 'words') { + for (var i = 0; i < val.length; i++) { + var punct = ""; + if ((i+1) < val.length && val[i+1].name === ".") { + punct = "."; + } + + if (val[i].name !== ".") { + items.push( + '' + + val[i].name + punct + + ' ' + ); + } + + if (i > 0 && Math.round(parseFloat(val[i].time)) - Math.round(parseFloat(val[i-1].time)) > paraSplitTime && paraSplitTime > 0) { + if ((paraPunct && punct === ".") || (paraPunct === false)) { + items.push('

'); } + } + } + } + }); - thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), ''); - thisCaption.text += lineText + '\n'; + items.push('

'); - // It's just the first line so we should only push a new caption if it's the very last word! + ht = items.join(''); + break; - if (index >= segment.words.length) { - captions.push(thisCaption); - thisCaption = null; - } else { - firstLine = false; - } - } else { - // We're on the second line and since we're over the maximum with the next word we should push this caption! + case 'gentle': + var data = JSON.parse(input); - thisCaption.stop = formatSeconds(lastOutTime); - thisCaption.text += lineText; + wds = data['words'] || []; + transcript = data['transcript']; - captions.push(thisCaption); + var trans = document.createElement('p'); - thisCaption = null; - firstLine = true; - } + trans.innerHTML = ''; + + var currentOffset = 0; + var wordCounter = 0; + var lastOutTime = 0; + + wds.forEach(function(wd) { + // Add non-linked text + + var newlineDetected = false; + + if (wd.startOffset > currentOffset) { + var txt = transcript.slice(currentOffset, wd.startOffset); + newlineDetected = /\r|\n/.exec(txt); - // do the stuff we need to do to start a new line - charCount = wordMeta.text.length; - lineText = wordMeta.text; - lastInTime = wordMeta.start; // Why do we do this?????? + if (trans.lastChild) { + trans.lastChild.text += txt + " "; } else { - // We're not over the maximum with this word, update the line length and add the word to the text + // this happens only at the beginning when offset not zero + var span = document.createElement('span'); + var initialWd = document.createTextNode(txt + " "); + var initialDatam = document.createAttribute('data-m'); + var initialDatad = document.createAttribute('data-d'); + + span.appendChild(initialWd); + initialDatam.value = 0; + initialDatad.value = 0; + span.setAttributeNode(initialDatam); + span.setAttributeNode(initialDatad); + trans.appendChild(span); + trans.appendChild(span); + } - charCount += wordMeta.text.length; - lineText += wordMeta.text; + if (newlineDetected) { + var lineBreak = document.createElement('br'); + trans.appendChild(lineBreak); } + currentOffset = wd.startOffset; } - // for every word update the lastOutTime - lastOutTime = wordMeta.start + wordMeta.duration; - }); + var datam = document.createAttribute('data-m'); + var datad = document.createAttribute('data-d'); + + var word = document.createElement('span'); + var txt = transcript.slice(wd.startOffset, wd.endOffset+1); + + if (!txt.endsWith(" ")){ + txt = txt + " "; + } + + var wordText = document.createTextNode(txt); + word.appendChild(wordText); + + if (wd.start !== undefined) { + datam.value = Math.floor(wd.start * 1000); + datad.value = Math.floor((wd.end - wd.start) * 1000); + } else { + // look ahead to the next timed word + for (var i = wordCounter; i < wds.length - 1; i++) { + if (wds[i + 1].start !== undefined) { + datam.value = Math.floor(wds[i + 1].start * 1000); + break; + } + } + datad.value = '100'; // default duration when not known + } - // we're out of words for this segment - decision time! - if (thisCaption !== null) { - // The caption had been started, time to add whatever text we have and add a stop point - thisCaption.stop = formatSeconds(lastOutTime); - thisCaption.text += lineText; - //console.log("3. pushing at end of segment when new caption HAS BEEN created"); - //console.log(thisCaption); - captions.push(thisCaption); - thisCaption = null; - } else { - // caption hadn't been started yet - create one! - if (lastInTime !== null) { - thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), lineText); - //console.log("4. pushing at end of segment when new caption has yet to be created"); - //console.log(thisCaption); - captions.push(thisCaption); - thisCaption = null; + if (datam.value < lastOutTime) { + datam.value = lastOutTime + 1; } - } - } - }); - captions.forEach(function (caption, i) { - captionsVtt += '\n' + caption.start + ' --> ' + caption.stop + '\n' + caption.text + '\n'; - captionsSrt += '\n' + (i + 1) + '\n' + convertTimecodeToSrt(caption.start) + ' --> ' + convertTimecodeToSrt(caption.stop) + '\n' + caption.text + '\n'; - }); + word.setAttributeNode(datam); + word.setAttributeNode(datad); - var trackElement = document.getElementById(playerId+'-vtt'); + lastOutTime = parseInt(datam.value) + parseInt(datad.value); - if (trackElement !== null) { - trackElement.setAttribute("src", 'data:text/vtt,'+encodeURIComponent(captionsVtt)); - } + trans.appendChild(word); + + currentOffset = wd.endOffset; + wordCounter++; + }); - function captionsObj(vtt, srt) { - this.vtt = vtt; - this.srt = srt; + var txt = transcript.slice(currentOffset, transcript.length); + var word = document.createTextNode(txt); + trans.appendChild(word); + currentOffset = transcript.length; + + article = document.createElement('article'); + section = document.createElement('section'); + + section.appendChild(trans); + article.appendChild(section); + + ht = article.outerHTML; + + //newlines can cause issues within HTML tags + ht = ht.replace(/(?:\r\n|\r|\n)/g, ''); + + ht = ht.replace(new RegExp('
', 'g'), '

'); + + // replace all unneeded empty paras + ht = ht.replace(new RegExp('

', 'g'), ''); + + break; + + case 'srt': + ht = parseSRT(input); + break; + + case 'other': + var xmlString = input, + parser = new DOMParser(), + doc = parser.parseFromString(xmlString, 'text/xml'); + + var transcript = doc.getElementsByTagName('section')[0]; + + for (var i = 0; i < doc.getElementsByClassName('speaker').length; i++) { + transcript.getElementsByClassName('speaker')[i].innerHTML = + '[' + + transcript.getElementsByClassName('speaker')[i].innerHTML.replace(': ', '') + + '] '; + var datam = document.createAttribute('data-m'); + var datad = document.createAttribute('data-d'); + datam.value = transcript + .getElementsByClassName('speaker') + [i].nextElementSibling.getAttribute('data-m'); + datad.value = '1'; + transcript.getElementsByClassName('speaker')[i].setAttributeNode(datam); + transcript.getElementsByClassName('speaker')[i].setAttributeNode(datad); + } + + var transcriptText = transcript.outerHTML; + + ht = '
' + transcriptText + '
'; } - return new captionsObj(captionsVtt, captionsSrt); - }; + $('#htranscript').val(ht); + $('#rtranscript').html(ht); - return cap; -}; + $('#transform-spinner').hide(); + return false; + } +}); \ No newline at end of file diff --git a/wp-hyperaudio/js/converter.js b/wp-hyperaudio/js/converter.js index 4a8fd14..f7aabed 100644 --- a/wp-hyperaudio/js/converter.js +++ b/wp-hyperaudio/js/converter.js @@ -55,6 +55,27 @@ $(document).ready(function() { function renderTranscript() { $('#rtranscript').html($('#htranscript').val()); + + //document.getElementById("gen-subs").addEventListener("click", genSubs); + $('#gen-subs').click(genSubs); + //document.getElementById("generate-captions").style.display = 'inline'; + $('#generate-captions').show(); + + function genSubs(){ + var cap1 = caption(); + var subs = cap1.init("rtranscript", null, null, null); + //console.log(subs.vtt); + //console.log(subs.srt); + //var downloadLinkVtt = document.getElementById("download-vtt"); + //downloadLinkVtt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt)); + //downloadLinkVtt.style.display = 'inline'; + $('#download-vtt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt)).show(); + + //var downloadLinkSrt = document.getElementById("download-srt"); + //downloadLinkSrt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.srt)); + //downloadLinkSrt.style.display = 'inline'; + $('#download-srt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.srt)).show(); + }; } String.prototype.replaceAll = function(search, replacement) { From 4bc6cf505eedb84a794f4f82667a112c2cf0e3a2 Mon Sep 17 00:00:00 2001 From: Mark Boas Date: Fri, 11 Feb 2022 18:26:21 +0100 Subject: [PATCH 2/3] fix for caption.js overwrite with converter.js --- wp-hyperaudio/js/caption.js | 669 +++++++++++------------------------- 1 file changed, 207 insertions(+), 462 deletions(-) diff --git a/wp-hyperaudio/js/caption.js b/wp-hyperaudio/js/caption.js index f7aabed..ed580e1 100644 --- a/wp-hyperaudio/js/caption.js +++ b/wp-hyperaudio/js/caption.js @@ -1,532 +1,277 @@ -var $ = jQuery; // needed for wordpress - -$(document).ready(function() { - var p = document.getElementById('para-split'); - var cp = document.getElementById('current-para-split'); - var paraSplitTime = p.value; - var paraPunct = $('#para-punctuation').prop('checked'); - - p.addEventListener( - 'input', - function() { - cp.innerHTML = p.value; - paraSplitTime = p.value; - }, - false - ); - - $('#para-punctuation').change(function() { - if (this.checked) { - paraPunct = $('#para-punctuation').prop('checked'); - } - }); - - $('#markup-view').click(function() { - $('#rendered-view').addClass('inactive'); - $(this).removeClass('inactive'); - $('#rtranscript').hide(); +'use strict'; - var regex = /\span>(.*?)\ 0) { - strToMatch = strToMatch.replace(""+matches[1], matches[1]+""); - } + function formatSeconds(seconds) { + if(typeof seconds == 'number'){ + return new Date(seconds.toFixed(3) * 1000).toISOString().substr(11, 12); + } else { + console.log("warning - attempting to format the non number: "+seconds); + return null; } - - $('#htranscript').val(strToMatch); - $('#htranscript').show(); - return false; - }); - - $('#rendered-view').click(function() { - $('#markup-view').addClass('inactive'); - $(this).removeClass('inactive'); - $('#htranscript').hide(); - $('#rtranscript').html("rendering..."); - $('#rtranscript').show(); - - setTimeout(renderTranscript, 100); - - return false; - }); - - function renderTranscript() { - $('#rtranscript').html($('#htranscript').val()); - - //document.getElementById("gen-subs").addEventListener("click", genSubs); - $('#gen-subs').click(genSubs); - //document.getElementById("generate-captions").style.display = 'inline'; - $('#generate-captions').show(); - - function genSubs(){ - var cap1 = caption(); - var subs = cap1.init("rtranscript", null, null, null); - //console.log(subs.vtt); - //console.log(subs.srt); - //var downloadLinkVtt = document.getElementById("download-vtt"); - //downloadLinkVtt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt)); - //downloadLinkVtt.style.display = 'inline'; - $('#download-vtt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.vtt)).show(); - - //var downloadLinkSrt = document.getElementById("download-srt"); - //downloadLinkSrt.setAttribute("href", 'data:text/vtt,'+encodeURIComponent(subs.srt)); - //downloadLinkSrt.style.display = 'inline'; - $('#download-srt').attr("href", 'data:text/vtt,'+encodeURIComponent(subs.srt)).show(); - }; } - String.prototype.replaceAll = function(search, replacement) { - var target = this; - return target.replace(new RegExp(search, 'g'), replacement); - }; - - // From popcorn.parserSRT.js - - function parseSRT(data) { - - document.dispatchEvent(event); - - var i = 0, - len = 0, - idx = 0, - lines, - time, - text, - sub; - - // Simple function to convert HH:MM:SS,MMM or HH:MM:SS.MMM to SS.MMM - // Assume valid, returns 0 on error - - var toSeconds = function(t_in) { - var t = t_in.split(':'); - - try { - var s = t[2].split(','); - - // Just in case a . is decimal seperator - if (s.length === 1) { - s = t[2].split('.'); - } - - return ( - parseFloat(t[0], 10) * 3600 + - parseFloat(t[1], 10) * 60 + - parseFloat(s[0], 10) + - parseFloat(s[1], 10) / 1000 - ); - } catch (e) { - return 0; - } - }; + cap.init = function(transcriptId, playerId, maxLength, minLength) { + var transcript = document.getElementById(transcriptId); + var words = transcript.querySelectorAll('[data-m]'); + var data = {}; + data.segments = []; + var segmentIndex = 0; + + function segmentMeta(speaker, start, duration, chars) { + this.speaker = speaker; + this.start = start; + this.duration = duration; + this.chars = chars; + this.words = []; + } - var outputString = '

'; - var lineBreaks = $('#line-breaks').prop('checked'); - var ltime = 0; - var ltext; + function wordMeta(start, duration, text) { + this.start = start; + this.duration = duration; + this.text = text; + } - // Here is where the magic happens - // Split on line breaks - lines = data.split(/(?:\r\n|\r|\n)/gm); - len = lines.length; + var thisWordMeta; + var thisSegmentMeta = null; - for (i = 0; i < len; i++) { - sub = {}; - text = []; + // defaults + var maxLineLength = 37; + var minLineLength = 21; - sub.id = parseInt(lines[i++], 10); + var captionsVtt = "WEBVTT\n" - // Split on '-->' delimiter, trimming spaces as well + var endSentenceDelimiter = /[\.。?؟!]/g; + var midSentenceDelimiter = /[,、–,،و:,…‥]/g; - try { - time = lines[i++].split(/[\t ]*-->[\t ]*/); - } catch (e) { - alert('Warning. Possible issue on line ' + i + ": '" + lines[i] + "'."); - break; - } + if (!isNaN(maxLength)) { + maxLineLength = maxLength; + } - sub.start = toSeconds(time[0]); + if (!isNaN(minLength)) { + minLineLength = minLength; + } - // So as to trim positioning information from end - if (!time[1]) { - alert('Warning. Issue on line ' + i + ": '" + lines[i] + "'."); - return; - } + var lastSpeaker = ""; + + words.forEach(function(word, i) { - idx = time[1].indexOf(' '); - if (idx !== -1) { - time[1] = time[1].substr(0, idx); + if (thisSegmentMeta === null) { + // create segment meta object + thisSegmentMeta = new segmentMeta("", null, 0, 0, 0); } - sub.end = toSeconds(time[1]); - // Build single line of text from multi-line subtitle in file - while (i < len && lines[i]) { - text.push(lines[i++]); - } + if (word.classList.contains("speaker")) { - // Join into 1 line, SSA-style linebreaks - // Strip out other SSA-style tags - sub.text = text.join('\\N').replace(/\{(\\[\w]+\(?([\w\d]+,?)+\)?)+\}/gi, ''); + // checking that this is not a new segment AND a new empty segment wasn't already created + if (thisSegmentMeta !== null && thisSegmentMeta.start !== null) { + //console.log("pushing..."); + //console.log(thisSegmentMeta); + data.segments.push(thisSegmentMeta); // push the previous segment because it's a new speaker + thisSegmentMeta = new segmentMeta("", null, 0, 0, 0); + } - // Escape HTML entities - sub.text = sub.text.replace(//g, '>'); + thisSegmentMeta.speaker = word.innerText; - // Unescape great than and less than when it makes a valid html tag of a supported style (font, b, u, s, i) - // Modified version of regex from Phil Haack's blog: http://haacked.com/archive/2004/10/25/usingregularexpressionstomatchhtml.aspx - // Later modified by kev: http://kevin.deldycke.com/2007/03/ultimate-regular-expression-for-html-tag-parsing-with-php/ - sub.text = sub.text.replace( - /<(\/?(font|b|u|i|s))((\s+(\w|\w[\w\-]*\w)(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)(\/?)>/gi, - '<$1$3$7>' - ); - //sub.text = sub.text.replace( /\\N/gi, "
" ); - sub.text = sub.text.replace(/\\N/gi, ' '); + } else { - var splitMode = 0; + var thisStart = parseInt(word.getAttribute("data-m"))/1000; + var thisDuration = parseInt(word.getAttribute("data-d"))/1000; - var wordLengthSplit = $('#word-length').prop('checked'); + if (isNaN(thisStart)) { + thisStart = 0; + } + + if (isNaN(thisDuration)) { + thisDuration = 0; + } - // enhancements to take account of word length + var thisText = word.innerText; - var swords = sub.text.split(' '); - var sduration = sub.end - sub.start; - var stimeStep = sduration / swords.length; + thisWordMeta = new wordMeta(thisStart, thisDuration, thisText); + + if (thisSegmentMeta.start === null) { + thisSegmentMeta.start = thisStart; + thisSegmentMeta.duration = 0; + thisSegmentMeta.chars = 0; + } - // determine length of words + thisSegmentMeta.duration += thisDuration; + thisSegmentMeta.chars += thisText.length; - var swordLengths = []; - var swordTimes = []; + thisSegmentMeta.words.push(thisWordMeta); - var totalLetters = 0; - for (var si = 0, sl = swords.length; si < sl; ++si) { - totalLetters = totalLetters + swords[si].length; - swordLengths[si] = swords[si].length; + // remove spaces first just in case + var lastChar = thisText.replace(/\s/g, '').slice(-1); + if (lastChar.match(endSentenceDelimiter)) { + data.segments.push(thisSegmentMeta); + thisSegmentMeta = null; + } } + }); - var letterTime = sduration / totalLetters; - var wordStart = 0; + //console.log(data.segments); - for (var si = 0, sl = swords.length; si < sl; ++si) { - var wordTime = swordLengths[si] * letterTime; - var stime; - if (wordLengthSplit) { - stime = Math.round((sub.start + si * stimeStep) * 1000); - - document.dispatchEvent(event); - } else { - stime = Math.round((wordStart + sub.start) * 1000); + function captionMeta(start, stop, text) { + this.start = start; + this.stop = stop; + this.text = text; + } - document.dispatchEvent(event); - } + var captions = []; + var thisCaption = null; - wordStart = wordStart + wordTime; - var stext = swords[si]; + data.segments.map(function(segment) { - if (stime - ltime > paraSplitTime * 1000 && paraSplitTime > 0) { + // If the entire segment fits on a line, add it to the captions. + if (segment.chars < maxLineLength) { - var punctPresent = - ltext && (ltext.indexOf('.') > 0 || ltext.indexOf('?') > 0 || ltext.indexOf('!') > 0); - if (!paraPunct || (paraPunct && punctPresent)) { - outputString += '

'; - } - } + thisCaption = new captionMeta(formatSeconds(segment.start), formatSeconds(segment.start + segment.duration), ""); + + segment.words.forEach(function(wordMeta) { + thisCaption.text += wordMeta.text; + }); - outputString += '' + stext + ' '; + thisCaption.text += "\n"; + //console.log("0. pushing because the whole segment fits on a line!"); + //console.log(thisCaption); + captions.push(thisCaption); + thisCaption = null; - ltime = stime; - ltext = stext; + } else { // The number of chars in this segment is longer than our single line maximum - if (lineBreaks) outputString = outputString + '\n'; - } - } - return outputString + '

'; - } + var charCount = 0; + var lineText = ""; + var firstLine = true; + var lastOutTime; + var lastInTime = null; + + segment.words.forEach(function(wordMeta, index) { - $('#transform').click(function() { - $('#transform-spinner').show(); - $('#htranscript').val("converting..."); - setTimeout(generateTranscript, 100); - }); - - function generateTranscript() { - - var input = $('#subtitles').val(); - - var ht; - - var format = $('#format-select').val(); - - switch (format) { - - case 'oe': - var data = JSON.parse(input); - var items = ['
\n

']; - $.each(data.content.paragraphs, function(key, val) { - var paraStart = Math.round(val.start*1000); - items.push( - '\n' + - val.speaker + - ' ' - ); - - var lastStart = 0; - - $.each(val.words, function(k, v) { - if (typeof v.start !== 'undefined') { - items.push( - '\n' + - v.text + - ' ' - ); - lastStart = v.start; - } else { - - if (k === 0) { - lastStart = paraStart; - } - - items.push( - '\n' + - v.text + - ' ' - ); - } - }); - items.push('

'); - }); + var lastChar = wordMeta.text.replace(/\s/g, '').slice(-1); - items.push('

'); + if (lastInTime === null) { // if it doesn't exist yet set the caption start time to the word's start time. + lastInTime = wordMeta.start; + } - ht = items.join(''); + // Are we over the minimum length of a line and hitting a good place to split mid-sentence? + if (charCount + wordMeta.text.length > minLineLength && lastChar.match(midSentenceDelimiter)) { - // remove empty paras + if (firstLine === true) { - ht = ht.split("

").join(""); - - break; - - case 'google': - var data = JSON.parse(input); - - var items = ['

']; - - $.each(data.response.results, function(key, val) { - $.each(val.alternatives, function(k, v) { - for (var i = 0; i < v.words.length; i++) { - items.push( - '' + - v.words[i].word + - ' ' - ); - - - if (i > 0 && Math.round(parseFloat(v.words[i].startTime)) - Math.round(parseFloat(v.words[i-1].startTime)) > paraSplitTime && paraSplitTime > 0) { - items.push('

'); + thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(wordMeta.start + wordMeta.duration), ""); + thisCaption.text += lineText + wordMeta.text + "\n"; + + //check for last word in segment, if it is we can push a one line caption, if not – move on to second line + + if (index + 1 >= segment.words.length) { + //console.log("1. pushing because we're at a good place to split, we're on the first line but it's the last word of the segment."); + //console.log(thisCaption); + captions.push(thisCaption); + thisCaption = null; + } else { + firstLine = false; } - } - }); - }); - items.push('

'); + } else { // We're on the second line ... we're over the minimum chars and in a good place to split – let's push the caption - ht = items.join(''); - break; - - case 'speechmatics': - var data = JSON.parse(input); - var items = ['

']; - $.each(data, function(key, val) { - if (key == 'words') { - for (var i = 0; i < val.length; i++) { - var punct = ""; - if ((i+1) < val.length && val[i+1].name === ".") { - punct = "."; - } - - if (val[i].name !== ".") { - items.push( - '' + - val[i].name + punct + - ' ' - ); - } - - if (i > 0 && Math.round(parseFloat(val[i].time)) - Math.round(parseFloat(val[i-1].time)) > paraSplitTime && paraSplitTime > 0) { - if ((paraPunct && punct === ".") || (paraPunct === false)) { - items.push('

'); - } - } + thisCaption.stop = formatSeconds(wordMeta.start + wordMeta.duration); + thisCaption.text += lineText + wordMeta.text + "\n"; + //console.log("2. pushing because we're on the second line and have a good place to split"); + //console.log(thisCaption); + captions.push(thisCaption); + thisCaption = null; + firstLine = true; } - } - }); - items.push('

'); + // whether first line or not we should reset ready for a new caption + charCount = 0; + lineText = ""; + lastInTime = null; - ht = items.join(''); - break; + } else { // we're not over the minimum length with a suitable splitting point - case 'gentle': - var data = JSON.parse(input); + // If we add this word are we over the maximum? + if (charCount + wordMeta.text.length > maxLineLength) { - wds = data['words'] || []; - transcript = data['transcript']; + if (firstLine === true) { + + if (lastOutTime === undefined) { + lastOutTime = wordMeta.start + wordMeta.duration; + } - var trans = document.createElement('p'); + thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), ""); + thisCaption.text += lineText + "\n"; - trans.innerHTML = ''; + // It's just the first line so we should only push a new caption if it's the very last word! - var currentOffset = 0; - var wordCounter = 0; - var lastOutTime = 0; + if (index >= segment.words.length) { + captions.push(thisCaption); + thisCaption = null; + } else { + firstLine = false; + } - wds.forEach(function(wd) { - // Add non-linked text + } else { // We're on the second line and since we're over the maximum with the next word we should push this caption! - var newlineDetected = false; + thisCaption.stop = formatSeconds(lastOutTime); + thisCaption.text += lineText + "\n"; + + captions.push(thisCaption); - if (wd.startOffset > currentOffset) { - var txt = transcript.slice(currentOffset, wd.startOffset); - newlineDetected = /\r|\n/.exec(txt); + thisCaption = null; + firstLine = true; + } - if (trans.lastChild) { - trans.lastChild.text += txt + " "; - } else { - // this happens only at the beginning when offset not zero - var span = document.createElement('span'); - var initialWd = document.createTextNode(txt + " "); - var initialDatam = document.createAttribute('data-m'); - var initialDatad = document.createAttribute('data-d'); + // do the stuff we need to do to start a new line + charCount = wordMeta.text.length; + lineText = wordMeta.text; + lastInTime = wordMeta.start; // Why do we do this?????? - span.appendChild(initialWd); - initialDatam.value = 0; - initialDatad.value = 0; - span.setAttributeNode(initialDatam); - span.setAttributeNode(initialDatad); - trans.appendChild(span); - trans.appendChild(span); - } + } else { // We're not over the maximum with this word, update the line length and add the word to the text - if (newlineDetected) { - var lineBreak = document.createElement('br'); - trans.appendChild(lineBreak); - } - currentOffset = wd.startOffset; - } + charCount += wordMeta.text.length; + lineText += wordMeta.text; - var datam = document.createAttribute('data-m'); - var datad = document.createAttribute('data-d'); - - var word = document.createElement('span'); - var txt = transcript.slice(wd.startOffset, wd.endOffset+1); - - if (!txt.endsWith(" ")){ - txt = txt + " "; - } - - var wordText = document.createTextNode(txt); - word.appendChild(wordText); - - if (wd.start !== undefined) { - datam.value = Math.floor(wd.start * 1000); - datad.value = Math.floor((wd.end - wd.start) * 1000); - } else { - // look ahead to the next timed word - for (var i = wordCounter; i < wds.length - 1; i++) { - if (wds[i + 1].start !== undefined) { - datam.value = Math.floor(wds[i + 1].start * 1000); - break; - } } - datad.value = '100'; // default duration when not known - } - - if (datam.value < lastOutTime) { - datam.value = lastOutTime + 1; } - word.setAttributeNode(datam); - word.setAttributeNode(datad); - - lastOutTime = parseInt(datam.value) + parseInt(datad.value); - - trans.appendChild(word); - - currentOffset = wd.endOffset; - wordCounter++; + // for every word update the lastOutTime + lastOutTime = wordMeta.start + wordMeta.duration; }); - - var txt = transcript.slice(currentOffset, transcript.length); - var word = document.createTextNode(txt); - trans.appendChild(word); - currentOffset = transcript.length; - - article = document.createElement('article'); - section = document.createElement('section'); - - section.appendChild(trans); - article.appendChild(section); - - ht = article.outerHTML; - - //newlines can cause issues within HTML tags - ht = ht.replace(/(?:\r\n|\r|\n)/g, ''); - - ht = ht.replace(new RegExp('
', 'g'), '

'); - - // replace all unneeded empty paras - ht = ht.replace(new RegExp('

', 'g'), ''); - - break; - - case 'srt': - ht = parseSRT(input); - break; - - case 'other': - var xmlString = input, - parser = new DOMParser(), - doc = parser.parseFromString(xmlString, 'text/xml'); - - var transcript = doc.getElementsByTagName('section')[0]; - - for (var i = 0; i < doc.getElementsByClassName('speaker').length; i++) { - transcript.getElementsByClassName('speaker')[i].innerHTML = - '[' + - transcript.getElementsByClassName('speaker')[i].innerHTML.replace(': ', '') + - '] '; - var datam = document.createAttribute('data-m'); - var datad = document.createAttribute('data-d'); - datam.value = transcript - .getElementsByClassName('speaker') - [i].nextElementSibling.getAttribute('data-m'); - datad.value = '1'; - transcript.getElementsByClassName('speaker')[i].setAttributeNode(datam); - transcript.getElementsByClassName('speaker')[i].setAttributeNode(datad); + + // we're out of words for this segment - decision time! + if (thisCaption !== null) { // The caption had been started, time to add whatever text we have and add a stop point + thisCaption.stop = formatSeconds(lastOutTime); + thisCaption.text += lineText + "\n"; + //console.log("3. pushing at end of segment when new caption HAS BEEN created"); + //console.log(thisCaption); + captions.push(thisCaption); + thisCaption = null; + + } else { // caption hadn't been started yet - create one! + if (lastInTime !== null) { + thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), lineText); + //console.log("4. pushing at end of segment when new caption has yet to be created"); + //console.log(thisCaption); + captions.push(thisCaption); + thisCaption = null; + } } + } + }); - var transcriptText = transcript.outerHTML; - - ht = '
' + transcriptText + '
'; - } + captions.forEach(function(caption) { + captionsVtt += "\n" + caption.start + "-->" + caption.stop + "\n" + caption.text + "\n"; + }); - $('#htranscript').val(ht); - $('#rtranscript').html(ht); + document.getElementById(playerId+'-vtt').setAttribute("src", 'data:text/vtt,'+encodeURIComponent(captionsVtt)); + console.log(captionsVtt); - $('#transform-spinner').hide(); - return false; } + + return cap; + }); \ No newline at end of file From cbc5816c7434560de53c3827a16b5192962a224c Mon Sep 17 00:00:00 2001 From: Mark Boas Date: Fri, 11 Feb 2022 18:34:25 +0100 Subject: [PATCH 3/3] latest version of caption.js --- wp-hyperaudio/js/caption.js | 159 +++++++++++++++++++----------------- 1 file changed, 86 insertions(+), 73 deletions(-) diff --git a/wp-hyperaudio/js/caption.js b/wp-hyperaudio/js/caption.js index ed580e1..a860f12 100644 --- a/wp-hyperaudio/js/caption.js +++ b/wp-hyperaudio/js/caption.js @@ -1,19 +1,24 @@ +/*! (C) The Hyperaudio Project. MIT @license: en.wikipedia.org/wiki/MIT_License. */ 'use strict'; -var caption = (function () { - +var caption = function () { var cap = {}; function formatSeconds(seconds) { - if(typeof seconds == 'number'){ - return new Date(seconds.toFixed(3) * 1000).toISOString().substr(11, 12); + if (typeof seconds == 'number') { + return new Date(seconds.toFixed(3) * 1000).toISOString().substring(11,23); } else { - console.log("warning - attempting to format the non number: "+seconds); + console.log('warning - attempting to format the non number: ' + seconds); return null; } } - cap.init = function(transcriptId, playerId, maxLength, minLength) { + function convertTimecodeToSrt(timecode) { + //the same as VTT format but milliseconds separated by a comma + return timecode.substring(0,8) + "," + timecode.substring(9,12); + } + + cap.init = function (transcriptId, playerId, maxLength, minLength) { var transcript = document.getElementById(transcriptId); var words = transcript.querySelectorAll('[data-m]'); var data = {}; @@ -41,49 +46,46 @@ var caption = (function () { var maxLineLength = 37; var minLineLength = 21; - var captionsVtt = "WEBVTT\n" + var captionsVtt = 'WEBVTT\n'; + var captionsSrt = ''; var endSentenceDelimiter = /[\.。?؟!]/g; var midSentenceDelimiter = /[,、–,،و:,…‥]/g; - if (!isNaN(maxLength)) { + if (!isNaN(maxLength) && maxLength != null) { maxLineLength = maxLength; } - if (!isNaN(minLength)) { + if (!isNaN(minLength) && minLength != null) { minLineLength = minLength; } - var lastSpeaker = ""; - - words.forEach(function(word, i) { + var lastSpeaker = ''; + words.forEach(function (word, i) { if (thisSegmentMeta === null) { // create segment meta object - thisSegmentMeta = new segmentMeta("", null, 0, 0, 0); + thisSegmentMeta = new segmentMeta('', null, 0, 0, 0); } - if (word.classList.contains("speaker")) { - + if (word.classList.contains('speaker')) { // checking that this is not a new segment AND a new empty segment wasn't already created - if (thisSegmentMeta !== null && thisSegmentMeta.start !== null) { + if (thisSegmentMeta !== null && thisSegmentMeta.start !== null) { //console.log("pushing..."); //console.log(thisSegmentMeta); data.segments.push(thisSegmentMeta); // push the previous segment because it's a new speaker - thisSegmentMeta = new segmentMeta("", null, 0, 0, 0); + thisSegmentMeta = new segmentMeta('', null, 0, 0, 0); } thisSegmentMeta.speaker = word.innerText; - } else { - - var thisStart = parseInt(word.getAttribute("data-m"))/1000; - var thisDuration = parseInt(word.getAttribute("data-d"))/1000; + var thisStart = parseInt(word.getAttribute('data-m')) / 1000; + var thisDuration = parseInt(word.getAttribute('data-d')) / 1000; if (isNaN(thisStart)) { thisStart = 0; } - + if (isNaN(thisDuration)) { thisDuration = 0; } @@ -91,8 +93,8 @@ var caption = (function () { var thisText = word.innerText; thisWordMeta = new wordMeta(thisStart, thisDuration, thisText); - - if (thisSegmentMeta.start === null) { + + if (thisSegmentMeta.start === null) { thisSegmentMeta.start = thisStart; thisSegmentMeta.duration = 0; thisSegmentMeta.chars = 0; @@ -123,47 +125,51 @@ var caption = (function () { var captions = []; var thisCaption = null; - data.segments.map(function(segment) { - + data.segments.map(function (segment) { // If the entire segment fits on a line, add it to the captions. if (segment.chars < maxLineLength) { + thisCaption = new captionMeta( + formatSeconds(segment.start), + formatSeconds(segment.start + segment.duration), + '', + ); - thisCaption = new captionMeta(formatSeconds(segment.start), formatSeconds(segment.start + segment.duration), ""); - - segment.words.forEach(function(wordMeta) { + segment.words.forEach(function (wordMeta) { thisCaption.text += wordMeta.text; }); - thisCaption.text += "\n"; + thisCaption.text += '\n'; //console.log("0. pushing because the whole segment fits on a line!"); //console.log(thisCaption); captions.push(thisCaption); thisCaption = null; - - } else { // The number of chars in this segment is longer than our single line maximum + } else { + // The number of chars in this segment is longer than our single line maximum var charCount = 0; - var lineText = ""; + var lineText = ''; var firstLine = true; var lastOutTime; var lastInTime = null; - - segment.words.forEach(function(wordMeta, index) { + segment.words.forEach(function (wordMeta, index) { var lastChar = wordMeta.text.replace(/\s/g, '').slice(-1); - if (lastInTime === null) { // if it doesn't exist yet set the caption start time to the word's start time. + if (lastInTime === null) { + // if it doesn't exist yet set the caption start time to the word's start time. lastInTime = wordMeta.start; } // Are we over the minimum length of a line and hitting a good place to split mid-sentence? if (charCount + wordMeta.text.length > minLineLength && lastChar.match(midSentenceDelimiter)) { - if (firstLine === true) { + thisCaption = new captionMeta( + formatSeconds(lastInTime), + formatSeconds(wordMeta.start + wordMeta.duration), + '', + ); + thisCaption.text += lineText + wordMeta.text + '\n'; - thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(wordMeta.start + wordMeta.duration), ""); - thisCaption.text += lineText + wordMeta.text + "\n"; - //check for last word in segment, if it is we can push a one line caption, if not – move on to second line if (index + 1 >= segment.words.length) { @@ -174,11 +180,11 @@ var caption = (function () { } else { firstLine = false; } - - } else { // We're on the second line ... we're over the minimum chars and in a good place to split – let's push the caption + } else { + // We're on the second line ... we're over the minimum chars and in a good place to split – let's push the caption thisCaption.stop = formatSeconds(wordMeta.start + wordMeta.duration); - thisCaption.text += lineText + wordMeta.text + "\n"; + thisCaption.text += lineText + wordMeta.text; //console.log("2. pushing because we're on the second line and have a good place to split"); //console.log(thisCaption); captions.push(thisCaption); @@ -188,22 +194,20 @@ var caption = (function () { // whether first line or not we should reset ready for a new caption charCount = 0; - lineText = ""; - lastInTime = null; - - } else { // we're not over the minimum length with a suitable splitting point + lineText = ''; + lastInTime = null; + } else { + // we're not over the minimum length with a suitable splitting point // If we add this word are we over the maximum? if (charCount + wordMeta.text.length > maxLineLength) { - if (firstLine === true) { - if (lastOutTime === undefined) { lastOutTime = wordMeta.start + wordMeta.duration; } - thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), ""); - thisCaption.text += lineText + "\n"; + thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), ''); + thisCaption.text += lineText + '\n'; // It's just the first line so we should only push a new caption if it's the very last word! @@ -213,12 +217,12 @@ var caption = (function () { } else { firstLine = false; } - - } else { // We're on the second line and since we're over the maximum with the next word we should push this caption! + } else { + // We're on the second line and since we're over the maximum with the next word we should push this caption! thisCaption.stop = formatSeconds(lastOutTime); - thisCaption.text += lineText + "\n"; - + thisCaption.text += lineText; + captions.push(thisCaption); thisCaption = null; @@ -226,52 +230,61 @@ var caption = (function () { } // do the stuff we need to do to start a new line - charCount = wordMeta.text.length; + charCount = wordMeta.text.length; lineText = wordMeta.text; lastInTime = wordMeta.start; // Why do we do this?????? - - } else { // We're not over the maximum with this word, update the line length and add the word to the text + } else { + // We're not over the maximum with this word, update the line length and add the word to the text charCount += wordMeta.text.length; lineText += wordMeta.text; - } } // for every word update the lastOutTime lastOutTime = wordMeta.start + wordMeta.duration; }); - + // we're out of words for this segment - decision time! - if (thisCaption !== null) { // The caption had been started, time to add whatever text we have and add a stop point + if (thisCaption !== null) { + // The caption had been started, time to add whatever text we have and add a stop point thisCaption.stop = formatSeconds(lastOutTime); - thisCaption.text += lineText + "\n"; + thisCaption.text += lineText; //console.log("3. pushing at end of segment when new caption HAS BEEN created"); //console.log(thisCaption); captions.push(thisCaption); thisCaption = null; - - } else { // caption hadn't been started yet - create one! - if (lastInTime !== null) { + } else { + // caption hadn't been started yet - create one! + if (lastInTime !== null) { thisCaption = new captionMeta(formatSeconds(lastInTime), formatSeconds(lastOutTime), lineText); //console.log("4. pushing at end of segment when new caption has yet to be created"); //console.log(thisCaption); captions.push(thisCaption); - thisCaption = null; + thisCaption = null; } } } }); - captions.forEach(function(caption) { - captionsVtt += "\n" + caption.start + "-->" + caption.stop + "\n" + caption.text + "\n"; + captions.forEach(function (caption, i) { + captionsVtt += '\n' + caption.start + ' --> ' + caption.stop + '\n' + caption.text + '\n'; + captionsSrt += '\n' + (i + 1) + '\n' + convertTimecodeToSrt(caption.start) + ' --> ' + convertTimecodeToSrt(caption.stop) + '\n' + caption.text + '\n'; }); - document.getElementById(playerId+'-vtt').setAttribute("src", 'data:text/vtt,'+encodeURIComponent(captionsVtt)); - console.log(captionsVtt); + var trackElement = document.getElementById(playerId+'-vtt'); - } + if (trackElement !== null) { + trackElement.setAttribute("src", 'data:text/vtt,'+encodeURIComponent(captionsVtt)); + } - return cap; + function captionsObj(vtt, srt) { + this.vtt = vtt; + this.srt = srt; + } + + return new captionsObj(captionsVtt, captionsSrt); + }; -}); \ No newline at end of file + return cap; +};