diff --git a/index.html b/index.html index 1ca7d60..bffdb2b 100644 --- a/index.html +++ b/index.html @@ -289,6 +289,9 @@
  • +
  • +
  • +
  • @@ -1095,6 +1098,7 @@

    Caption Regeneration

    + @@ -1173,6 +1177,7 @@

    Load from Local Storage

    + diff --git a/js/hyperaudio-lite-editor-export.js b/js/hyperaudio-lite-editor-export.js index 67907a9..1dc8f88 100644 --- a/js/hyperaudio-lite-editor-export.js +++ b/js/hyperaudio-lite-editor-export.js @@ -71,6 +71,56 @@ class ImportJson extends HTMLElement { customElements.define('import-json', ImportJson); +class ExportYoutubeVtt extends HTMLElement { + + constructor() { + super(); + } + + exportYoutubeVtt() { + const jsonData = getHyperaudioJsonForExport(); + if (!jsonData) return; + + downloadText( + window.hyperaudioJsonToYoutubeVtt(jsonData), + 'youtube-captions.vtt', + 'text/vtt' + ); + } + + connectedCallback() { + this.innerHTML = `Export YouTube VTT`; + this.addEventListener('click', this.exportYoutubeVtt); + } +} + +customElements.define('export-youtube-vtt', ExportYoutubeVtt); + +class ExportYoutubeXml extends HTMLElement { + + constructor() { + super(); + } + + exportYoutubeXml() { + const jsonData = getHyperaudioJsonForExport(); + if (!jsonData) return; + + downloadText( + window.hyperaudioJsonToYoutubeTimedTextXml(jsonData), + 'youtube-captions.xml', + 'application/xml' + ); + } + + connectedCallback() { + this.innerHTML = `Export YouTube XML`; + this.addEventListener('click', this.exportYoutubeXml); + } +} + +customElements.define('export-youtube-xml', ExportYoutubeXml); + class ImportDeepgramJson extends HTMLElement { constructor() { @@ -428,6 +478,127 @@ class ImportVtt extends HTMLElement { customElements.define('import-vtt', ImportVtt); +class ImportYoutubeCaptions extends HTMLElement { + + constructor() { + super(); + } + + clearYoutubeCaptionMediaUrl(event) { + event.preventDefault(); + document.querySelector('#youtube-caption-media').value = ""; + } + + clearYoutubeCaptionFilePicker(event) { + event.preventDefault(); + document.querySelector('#youtube-caption-media-file').value = ""; + } + + confirmYoutubeCaptions() { + const player = document.querySelector("#hyperplayer"); + const hypertranscript = document.getElementById('hypertranscript'); + const track = document.querySelector('#hyperplayer-vtt'); + + if (!player || !hypertranscript || !track) { + alert("Currently you can only import YouTube captions from the transcript view."); + return; + } + + if (document.querySelector('#youtube-caption-media-file').value == ""){ + player.src = document.querySelector('#youtube-caption-media').value; + } else { + const mediaFile = document.querySelector('[name=youtube-caption-media-file]').files[0]; + const mediaReader = new FileReader(); + mediaReader.readAsArrayBuffer(mediaFile); + mediaReader.addEventListener('load', () => { + mediaFile.arrayBuffer().then((arrayBuffer) => { + const blob = new Blob([new Uint8Array(arrayBuffer)], {type: mediaFile.type }); + player.src = URL.createObjectURL(blob); + }); + }); + } + + const file = document.querySelector('[name=youtube-caption-file]').files[0]; + if (!file) { + alert("Please select a YouTube XML or VTT caption file."); + return; + } + + const reader = new FileReader(); + reader.addEventListener('load', (event) => { + const captionData = event.target.result; + const isXml = file.name.toLowerCase().endsWith('.xml') || captionData.trim().startsWith('<'); + const html = isXml + ? window.youtubeTimedTextXmlToHtml(captionData) + : window.youtubeVttToHtml(captionData); + + hypertranscript.innerHTML = html; + + const jsonData = htmlToJson(hypertranscript); + const youtubeVtt = window.hyperaudioJsonToYoutubeVtt(jsonData); + track.src = "data:text/vtt," + encodeURIComponent(youtubeVtt); + + updateCaptionsFromTranscript = false; + populateCaptionEditorFromVtt(youtubeVtt); + document.dispatchEvent(new CustomEvent('hyperaudioInit')); + }); + + reader.readAsText(file); + } + + connectedCallback() { + this.innerHTML = ` +
    + +
    + + `; + + document.querySelector('#youtube-caption-media-file').addEventListener('change',this.clearYoutubeCaptionMediaUrl); + document.querySelector('#youtube-caption-media').addEventListener('change',this.clearYoutubeCaptionFilePicker); + document.querySelector('#file-import-youtube-caption').addEventListener('click',this.confirmYoutubeCaptions); + } +} + +customElements.define('import-youtube-captions', ImportYoutubeCaptions); + +function getHyperaudioJsonForExport() { + const hypertranscript = document.getElementById('hypertranscript'); + + if (hypertranscript === null) { + alert("Currently you can only export YouTube captions from the transcript view."); + return null; + } + + return htmlToJson(hypertranscript); +} + +function downloadText(textData, fileName, mimeType) { + const dataStr = `data:${mimeType};charset=utf-8,` + encodeURIComponent(textData); + const downloadAnchorNode = document.createElement('a'); + downloadAnchorNode.setAttribute('href', dataStr); + downloadAnchorNode.setAttribute('download', fileName); + document.body.appendChild(downloadAnchorNode); + downloadAnchorNode.click(); + downloadAnchorNode.remove(); +} + function downloadJson(jsonData) { // download json file let dataStr = 'data:text/json;charset=utf-8,' + encodeURIComponent(JSON.stringify(jsonData, null, 2)); diff --git a/js/youtube-caption-converter.js b/js/youtube-caption-converter.js new file mode 100644 index 0000000..44e3c42 --- /dev/null +++ b/js/youtube-caption-converter.js @@ -0,0 +1,298 @@ +(function (root) { + function decodeEntities(value) { + return String(value) + .replace(/&#(\d+);/g, (_, code) => String.fromCharCode(Number(code))) + .replace(/&#x([0-9a-f]+);/gi, (_, code) => String.fromCharCode(parseInt(code, 16))) + .replace(/"/g, '"') + .replace(/'/g, "'") + .replace(/</g, '<') + .replace(/>/g, '>') + .replace(/&/g, '&'); + } + + function escapeHtml(value) { + return String(value) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"'); + } + + function normalizeLines(value) { + return String(value || '').replace(/\r\n|\r/g, '\n'); + } + + function parseTimestampMs(value) { + const clean = String(value || '').trim().replace(',', '.'); + if (!clean) return 0; + + if (!clean.includes(':')) { + const numeric = Number(clean); + return Number.isFinite(numeric) ? Math.round(numeric * 1000) : 0; + } + + const parts = clean.split(':').map(Number); + if (parts.some((part) => !Number.isFinite(part))) return 0; + + let seconds = 0; + if (parts.length === 3) { + seconds = parts[0] * 3600 + parts[1] * 60 + parts[2]; + } else if (parts.length === 2) { + seconds = parts[0] * 60 + parts[1]; + } else { + seconds = parts[0]; + } + return Math.round(seconds * 1000); + } + + function parseXmlTimeMs(value, fallbackMs, decimalIsSeconds) { + if (value === undefined || value === null || value === '') return fallbackMs; + const clean = String(value).trim(); + if (clean.includes(':')) return parseTimestampMs(clean); + + const numeric = Number(clean); + if (!Number.isFinite(numeric)) return fallbackMs; + if (clean.includes('.') || decimalIsSeconds) return Math.round(numeric * 1000); + return Math.round(numeric); + } + + function formatTimestamp(ms) { + const safeMs = Math.max(0, Math.round(ms)); + const hours = Math.floor(safeMs / 3600000); + const minutes = Math.floor((safeMs % 3600000) / 60000); + const seconds = Math.floor((safeMs % 60000) / 1000); + const millis = safeMs % 1000; + return [ + String(hours).padStart(2, '0'), + String(minutes).padStart(2, '0'), + `${String(seconds).padStart(2, '0')}.${String(millis).padStart(3, '0')}`, + ].join(':'); + } + + function stripTags(value) { + return String(value || '').replace(/<[^>]+>/g, ' '); + } + + function splitWords(value) { + const text = decodeEntities(stripTags(value)).replace(/\s+/g, ' ').trim(); + return text ? text.split(' ') : []; + } + + function wordsForSegment(text, startMs, endMs) { + const words = splitWords(text); + if (words.length === 0) return []; + + const duration = Math.max(0, endMs - startMs); + const step = words.length > 0 ? duration / words.length : 0; + + return words.map((word, index) => { + const wordStart = Math.round(startMs + step * index); + const wordEnd = index === words.length - 1 + ? Math.round(endMs) + : Math.round(startMs + step * (index + 1)); + return { + startMs: wordStart, + endMs: Math.max(wordStart, wordEnd), + text: word, + }; + }); + } + + function wordsFromTimedCue(text, cueStartMs, cueEndMs) { + const timestampPattern = /<((?:\d{1,2}:)?\d{2}:\d{2}[\.,]\d{3})>/g; + const segments = []; + let activeStart = cueStartMs; + let lastIndex = 0; + let match; + + while ((match = timestampPattern.exec(text)) !== null) { + const segmentText = text.slice(lastIndex, match.index); + if (segmentText.trim()) { + segments.push({ startMs: activeStart, text: segmentText }); + } + activeStart = parseTimestampMs(match[1]); + lastIndex = match.index + match[0].length; + } + + const finalText = text.slice(lastIndex); + if (finalText.trim()) { + segments.push({ startMs: activeStart, text: finalText }); + } + + if (segments.length === 0) { + return wordsForSegment(text, cueStartMs, cueEndMs); + } + + return segments.flatMap((segment, index) => { + const segmentEnd = index < segments.length - 1 ? segments[index + 1].startMs : cueEndMs; + return wordsForSegment(segment.text, segment.startMs, segmentEnd); + }); + } + + function paragraphsToHtml(paragraphs) { + const body = paragraphs.map((words) => { + const spans = words.map((word) => { + const start = Math.round(word.startMs); + const duration = Math.max(0, Math.round(word.endMs - word.startMs)); + return `${escapeHtml(word.text)} `; + }).join(''); + return `

    ${spans}

    `; + }).join(''); + + return `
    ${body}
    `; + } + + function youtubeVttToHtml(data) { + const blocks = normalizeLines(data) + .split(/\n\s*\n/) + .map((block) => block.trim()) + .filter(Boolean); + + const paragraphs = []; + for (const block of blocks) { + const lines = block.split('\n').map((line) => line.trimEnd()); + const timingIndex = lines.findIndex((line) => line.includes('-->')); + if (timingIndex === -1) continue; + + const timing = lines[timingIndex].split(/[\t ]*-->[\t ]*/); + const startMs = parseTimestampMs(timing[0]); + const endMs = parseTimestampMs((timing[1] || '').split(/\s+/)[0]); + const cueText = lines.slice(timingIndex + 1).join(' '); + const words = wordsFromTimedCue(cueText, startMs, endMs); + if (words.length > 0) paragraphs.push(words); + } + + return paragraphsToHtml(paragraphs); + } + + function parseAttributes(value) { + const attrs = {}; + const attrPattern = /([\w:-]+)\s*=\s*("([^"]*)"|'([^']*)')/g; + let match; + while ((match = attrPattern.exec(value || '')) !== null) { + attrs[match[1]] = match[3] !== undefined ? match[3] : match[4]; + } + return attrs; + } + + function parseSrv3Paragraphs(xml) { + const paragraphs = []; + const pPattern = /]*)>([\s\S]*?)<\/p>/gi; + let pMatch; + + while ((pMatch = pPattern.exec(xml)) !== null) { + const pAttrs = parseAttributes(pMatch[1]); + const pStart = parseXmlTimeMs(pAttrs.t || pAttrs.start, 0, Boolean(pAttrs.start)); + const pDuration = parseXmlTimeMs(pAttrs.d || pAttrs.dur, 0, Boolean(pAttrs.dur)); + const pEnd = pDuration > 0 ? pStart + pDuration : pStart; + const sMatches = Array.from(pMatch[2].matchAll(/]*)>([\s\S]*?)<\/s>/gi)); + + if (sMatches.length === 0) { + const words = wordsForSegment(pMatch[2], pStart, pEnd); + if (words.length > 0) paragraphs.push(words); + continue; + } + + const words = []; + for (let i = 0; i < sMatches.length; i++) { + const sAttrs = parseAttributes(sMatches[i][1]); + const sStart = pStart + parseXmlTimeMs(sAttrs.t, 0, false); + const nextStart = i < sMatches.length - 1 + ? pStart + parseXmlTimeMs(parseAttributes(sMatches[i + 1][1]).t, pEnd - pStart, false) + : pEnd; + const sDuration = parseXmlTimeMs(sAttrs.d, Math.max(0, nextStart - sStart), false); + words.push(...wordsForSegment(sMatches[i][2], sStart, sStart + sDuration)); + } + if (words.length > 0) paragraphs.push(words); + } + + return paragraphs; + } + + function parseTranscriptText(xml) { + const paragraphs = []; + const textPattern = /]*)>([\s\S]*?)<\/text>/gi; + let match; + + while ((match = textPattern.exec(xml)) !== null) { + const attrs = parseAttributes(match[1]); + const startMs = parseXmlTimeMs(attrs.start, 0, true); + const durationMs = parseXmlTimeMs(attrs.dur, 0, true); + const words = wordsForSegment(match[2], startMs, startMs + durationMs); + if (words.length > 0) paragraphs.push(words); + } + + return paragraphs; + } + + function youtubeTimedTextXmlToHtml(data) { + const xml = normalizeLines(data); + const paragraphs = parseSrv3Paragraphs(xml); + if (paragraphs.length > 0) return paragraphsToHtml(paragraphs); + return paragraphsToHtml(parseTranscriptText(xml)); + } + + function normalizeJsonWords(jsonData) { + return ((jsonData && jsonData.words) || []) + .filter((word) => word && word.text !== undefined) + .map((word) => ({ + startMs: Math.round(Number(word.start) * 1000), + endMs: Math.round(Number(word.end) * 1000), + text: String(word.text), + })) + .filter((word) => Number.isFinite(word.startMs) && Number.isFinite(word.endMs)); + } + + function groupWordsByParagraph(jsonData) { + const words = normalizeJsonWords(jsonData); + const paragraphs = (jsonData && jsonData.paragraphs) || []; + if (paragraphs.length === 0) return words.length > 0 ? [words] : []; + + return paragraphs.map((paragraph) => { + const startMs = Math.round(Number(paragraph.start) * 1000); + const endMs = Math.round(Number(paragraph.end) * 1000); + return words.filter((word) => word.startMs >= startMs && word.startMs <= endMs); + }).filter((group) => group.length > 0); + } + + function hyperaudioJsonToYoutubeVtt(jsonData) { + const cues = groupWordsByParagraph(jsonData).map((words) => { + const start = words[0].startMs; + const end = words[words.length - 1].endMs; + const text = words + .map((word) => `<${formatTimestamp(word.startMs)}>${escapeHtml(word.text)}`) + .join(' '); + return `${formatTimestamp(start)} --> ${formatTimestamp(end)}\n${text}`; + }); + + return `WEBVTT\n\n${cues.join('\n\n')}\n`; + } + + function hyperaudioJsonToYoutubeTimedTextXml(jsonData) { + const paragraphs = groupWordsByParagraph(jsonData).map((words) => { + const pStart = words[0].startMs; + const pEnd = words[words.length - 1].endMs; + const spans = words.map((word) => { + const relativeStart = Math.max(0, word.startMs - pStart); + const duration = Math.max(0, word.endMs - word.startMs); + return `${escapeHtml(word.text)}`; + }).join(''); + return `

    ${spans}

    `; + }); + + return `\n\n${paragraphs.join('\n')}\n\n\n`; + } + + const api = { + hyperaudioJsonToYoutubeTimedTextXml, + hyperaudioJsonToYoutubeVtt, + youtubeTimedTextXmlToHtml, + youtubeVttToHtml, + }; + + if (typeof module !== 'undefined' && module.exports) { + module.exports = api; + } + + Object.assign(root, api); +})(typeof window !== 'undefined' ? window : globalThis); diff --git a/test/youtube-caption-converter.test.js b/test/youtube-caption-converter.test.js new file mode 100644 index 0000000..67635c1 --- /dev/null +++ b/test/youtube-caption-converter.test.js @@ -0,0 +1,82 @@ +const assert = require('assert'); + +const { + hyperaudioJsonToYoutubeTimedTextXml, + hyperaudioJsonToYoutubeVtt, + youtubeTimedTextXmlToHtml, + youtubeVttToHtml, +} = require('../js/youtube-caption-converter.js'); + +function spanTuples(html) { + return Array.from(html.matchAll(/([^<]*)<\/span>/g)) + .map((match) => ({ + startMs: Number(match[1]), + durationMs: Number(match[2]), + text: match[3].trim(), + })); +} + +{ + const html = youtubeVttToHtml(`WEBVTT + +00:00:00.000 --> 00:00:03.000 align:start position:0% +<00:00:00.500>Hello <00:00:01.250>world. +`); + + assert.deepStrictEqual(spanTuples(html), [ + { startMs: 500, durationMs: 750, text: 'Hello' }, + { startMs: 1250, durationMs: 1750, text: 'world.' }, + ]); +} + +{ + const html = youtubeTimedTextXmlToHtml(` + + +

    + Hello + world +

    + +
    +`); + + assert.deepStrictEqual(spanTuples(html), [ + { startMs: 1000, durationMs: 400, text: 'Hello' }, + { startMs: 1600, durationMs: 500, text: 'world' }, + ]); +} + +{ + const html = youtubeTimedTextXmlToHtml(` + + Hello & world + +`); + + assert.deepStrictEqual(spanTuples(html), [ + { startMs: 500, durationMs: 500, text: 'Hello' }, + { startMs: 1000, durationMs: 500, text: '&' }, + { startMs: 1500, durationMs: 500, text: 'world' }, + ]); +} + +{ + const json = { + words: [ + { start: 0.5, end: 1.25, text: 'Hello' }, + { start: 1.25, end: 3, text: 'world.' }, + ], + paragraphs: [{ start: 0.5, end: 3 }], + }; + + const vtt = hyperaudioJsonToYoutubeVtt(json); + assert.match(vtt, /WEBVTT/); + assert.match(vtt, /00:00:00\.500 --> 00:00:03\.000/); + assert.match(vtt, /<00:00:00\.500>Hello <00:00:01\.250>world\./); + + const xml = hyperaudioJsonToYoutubeTimedTextXml(json); + assert.match(xml, //); + assert.match(xml, /

    /); + assert.match(xml, /Hello<\/s>world\.<\/s>/); +}