diff --git a/index.html b/index.html
index 1ca7d60..bffdb2b 100644
--- a/index.html
+++ b/index.html
@@ -289,6 +289,9 @@
+
+
+
@@ -1095,6 +1098,7 @@ Caption Regeneration
+
@@ -1173,6 +1177,7 @@ Load from Local Storage
+
diff --git a/js/hyperaudio-lite-editor-export.js b/js/hyperaudio-lite-editor-export.js
index 67907a9..1dc8f88 100644
--- a/js/hyperaudio-lite-editor-export.js
+++ b/js/hyperaudio-lite-editor-export.js
@@ -71,6 +71,56 @@ class ImportJson extends HTMLElement {
customElements.define('import-json', ImportJson);
+/**
+ * Custom element registered as `export-youtube-vtt`. Clicking it converts the
+ * current transcript to YouTube-style WebVTT and downloads the result as
+ * `youtube-captions.vtt`.
+ */
+class ExportYoutubeVtt extends HTMLElement {
+  constructor() {
+    super();
+  }
+
+  /** Click handler: does nothing further when no transcript JSON is available. */
+  exportYoutubeVtt() {
+    const transcriptJson = getHyperaudioJsonForExport();
+    if (transcriptJson) {
+      const vttText = window.hyperaudioJsonToYoutubeVtt(transcriptJson);
+      downloadText(vttText, 'youtube-captions.vtt', 'text/vtt');
+    }
+  }
+
+  /** Renders the label and attaches the click handler when the element is connected. */
+  connectedCallback() {
+    this.innerHTML = `Export YouTube VTT`;
+    this.addEventListener('click', this.exportYoutubeVtt);
+  }
+}
+
+customElements.define('export-youtube-vtt', ExportYoutubeVtt);
+
+/**
+ * Custom element registered as `export-youtube-xml`. Clicking it converts the
+ * current transcript to YouTube timed-text XML and downloads the result as
+ * `youtube-captions.xml`.
+ */
+class ExportYoutubeXml extends HTMLElement {
+  constructor() {
+    super();
+  }
+
+  /** Click handler: does nothing further when no transcript JSON is available. */
+  exportYoutubeXml() {
+    const transcriptJson = getHyperaudioJsonForExport();
+    if (transcriptJson) {
+      const xmlText = window.hyperaudioJsonToYoutubeTimedTextXml(transcriptJson);
+      downloadText(xmlText, 'youtube-captions.xml', 'application/xml');
+    }
+  }
+
+  /** Renders the label and attaches the click handler when the element is connected. */
+  connectedCallback() {
+    this.innerHTML = `Export YouTube XML`;
+    this.addEventListener('click', this.exportYoutubeXml);
+  }
+}
+
+customElements.define('export-youtube-xml', ExportYoutubeXml);
+
class ImportDeepgramJson extends HTMLElement {
constructor() {
@@ -428,6 +478,127 @@ class ImportVtt extends HTMLElement {
customElements.define('import-vtt', ImportVtt);
+/**
+ * Custom element registered as `import-youtube-captions`. Imports a YouTube
+ * caption file (timed-text XML or WebVTT) into the transcript view and can
+ * switch the player to a media URL or a locally selected media file.
+ */
+class ImportYoutubeCaptions extends HTMLElement {
+
+  constructor() {
+    super();
+  }
+
+  /** Empties the media URL field; registered on the media file picker's `change` event. */
+  clearYoutubeCaptionMediaUrl(event) {
+    event.preventDefault();
+    document.querySelector('#youtube-caption-media').value = "";
+  }
+
+  /** Empties the media file picker; registered on the media URL field's `change` event. */
+  clearYoutubeCaptionFilePicker(event) {
+    event.preventDefault();
+    document.querySelector('#youtube-caption-media-file').value = "";
+  }
+
+  /**
+   * Click handler for the import button: validates the page and the selected
+   * caption file, updates the player source, then reads the caption file and
+   * rebuilds the transcript, the caption track and the caption editor from it.
+   */
+  confirmYoutubeCaptions() {
+    const player = document.querySelector("#hyperplayer");
+    const hypertranscript = document.getElementById('hypertranscript');
+    const track = document.querySelector('#hyperplayer-vtt');
+
+    if (!player || !hypertranscript || !track) {
+      alert("Currently you can only import YouTube captions from the transcript view.");
+      return;
+    }
+
+    // Validate the caption file BEFORE touching the player, so an aborted
+    // import never leaves the page with swapped media and the old transcript.
+    const file = document.querySelector('[name=youtube-caption-file]').files[0];
+    if (!file) {
+      alert("Please select a YouTube XML or VTT caption file.");
+      return;
+    }
+
+    if (document.querySelector('#youtube-caption-media-file').value === "") {
+      const mediaUrl = document.querySelector('#youtube-caption-media').value;
+      // No URL and no file means no new media was supplied: keep the current
+      // source instead of blanking the player.
+      if (mediaUrl !== "") {
+        player.src = mediaUrl;
+      }
+    } else {
+      const mediaFile = document.querySelector('[name=youtube-caption-media-file]').files[0];
+      // A File is already a Blob, so it can back an object URL directly; there
+      // is no need to read it into memory and copy it into a new Blob first.
+      player.src = URL.createObjectURL(mediaFile);
+    }
+
+    const reader = new FileReader();
+
+    reader.addEventListener('error', () => {
+      alert("The selected caption file could not be read.");
+    });
+
+    reader.addEventListener('load', (event) => {
+      const captionData = event.target.result;
+      // Choose the parser from the file extension, falling back to sniffing for
+      // a leading '<' when the extension is not .xml.
+      const isXml = file.name.toLowerCase().endsWith('.xml') || captionData.trim().startsWith('<');
+      const html = isXml
+        ? window.youtubeTimedTextXmlToHtml(captionData)
+        : window.youtubeVttToHtml(captionData);
+
+      hypertranscript.innerHTML = html;
+
+      // Re-derive the VTT from the freshly rendered transcript so the caption
+      // track and the caption editor are fed the same data.
+      const jsonData = htmlToJson(hypertranscript);
+      const youtubeVtt = window.hyperaudioJsonToYoutubeVtt(jsonData);
+      track.src = "data:text/vtt," + encodeURIComponent(youtubeVtt);
+
+      // NOTE(review): `updateCaptionsFromTranscript` is declared outside this
+      // view; presumably it stops the captions being regenerated from the
+      // transcript - confirm against its definition.
+      updateCaptionsFromTranscript = false;
+      populateCaptionEditorFromVtt(youtubeVtt);
+      document.dispatchEvent(new CustomEvent('hyperaudioInit'));
+    });
+
+    reader.readAsText(file);
+  }
+
+  /** Renders the import form and wires up its three controls. */
+  connectedCallback() {
+    // NOTE(review): the template below contains no markup in this view, yet the
+    // selectors that follow expect form controls - confirm against the real file.
+ this.innerHTML = `
+
+
+
+
+ `;
+
+    document.querySelector('#youtube-caption-media-file').addEventListener('change',this.clearYoutubeCaptionMediaUrl);
+    document.querySelector('#youtube-caption-media').addEventListener('change',this.clearYoutubeCaptionFilePicker);
+    document.querySelector('#file-import-youtube-caption').addEventListener('click',this.confirmYoutubeCaptions);
+  }
+}
+
+customElements.define('import-youtube-captions', ImportYoutubeCaptions);
+
+/**
+ * Serialises the on-page transcript for the YouTube exporters.
+ *
+ * @returns {*} The result of `htmlToJson` for the `#hypertranscript` element,
+ *   or `null` (after alerting the user) when that element is not on the page.
+ */
+function getHyperaudioJsonForExport() {
+  const transcriptEl = document.getElementById('hypertranscript');
+  if (transcriptEl !== null) {
+    return htmlToJson(transcriptEl);
+  }
+
+  alert("Currently you can only export YouTube captions from the transcript view.");
+  return null;
+}
+
+/**
+ * Triggers a browser download of `textData` through a temporary anchor that
+ * points at a `data:` URL.
+ *
+ * @param {string} textData - File contents.
+ * @param {string} fileName - Name offered for the downloaded file.
+ * @param {string} mimeType - MIME type placed in the data URL.
+ */
+function downloadText(textData, fileName, mimeType) {
+  const href = `data:${mimeType};charset=utf-8,${encodeURIComponent(textData)}`;
+  const link = document.createElement('a');
+  link.setAttribute('href', href);
+  link.setAttribute('download', fileName);
+
+  // The anchor is attached only for the duration of the synthetic click.
+  document.body.appendChild(link);
+  link.click();
+  link.remove();
+}
+
function downloadJson(jsonData) {
// download json file
let dataStr = 'data:text/json;charset=utf-8,' + encodeURIComponent(JSON.stringify(jsonData, null, 2));
diff --git a/js/youtube-caption-converter.js b/js/youtube-caption-converter.js
new file mode 100644
index 0000000..44e3c42
--- /dev/null
+++ b/js/youtube-caption-converter.js
@@ -0,0 +1,298 @@
+(function (root) {
+ /**
+  * Decodes character references in caption text.
+  *
+  * Handles decimal (`&#39;`) and hexadecimal (`&#x27;`) numeric references and
+  * the five predefined XML entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`,
+  * `&apos;`). Decoding happens in a single pass, so output is never decoded a
+  * second time (`&amp;lt;` becomes `&lt;`, not `<`). Unknown entity names and
+  * out-of-range code points are left exactly as written, and text that merely
+  * contains digits followed by `;` is not touched.
+  *
+  * @param {*} value - Text to decode; coerced with `String()`.
+  * @returns {string} The decoded text.
+  */
+ function decodeEntities(value) {
+   const namedEntities = new Map([
+     ['amp', '&'],
+     ['lt', '<'],
+     ['gt', '>'],
+     ['quot', '"'],
+     ['apos', "'"],
+   ]);
+
+   return String(value).replace(/&(#[xX][0-9a-fA-F]+|#\d+|[A-Za-z]+);/g, (reference, body) => {
+     if (body.charAt(0) !== '#') {
+       return namedEntities.has(body) ? namedEntities.get(body) : reference;
+     }
+
+     const isHex = body.charAt(1) === 'x' || body.charAt(1) === 'X';
+     const codePoint = isHex
+       ? Number.parseInt(body.slice(2), 16)
+       : Number.parseInt(body.slice(1), 10);
+
+     // String.fromCodePoint throws a RangeError above U+10FFFF; unlike
+     // String.fromCharCode it does not truncate astral characters such as emoji.
+     return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
+   });
+ }
+
+ /**
+  * Escapes the characters that are significant in HTML/XML text and in
+  * double-quoted attribute values.
+  *
+  * `&` is replaced first so the ampersands introduced by the later
+  * replacements are not escaped a second time.
+  *
+  * @param {*} value - Text to escape; coerced with `String()`.
+  * @returns {string} The escaped text.
+  */
+ function escapeHtml(value) {
+   return String(value)
+     .replace(/&/g, '&amp;')
+     .replace(/</g, '&lt;')
+     .replace(/>/g, '&gt;')
+     .replace(/"/g, '&quot;');
+ }
+
+ /**
+  * Converts CRLF and lone CR line endings to LF.
+  *
+  * @param {*} value - Input text; any falsy value is treated as an empty string.
+  * @returns {string} Text containing only `\n` line breaks.
+  */
+ function normalizeLines(value) {
+   const text = String(value || '');
+   return text.replace(/\r\n|\r/g, '\n');
+ }
+
+ /**
+  * Parses a timestamp into whole milliseconds.
+  *
+  * Accepts `HH:MM:SS.mmm`, `MM:SS.mmm` or a bare number of seconds; the first
+  * comma is treated as a decimal point. Empty or non-numeric input yields 0.
+  *
+  * @param {*} value - Timestamp text.
+  * @returns {number} Milliseconds, rounded to the nearest integer.
+  */
+ function parseTimestampMs(value) {
+   const text = String(value || '').trim().replace(',', '.');
+   if (text === '') return 0;
+
+   // No colon: the whole string is a number of seconds.
+   if (text.indexOf(':') === -1) {
+     const secondsOnly = Number(text);
+     if (!Number.isFinite(secondsOnly)) return 0;
+     return Math.round(secondsOnly * 1000);
+   }
+
+   const fields = text.split(':').map(Number);
+   if (!fields.every((field) => Number.isFinite(field))) return 0;
+
+   let totalSeconds;
+   switch (fields.length) {
+     case 3:
+       totalSeconds = fields[0] * 3600 + fields[1] * 60 + fields[2];
+       break;
+     case 2:
+       totalSeconds = fields[0] * 60 + fields[1];
+       break;
+     default:
+       // Any other field count: only the first field is used.
+       totalSeconds = fields[0];
+   }
+   return Math.round(totalSeconds * 1000);
+ }
+
+ /**
+  * Parses a time attribute value into milliseconds.
+  *
+  * Values containing `:` are handed to `parseTimestampMs`. Other numeric values
+  * are multiplied by 1000 when they contain a `.` or when `decimalIsSeconds` is
+  * truthy; otherwise they are taken as milliseconds already.
+  *
+  * @param {*} value - Raw attribute value.
+  * @param {number} fallbackMs - Returned when the value is missing or not numeric.
+  * @param {boolean} decimalIsSeconds - Treat plain numbers as seconds.
+  * @returns {number} Milliseconds, or `fallbackMs`.
+  */
+ function parseXmlTimeMs(value, fallbackMs, decimalIsSeconds) {
+   const isMissing = value === undefined || value === null || value === '';
+   if (isMissing) return fallbackMs;
+
+   const text = String(value).trim();
+   if (text.includes(':')) return parseTimestampMs(text);
+
+   const amount = Number(text);
+   if (!Number.isFinite(amount)) return fallbackMs;
+
+   const inSeconds = text.includes('.') || decimalIsSeconds;
+   return Math.round(inSeconds ? amount * 1000 : amount);
+ }
+
+ /**
+  * Formats milliseconds as `HH:MM:SS.mmm`.
+  *
+  * The input is rounded to whole milliseconds and negative values are clamped
+  * to zero. Hours are padded to at least two digits.
+  *
+  * @param {number} ms - Time in milliseconds.
+  * @returns {string} The formatted timestamp.
+  */
+ function formatTimestamp(ms) {
+   const totalMs = Math.max(0, Math.round(ms));
+   const zeroPad = (amount, width) => String(amount).padStart(width, '0');
+
+   const hh = zeroPad(Math.floor(totalMs / 3600000), 2);
+   const mm = zeroPad(Math.floor((totalMs % 3600000) / 60000), 2);
+   const ss = zeroPad(Math.floor((totalMs % 60000) / 1000), 2);
+   const fraction = zeroPad(totalMs % 1000, 3);
+
+   return `${hh}:${mm}:${ss}.${fraction}`;
+ }
+
+ /**
+  * Replaces every `<...>` tag with a single space.
+  *
+  * @param {*} value - Markup text; any falsy value is treated as an empty string.
+  * @returns {string} The text with tags replaced by spaces.
+  */
+ function stripTags(value) {
+   const markup = String(value || '');
+   return markup.replace(/<[^>]+>/g, ' ');
+ }
+
+ /**
+  * Turns caption markup into a list of words: tags are stripped, entities are
+  * decoded, whitespace runs are collapsed, and the result is split on spaces.
+  *
+  * @param {*} value - Caption text, possibly containing tags and entities.
+  * @returns {string[]} The words, or an empty array when no text remains.
+  */
+ function splitWords(value) {
+   const collapsed = decodeEntities(stripTags(value)).replace(/\s+/g, ' ').trim();
+   if (!collapsed) {
+     return [];
+   }
+   return collapsed.split(' ');
+ }
+
+ /**
+  * Splits a text segment into words and spreads them evenly across the
+  * segment's time range.
+  *
+  * @param {*} text - Segment text (tags and entities allowed).
+  * @param {number} startMs - Segment start in milliseconds.
+  * @param {number} endMs - Segment end in milliseconds.
+  * @returns {Array<{startMs: number, endMs: number, text: string}>} One entry
+  *   per word; empty when the segment contains no words.
+  */
+ function wordsForSegment(text, startMs, endMs) {
+   const tokens = splitWords(text);
+   const count = tokens.length;
+   if (count === 0) return [];
+
+   // A negative range collapses to zero so every word starts at startMs.
+   const span = Math.max(0, endMs - startMs);
+   const perWord = span / count;
+   const finalIndex = count - 1;
+
+   return tokens.map((token, position) => {
+     const begin = Math.round(startMs + perWord * position);
+     // The last word ends exactly at the segment end rather than at a computed step.
+     const finish = position === finalIndex
+       ? Math.round(endMs)
+       : Math.round(startMs + perWord * (position + 1));
+     return {
+       startMs: begin,
+       endMs: Math.max(begin, finish),
+       text: token,
+     };
+   });
+ }
+
+ /**
+  * Converts the text of one VTT cue into timed words, honouring inline
+  * `<HH:MM:SS.mmm>` / `<MM:SS.mmm>` timestamp tags.
+  *
+  * Text before the first tag starts at the cue start; each tag starts a new
+  * piece that runs until the next kept piece (or the cue end). Words inside a
+  * piece are spread evenly by `wordsForSegment`.
+  *
+  * @param {string} text - Cue payload text.
+  * @param {number} cueStartMs - Cue start in milliseconds.
+  * @param {number} cueEndMs - Cue end in milliseconds.
+  * @returns {Array<{startMs: number, endMs: number, text: string}>} Timed words.
+  */
+ function wordsFromTimedCue(text, cueStartMs, cueEndMs) {
+   const timestampPattern = /<((?:\d{1,2}:)?\d{2}:\d{2}[\.,]\d{3})>/g;
+   const pieces = [];
+
+   // Whitespace-only chunks are dropped.
+   const keepPiece = (startMs, chunk) => {
+     if (chunk.trim()) {
+       pieces.push({ startMs, text: chunk });
+     }
+   };
+
+   let pieceStart = cueStartMs;
+   let cursor = 0;
+   for (const tag of text.matchAll(timestampPattern)) {
+     keepPiece(pieceStart, text.slice(cursor, tag.index));
+     pieceStart = parseTimestampMs(tag[1]);
+     cursor = tag.index + tag[0].length;
+   }
+   keepPiece(pieceStart, text.slice(cursor));
+
+   if (pieces.length === 0) {
+     return wordsForSegment(text, cueStartMs, cueEndMs);
+   }
+
+   return pieces.flatMap((piece, position) => {
+     const following = pieces[position + 1];
+     const pieceEnd = following ? following.startMs : cueEndMs;
+     return wordsForSegment(piece.text, piece.startMs, pieceEnd);
+   });
+ }
+
+ /**
+  * Renders paragraphs of timed words as a transcript HTML string.
+  *
+  * For every word it computes a rounded start and a non-negative rounded
+  * duration (endMs - startMs), and escapes the word text with `escapeHtml`.
+  *
+  * @param {Array<Array<{startMs: number, endMs: number, text: string}>>} paragraphs
+  *   One inner array of words per paragraph.
+  * @returns {string} The generated markup.
+  *
+  * NOTE(review): as the template literals appear here, `start`, `duration` and
+  * `body` are computed but never interpolated, and the final template is
+  * empty. The surrounding markup looks like it was lost from this view -
+  * confirm against the real file before relying on this text.
+  */
+ function paragraphsToHtml(paragraphs) {
+ const body = paragraphs.map((words) => {
+ const spans = words.map((word) => {
+ const start = Math.round(word.startMs);
+ const duration = Math.max(0, Math.round(word.endMs - word.startMs));
+ return `${escapeHtml(word.text)} `;
+ }).join('');
+ return `${spans}
`;
+ }).join('');
+
+ return ``;
+ }
+
+ /**
+  * Converts WebVTT caption text into transcript HTML.
+  *
+  * The input is split into blank-line separated blocks. Blocks without a
+  * `-->` timing line (such as the `WEBVTT` header) are skipped, and every
+  * remaining cue that yields at least one word becomes one paragraph.
+  *
+  * @param {*} data - Raw VTT file contents.
+  * @returns {string} Markup produced by `paragraphsToHtml`.
+  */
+ function youtubeVttToHtml(data) {
+   const paragraphs = [];
+
+   for (const rawBlock of normalizeLines(data).split(/\n\s*\n/)) {
+     const block = rawBlock.trim();
+     if (!block) continue;
+
+     const lines = block.split('\n').map((line) => line.trimEnd());
+     const timingIndex = lines.findIndex((line) => line.includes('-->'));
+     if (timingIndex === -1) continue;
+
+     const [startText, endText = ''] = lines[timingIndex].split(/[\t ]*-->[\t ]*/);
+     const startMs = parseTimestampMs(startText);
+     // Cue settings (align:, position:, ...) can follow the end time; keep only the time.
+     const endMs = parseTimestampMs(endText.split(/\s+/)[0]);
+
+     const cueText = lines.slice(timingIndex + 1).join(' ');
+     const words = wordsFromTimedCue(cueText, startMs, endMs);
+     if (words.length > 0) {
+       paragraphs.push(words);
+     }
+   }
+
+   return paragraphsToHtml(paragraphs);
+ }
+
+ /**
+  * Extracts `name="value"` / `name='value'` pairs from a tag's attribute text.
+  *
+  * @param {*} value - Attribute portion of a tag; falsy input yields no attributes.
+  * @returns {Object<string, string>} Attribute values keyed by name; a later
+  *   duplicate overwrites an earlier one.
+  */
+ function parseAttributes(value) {
+   const attrPattern = /([\w:-]+)\s*=\s*("([^"]*)"|'([^']*)')/g;
+   const attrs = {};
+
+   for (const found of String(value || '').matchAll(attrPattern)) {
+     const [, name, , doubleQuoted, singleQuoted] = found;
+     attrs[name] = doubleQuoted !== undefined ? doubleQuoted : singleQuoted;
+   }
+
+   return attrs;
+ }
+
+ /**
+  * Extracts paragraphs of timed words from XML made of `p` elements that may
+  * contain `s` segments.
+  *
+  * Per paragraph match: the start is read from the `t` or `start` attribute
+  * and the duration from `d` or `dur`; the third argument to `parseXmlTimeMs`
+  * is true when a `start` / `dur` attribute is present. When the paragraph has
+  * no `s` matches its whole body is spread evenly between start and end.
+  * Otherwise each segment starts at the paragraph start plus its own `t`
+  * offset and lasts for its `d` value, falling back to the gap until the next
+  * segment (or the paragraph end for the last one).
+  *
+  * @param {string} xml - Caption XML text.
+  * @returns {Array<Array<{startMs: number, endMs: number, text: string}>>}
+  *   One array of words per paragraph that produced at least one word.
+  *
+  * NOTE(review): both regex literals below look truncated in this view (the
+  * part that should precede `]*)` is missing), so they are not valid as shown -
+  * confirm against the real file.
+  */
+ function parseSrv3Paragraphs(xml) {
+ const paragraphs = [];
+ const pPattern = /]*)>([\s\S]*?)<\/p>/gi;
+ let pMatch;
+
+ while ((pMatch = pPattern.exec(xml)) !== null) {
+ const pAttrs = parseAttributes(pMatch[1]);
+ const pStart = parseXmlTimeMs(pAttrs.t || pAttrs.start, 0, Boolean(pAttrs.start));
+ const pDuration = parseXmlTimeMs(pAttrs.d || pAttrs.dur, 0, Boolean(pAttrs.dur));
+ const pEnd = pDuration > 0 ? pStart + pDuration : pStart;
+ const sMatches = Array.from(pMatch[2].matchAll(/]*)>([\s\S]*?)<\/s>/gi));
+
+ if (sMatches.length === 0) {
+ const words = wordsForSegment(pMatch[2], pStart, pEnd);
+ if (words.length > 0) paragraphs.push(words);
+ continue;
+ }
+
+ const words = [];
+ for (let i = 0; i < sMatches.length; i++) {
+ const sAttrs = parseAttributes(sMatches[i][1]);
+ const sStart = pStart + parseXmlTimeMs(sAttrs.t, 0, false);
+ const nextStart = i < sMatches.length - 1
+ ? pStart + parseXmlTimeMs(parseAttributes(sMatches[i + 1][1]).t, pEnd - pStart, false)
+ : pEnd;
+ const sDuration = parseXmlTimeMs(sAttrs.d, Math.max(0, nextStart - sStart), false);
+ words.push(...wordsForSegment(sMatches[i][2], sStart, sStart + sDuration));
+ }
+ if (words.length > 0) paragraphs.push(words);
+ }
+
+ return paragraphs;
+ }
+
+ /**
+  * Extracts paragraphs of timed words from XML made of `text` elements.
+  *
+  * Each match reads its `start` and `dur` attributes through `parseXmlTimeMs`
+  * with `decimalIsSeconds` set to true, then spreads the element's words
+  * evenly across [start, start + dur] with `wordsForSegment`.
+  *
+  * @param {string} xml - Caption XML text.
+  * @returns {Array<Array<{startMs: number, endMs: number, text: string}>>}
+  *   One array of words per element that produced at least one word.
+  *
+  * NOTE(review): the regex literal below looks truncated in this view (the
+  * part that should precede `]*)` is missing), so it is not valid as shown -
+  * confirm against the real file.
+  */
+ function parseTranscriptText(xml) {
+ const paragraphs = [];
+ const textPattern = /]*)>([\s\S]*?)<\/text>/gi;
+ let match;
+
+ while ((match = textPattern.exec(xml)) !== null) {
+ const attrs = parseAttributes(match[1]);
+ const startMs = parseXmlTimeMs(attrs.start, 0, true);
+ const durationMs = parseXmlTimeMs(attrs.dur, 0, true);
+ const words = wordsForSegment(match[2], startMs, startMs + durationMs);
+ if (words.length > 0) paragraphs.push(words);
+ }
+
+ return paragraphs;
+ }
+
+ /**
+  * Converts YouTube timed-text XML into transcript HTML.
+  *
+  * The `p`-element parser runs first; the `text`-element parser is used only
+  * when the first one finds no paragraphs.
+  *
+  * @param {*} data - Raw XML file contents.
+  * @returns {string} Markup produced by `paragraphsToHtml`.
+  */
+ function youtubeTimedTextXmlToHtml(data) {
+   const xml = normalizeLines(data);
+
+   let paragraphs = parseSrv3Paragraphs(xml);
+   if (paragraphs.length === 0) {
+     paragraphs = parseTranscriptText(xml);
+   }
+
+   return paragraphsToHtml(paragraphs);
+ }
+
+ /**
+  * Converts the `words` of a Hyperaudio JSON object (times in seconds) into
+  * millisecond-based word records.
+  *
+  * Entries without text (`undefined` or `null`) and entries whose start or
+  * end is not a finite number are dropped.
+  *
+  * @param {{words?: Array<{start: *, end: *, text: *}>}} jsonData
+  * @returns {Array<{startMs: number, endMs: number, text: string}>}
+  */
+ function normalizeJsonWords(jsonData) {
+   return ((jsonData && jsonData.words) || [])
+     // `!= null` rejects both undefined and null, so a null text is never
+     // exported as the literal string "null".
+     .filter((word) => word && word.text != null)
+     .map((word) => ({
+       startMs: Math.round(Number(word.start) * 1000),
+       endMs: Math.round(Number(word.end) * 1000),
+       text: String(word.text),
+     }))
+     .filter((word) => Number.isFinite(word.startMs) && Number.isFinite(word.endMs));
+ }
+
+ /**
+  * Groups the normalised words by the paragraph whose time range contains the
+  * word's start time.
+  *
+  * Each word is placed in at most one group. When ranges touch (one paragraph
+  * ends exactly where the next starts) a word starting on the boundary goes to
+  * the later paragraph, so it is never exported twice. Words outside every
+  * paragraph range are omitted, and empty groups are removed. Without any
+  * paragraphs, all words form a single group.
+  *
+  * @param {{words?: Array, paragraphs?: Array<{start: *, end: *}>}} jsonData
+  * @returns {Array<Array<{startMs: number, endMs: number, text: string}>>}
+  */
+ function groupWordsByParagraph(jsonData) {
+   const words = normalizeJsonWords(jsonData);
+   const paragraphs = (jsonData && jsonData.paragraphs) || [];
+   if (paragraphs.length === 0) return words.length > 0 ? [words] : [];
+
+   const ranges = paragraphs.map((paragraph) => ({
+     startMs: Math.round(Number(paragraph.start) * 1000),
+     endMs: Math.round(Number(paragraph.end) * 1000),
+   }));
+   const groups = ranges.map(() => []);
+
+   for (const word of words) {
+     // Keep the LAST matching range: a boundary word is most plausibly the
+     // first word of the following paragraph rather than a trailing word of
+     // the previous one.
+     let owner = -1;
+     ranges.forEach((range, index) => {
+       if (word.startMs >= range.startMs && word.startMs <= range.endMs) {
+         owner = index;
+       }
+     });
+     if (owner !== -1) {
+       groups[owner].push(word);
+     }
+   }
+
+   return groups.filter((group) => group.length > 0);
+ }
+
+ /**
+  * Serialises Hyperaudio JSON as WebVTT with one cue per paragraph and an
+  * inline `<HH:MM:SS.mmm>` timestamp in front of every word.
+  *
+  * A cue spans from its first word's start to its last word's end.
+  *
+  * @param {{words?: Array, paragraphs?: Array}} jsonData - Hyperaudio JSON.
+  * @returns {string} The VTT document, always beginning with `WEBVTT`.
+  */
+ function hyperaudioJsonToYoutubeVtt(jsonData) {
+   const cueBlocks = [];
+
+   for (const words of groupWordsByParagraph(jsonData)) {
+     const firstWord = words[0];
+     const lastWord = words[words.length - 1];
+     const timing = `${formatTimestamp(firstWord.startMs)} --> ${formatTimestamp(lastWord.endMs)}`;
+     const payload = words
+       .map((word) => `<${formatTimestamp(word.startMs)}>${escapeHtml(word.text)}`)
+       .join(' ');
+     cueBlocks.push(`${timing}\n${payload}`);
+   }
+
+   return `WEBVTT\n\n${cueBlocks.join('\n\n')}\n`;
+ }
+
+ /**
+  * Serialises Hyperaudio JSON as YouTube timed-text XML, one paragraph per
+  * word group returned by `groupWordsByParagraph`.
+  *
+  * For each group it computes the paragraph start/end from the first and last
+  * word, and for each word a start relative to the paragraph plus a
+  * non-negative duration; word text is escaped with `escapeHtml`.
+  *
+  * @param {{words?: Array, paragraphs?: Array}} jsonData - Hyperaudio JSON.
+  * @returns {string} The XML document text.
+  *
+  * NOTE(review): as the template literals appear here, `pStart`, `pEnd`,
+  * `relativeStart` and `duration` are computed but never interpolated. The
+  * XML markup looks like it was lost from this view - confirm against the
+  * real file before relying on this text.
+  */
+ function hyperaudioJsonToYoutubeTimedTextXml(jsonData) {
+ const paragraphs = groupWordsByParagraph(jsonData).map((words) => {
+ const pStart = words[0].startMs;
+ const pEnd = words[words.length - 1].endMs;
+ const spans = words.map((word) => {
+ const relativeStart = Math.max(0, word.startMs - pStart);
+ const duration = Math.max(0, word.endMs - word.startMs);
+ return `${escapeHtml(word.text)}`;
+ }).join('');
+ return `${spans}
`;
+ });
+
+ return `\n\n${paragraphs.join('\n')}\n\n\n`;
+ }
+
+ // Public surface of the converter: two exporters (Hyperaudio JSON -> YouTube
+ // formats) and two importers (YouTube formats -> transcript HTML).
+ const api = {
+ hyperaudioJsonToYoutubeTimedTextXml,
+ hyperaudioJsonToYoutubeVtt,
+ youtubeTimedTextXmlToHtml,
+ youtubeVttToHtml,
+ };
+
+ // Under CommonJS (for example when the file is loaded with require) expose
+ // the API as the module's exports.
+ if (typeof module !== 'undefined' && module.exports) {
+ module.exports = api;
+ }
+
+ // Always copy the API onto `root` as well; `root` is `window` when it exists
+ // and `globalThis` otherwise (see the IIFE argument below).
+ Object.assign(root, api);
+})(typeof window !== 'undefined' ? window : globalThis);
diff --git a/test/youtube-caption-converter.test.js b/test/youtube-caption-converter.test.js
new file mode 100644
index 0000000..67635c1
--- /dev/null
+++ b/test/youtube-caption-converter.test.js
@@ -0,0 +1,82 @@
+const assert = require('assert');
+
+const {
+ hyperaudioJsonToYoutubeTimedTextXml,
+ hyperaudioJsonToYoutubeVtt,
+ youtubeTimedTextXmlToHtml,
+ youtubeVttToHtml,
+} = require('../js/youtube-caption-converter.js');
+
+/**
+ * Test helper: scans generated HTML for span elements and returns one
+ * `{ startMs, durationMs, text }` record per match, with numbers converted via
+ * `Number()` and the text trimmed.
+ *
+ * NOTE(review): the code reads capture groups 1-3, but the regex as shown here
+ * has a single group; the opening-tag part of the pattern looks like it was
+ * lost from this view - confirm against the real file.
+ */
+function spanTuples(html) {
+ return Array.from(html.matchAll(/([^<]*)<\/span>/g))
+ .map((match) => ({
+ startMs: Number(match[1]),
+ durationMs: Number(match[2]),
+ text: match[3].trim(),
+ }));
+}
+
+// Case 1 - VTT import with inline word timestamps: "Hello" runs from its tag
+// (500 ms) to the next tag (1250 ms); "world." runs from 1250 ms to the cue
+// end (3000 ms). Cue settings after the end time must be ignored.
+{
+ const html = youtubeVttToHtml(`WEBVTT
+
+00:00:00.000 --> 00:00:03.000 align:start position:0%
+<00:00:00.500>Hello <00:00:01.250>world.
+`);
+
+ assert.deepStrictEqual(spanTuples(html), [
+ { startMs: 500, durationMs: 750, text: 'Hello' },
+ { startMs: 1250, durationMs: 1750, text: 'world.' },
+ ]);
+}
+
+// Case 2 - timed-text XML import expecting per-word start and duration values.
+// NOTE(review): the XML fixture below shows no element markup in this view -
+// confirm the fixture against the real file.
+{
+ const html = youtubeTimedTextXmlToHtml(`
+
+
+
+ Hello
+ world
+
+
+
+`);
+
+ assert.deepStrictEqual(spanTuples(html), [
+ { startMs: 1000, durationMs: 400, text: 'Hello' },
+ { startMs: 1600, durationMs: 500, text: 'world' },
+ ]);
+}
+
+// Case 3 - timed-text XML import where three words (including an ampersand)
+// are expected to share the time range evenly, 500 ms each from 500 ms.
+// NOTE(review): the XML fixture below shows no element markup in this view -
+// confirm the fixture against the real file.
+{
+ const html = youtubeTimedTextXmlToHtml(`
+
+ Hello & world
+
+`);
+
+ assert.deepStrictEqual(spanTuples(html), [
+ { startMs: 500, durationMs: 500, text: 'Hello' },
+ { startMs: 1000, durationMs: 500, text: '&' },
+ { startMs: 1500, durationMs: 500, text: 'world' },
+ ]);
+}
+
+// Case 4 - export: one paragraph of two words is serialised to VTT (cue range
+// plus inline word timestamps) and to timed-text XML.
+// NOTE(review): two of the XML assertions below show an empty pattern (`//`)
+// in this view - confirm the expected patterns against the real file.
+{
+ const json = {
+ words: [
+ { start: 0.5, end: 1.25, text: 'Hello' },
+ { start: 1.25, end: 3, text: 'world.' },
+ ],
+ paragraphs: [{ start: 0.5, end: 3 }],
+ };
+
+ const vtt = hyperaudioJsonToYoutubeVtt(json);
+ assert.match(vtt, /WEBVTT/);
+ assert.match(vtt, /00:00:00\.500 --> 00:00:03\.000/);
+ assert.match(vtt, /<00:00:00\.500>Hello <00:00:01\.250>world\./);
+
+ const xml = hyperaudioJsonToYoutubeTimedTextXml(json);
+ assert.match(xml, //);
+ assert.match(xml, //);
+ assert.match(xml, /Hello<\/s>world\.<\/s>/);
+}