-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a few tools to process recordings
- Loading branch information
Showing
6 changed files
with
418 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
"irc": "^0.5.2", | ||
"puppeteer": "^20.5.0", | ||
"seedrandom": "^3.0.5", | ||
"webvtt-parser": "^2.2.0", | ||
"yaml": "^2.3.1" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
/** | ||
* This tool is only useful once recordings of breakout sessions have been | ||
* uploaded to Cloudflare. It creates HTML recording pages for each of these | ||
* recordings that contain the video and an HTML rendition of the captions as | ||
* a transcript. | ||
* | ||
* To run the tool: | ||
* | ||
* node tools/create-recording-pages.mjs | ||
* | ||
* Pre-requisites: | ||
* 1. Recordings must have been uploaded to Cloudflare with a name that starts | ||
* with a well-known prefix. | ||
* 2. The well-known prefix must appear in a RECORDING_PREFIX env variable. | ||
* 3. Cloudflare account info must appear in CLOUDFLARE_ACCOUNT and | ||
* CLOUDFLARE_TOKEN env variables. | ||
* 4. The RECORDING_FOLDER env variable must target the local folder to use to | ||
* save recordings pages | ||
* 5. The RECORDING_FOLDER folder must contain a "recording-template.html" page | ||
* that contains the template to use for each recording page, see for example: | ||
* https://www.w3.org/2023/09/breakouts/recording-template.html | ||
* | ||
* The tool assumes that the recordings are named prefix-xx.mp4, where xx is | ||
* the breakout session number. It creates "recording-xx.html" pages in the | ||
* recording folder. | ||
*/ | ||
|
||
import path from 'path'; | ||
import fs from 'fs/promises'; | ||
import { convert } from './lib/webvtt2html.mjs'; | ||
import { getEnvKey } from './lib/envkeys.mjs'; | ||
import { fetchProject } from './lib/project.mjs'; | ||
import { validateSession } from './lib/validate.mjs'; | ||
import { todoStrings } from './lib/todostrings.mjs'; | ||
|
||
/**
 * List the recordings stored in Cloudflare Stream that match a search string.
 *
 * Only videos whose name ends with "-<digits>.mp4" are returned; the digits
 * are exposed as the `sessionId` of the recording. Other videos returned by
 * the search are reported on the console and skipped.
 *
 * @param {string} accountId - Cloudflare account identifier
 * @param {string} authToken - Cloudflare API token, sent as a Bearer token
 * @param {string} recordingPrefix - Value of the "search" query parameter
 * @returns {Promise<Array<object>>} Recordings sorted by name, each with
 *   `sessionId`, `name`, `title`, `videoId`, `preview`, `embedUrl` and
 *   `captions` properties
 * @throws {Error} When the request fails or the response has no result list
 */
async function listRecordings(accountId, authToken, recordingPrefix) {
  // Build the URL through the URL API so that the account identifier and
  // the search string get properly encoded
  const url = new URL(
    `https://api.cloudflare.com/client/v4/accounts/${encodeURIComponent(accountId)}/stream`);
  url.searchParams.set('search', recordingPrefix);

  const response = await fetch(url, {
    headers: {
      'Authorization': `Bearer ${authToken}`
    }
  });
  if (!response.ok) {
    throw new Error(`Could not list recordings: HTTP status ${response.status}`);
  }
  const json = await response.json();
  if (!Array.isArray(json?.result)) {
    throw new Error('Could not list recordings: response does not contain a list of videos');
  }

  const recordings = [];
  for (const video of json.result) {
    const name = video.meta?.name ?? '';
    const match = name.match(/-(\d+)\.mp4$/);
    if (!match) {
      // The search may return videos that do not follow the naming
      // convention, these cannot be linked to a session
      console.warn(`- skip video "${name}" (${video.uid}): name does not end with "-xx.mp4"`);
      continue;
    }
    recordings.push({
      sessionId: match[1],
      name,
      title: name,
      videoId: video.uid,
      preview: video.preview,
      embedUrl: video.preview.replace(/watch$/, 'iframe'),
      captions: video.preview.replace(/watch$/, 'captions/en')
    });
  }

  return recordings.sort((r1, r2) => r1.name.localeCompare(r2.name));
}
|
||
/**
 * Create the HTML page of a recording from the "recording-template.html"
 * template found in the recording folder, and save it in that same folder as
 * "recording-<sessionId>.html".
 *
 * The captions of the recording get converted to an HTML transcript, stored
 * in `recording.transcript`. Each property of the recording can then be
 * referenced in the template in three ways:
 * - `{{{{property}}}}` is replaced by the value serialized as JSON
 * - `{{{property}}}` is replaced by the value escaped for use in HTML
 * - `{{property}}` is replaced by the raw value
 *
 * @param {object} recording - Recording to process. Must have `captions`
 *   and `sessionId` properties. A `transcript` property gets added.
 * @param {string} recordingFolder - Folder that contains the template and
 *   that receives the generated page
 * @returns {Promise<void>}
 */
async function createRecordingPage(recording, recordingFolder) {
  const templatePath = path.join(recordingFolder, 'recording-template.html');
  let template = await fs.readFile(templatePath, 'utf8');

  recording.transcript = await convert(recording.captions, { clean: true });

  // Escape a value for use in HTML content and attributes.
  // "&" must be escaped first, not to escape the other entities twice.
  const escapeHtml = value => String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  // Longest placeholders need to be processed first, since the shorter ones
  // are substrings of the longer ones.
  const passes = [
    { depth: 4, serialize: value => JSON.stringify(value ?? null, null, 2) },
    { depth: 3, serialize: escapeHtml },
    { depth: 2, serialize: value => String(value ?? '') }
  ];

  for (const { depth, serialize } of passes) {
    for (const [property, value] of Object.entries(recording)) {
      const placeholder = '{'.repeat(depth) + property + '}'.repeat(depth);
      const replacement = serialize(value);
      // A replacer function is used on purpose: with a replacement string,
      // sequences such as "$&" or "$1" in the content would be interpreted
      // as special replacement patterns.
      template = template.replaceAll(placeholder, () => replacement);
    }
  }

  // Write resulting recording page
  const pagePath = path.join(recordingFolder, `recording-${recording.sessionId}.html`);
  await fs.writeFile(pagePath, template, 'utf8');
}
|
||
/**
 * Entry point of the tool: retrieve the project's sessions, list the
 * recordings available in Cloudflare, and create one recording page per
 * recording that can be associated with a session.
 *
 * Settings are read through `getEnvKey`: PROJECT_OWNER, PROJECT_NUMBER,
 * CHAIR_W3CID, CLOUDFLARE_ACCOUNT, CLOUDFLARE_TOKEN, RECORDING_PREFIX and
 * RECORDING_FOLDER.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the project cannot be retrieved
 */
async function main() {
  // Escape a value for use in HTML content and attributes
  const escapeHtml = value => String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  // First, retrieve known information about the project
  const PROJECT_OWNER = await getEnvKey('PROJECT_OWNER');
  const PROJECT_NUMBER = await getEnvKey('PROJECT_NUMBER');
  const CHAIR_W3CID = await getEnvKey('CHAIR_W3CID', {}, true);
  console.log();
  console.log(`Retrieve project ${PROJECT_OWNER}/${PROJECT_NUMBER}...`);
  const project = await fetchProject(PROJECT_OWNER, PROJECT_NUMBER);
  if (!project) {
    throw new Error(`Project ${PROJECT_OWNER}/${PROJECT_NUMBER} could not be retrieved`);
  }
  project.chairsToW3CID = CHAIR_W3CID;
  console.log(`- ${project.sessions.length} sessions`);
  console.log(`- ${project.rooms.length} rooms`);
  console.log(`- ${project.slots.length} slots`);
  console.log(`Retrieve project ${PROJECT_OWNER}/${PROJECT_NUMBER}... done`);

  console.log();
  console.log('List recordings...');
  const CLOUDFLARE_ACCOUNT = await getEnvKey('CLOUDFLARE_ACCOUNT');
  const CLOUDFLARE_TOKEN = await getEnvKey('CLOUDFLARE_TOKEN');
  const RECORDING_PREFIX = await getEnvKey('RECORDING_PREFIX');
  const RECORDING_FOLDER = await getEnvKey('RECORDING_FOLDER');
  const recordings = await listRecordings(CLOUDFLARE_ACCOUNT, CLOUDFLARE_TOKEN, RECORDING_PREFIX);
  console.log(`- found ${recordings.length} recordings`);
  console.log('List recordings... done');

  console.log();
  console.log('Create recording pages...');
  for (const recording of recordings) {
    const sessionNumber = Number.parseInt(recording.sessionId, 10);
    const session = project.sessions.find(s => s.number === sessionNumber);
    if (!session) {
      // A recording that does not map to a known session should not prevent
      // the creation of the other recording pages
      console.warn(`- skip recording ${recording.sessionId}: no matching session in project`);
      continue;
    }
    console.log(`- create page for ${recording.sessionId} - ${session.title}`);
    await validateSession(session.number, project);
    // Materials may be absent from the session description
    const materials = session.description?.materials ?? {};
    recording.title = session.title;
    recording.githubIssue = `https://github.com/${session.repository}/issues/${session.number}`;
    const links = [
      {
        title: 'Session proposal on GitHub',
        url: recording.githubIssue
      }
    ];
    if (materials.slides && !todoStrings.includes(materials.slides.toUpperCase())) {
      links.push({
        title: 'Slides',
        url: materials.slides
      });
    }
    if (materials.minutes && !todoStrings.includes(materials.minutes.toUpperCase())) {
      links.push({
        title: 'Session minutes',
        url: materials.minutes
      });
    }
    // URLs come from session descriptions, they need to be escaped before
    // they can be used in an HTML attribute
    recording.links = links
      .map(l => `<li><a href="${escapeHtml(l.url)}">${escapeHtml(l.title)}</a></li>`)
      .join('\n');
    await createRecordingPage(recording, RECORDING_FOLDER);
  }
  console.log('Create recording pages... done');
}
|
||
// Run the tool and terminate the process with exit code 0 once it completes
main().then(() => process.exit(0));
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
import webvttParser from 'webvtt-parser'; | ||
|
||
const parser = new webvttParser.WebVTTParser(); | ||
|
||
/**
 * Fetch a WebVTT captions file and convert it to an HTML transcript.
 *
 * Cues are merged into paragraphs. A new paragraph starts with each cue that
 * has an identifier that starts with "slide-" (which also starts a new slide
 * section), an identifier that ends with "-p", or a text that contains a ":"
 * (taken as the sign of a "Speaker: ..." prefix).
 *
 * Note the function logs an error and exits the process with code 1 when the
 * WebVTT parser throws.
 *
 * @param {string} vttUrl - URL of the WebVTT file
 * @param {object} [options] - Conversion options
 * @param {boolean} [options.clean] - Drop slide announcements ("Slide 3",
 *   "Next slide", etc.) and ", you know," fillers
 * @param {boolean} [options.splitPerSlide] - Generate one block per slide,
 *   with numbered paragraphs, instead of a flat list of paragraphs
 * @param {string} [options.slideset] - URL of the slide set, used to embed
 *   an `<i-slide>` element per slide (only when splitting per slide)
 * @param {string} [options.markupStart] - Markup that opens the transcript
 *   of a slide, "<div>" by default (only when splitting per slide)
 * @param {string} [options.markupEnd] - Markup that closes the transcript of
 *   a slide, "</div>" by default (only when splitting per slide)
 * @returns {Promise<string>} The HTML transcript
 */
export async function convert(vttUrl, options) {
  const opts = options || {};

  // The speaker is what precedes the *first* colon: a greedy match would
  // break on paragraphs that contain further colons (URLs, times, etc.)
  const speakerPattern = /^([^:]*):\s*(.*)$/;

  function cleanSentence(sentence) {
    if (opts.clean) {
      sentence = sentence.replace(/^slide [a-z0-9]*\.?/i, '');
      sentence = sentence.replace(/^next slide\.?/i, '');
      sentence = sentence.replace(/^next page\.?/i, '');
      sentence = sentence.replace(/^moving to next slide\.?/i, '');
      sentence = sentence.replace(/^moving to next page\.?/i, '');
      sentence = sentence.replace(/, you know, ?/g, ' ');
    }
    return sentence;
  }

  const response = await fetch(vttUrl);
  const vtt = await response.text();

  let cues;
  try {
    ({ cues } = parser.parse(vtt));
  } catch (e) {
    console.error(`Could not parse ${vttUrl} as WebVTT: ` + e);
    process.exit(1);
  }

  // Drop voice tags (and first quote) from the text of the cues
  for (const cue of cues) {
    cue.text = cue.text
      .replace(/<v [^>]*>/, '')
      .replace(/<\/v>/, '')
      .replace('"', '');
    if (opts.clean) {
      cue.text = cue.text.replace(/^slide [0-9]+$/i, '');
    }
  }

  // Group cues into paragraphs, and paragraphs into slides
  const divs = [{
    slide: "1",
    paragraphs: []
  }];
  let pending = '';
  const flush = () => {
    const paragraph = cleanSentence(pending);
    if (paragraph) {
      divs[divs.length - 1].paragraphs.push(paragraph);
    }
    pending = '';
  };
  for (const cue of cues) {
    const id = cue.id ?? '';
    if (id.startsWith("slide-")) {
      flush();
      divs.push({
        slide: id.substring("slide-".length),
        paragraphs: []
      });
    } else if (id.endsWith("-p") || cue.text.includes(':')) {
      flush();
    }
    pending += (pending ? ' ' : '') + cue.text;
  }

  // Output final sentence
  flush();

  let content = '';
  if (opts.splitPerSlide) {
    let pid = 1;
    for (const div of divs) {
      if (opts.slideset) {
        content += `<div id="ts-${div.slide}">`;
        content += `<i-slide src="${opts.slideset}#${div.slide}" class="slide">Slide ${div.slide} of ${divs.length}</i-slide>\n`;
      }
      content += (opts.markupStart || `<div>`) + "\n";

      for (const paragraph of div.paragraphs) {
        const match = paragraph.match(speakerPattern);
        if (match) {
          content += ` <p id="tp-${pid}"><cite>${match[1]}:</cite> ${match[2]}</p>\n`;
        }
        else {
          content += ` <p id="tp-${pid}">${paragraph}</p>\n`;
        }
        pid += 1;
      }
      content += (opts.markupEnd || '</div>') + "\n\n";
      if (opts.slideset) {
        content += `</div>`;
      }
    }
  } else {
    // Collect the inner content of each paragraph first, so that each of
    // them ends up wrapped in exactly one <p>...</p> pair
    const blocks = [];
    let last = '';
    for (const paragraph of divs.flatMap(div => div.paragraphs)) {
      const match = paragraph.match(speakerPattern);
      if (!match) {
        blocks.push(paragraph);
        continue;
      }
      if (last && match[1] === last) {
        // Same speaker as before: continue the current paragraph
        blocks[blocks.length - 1] += `<br/>\n … ${match[2]}`;
      }
      else {
        blocks.push(`<cite>${match[1]}:</cite> ${match[2]}`);
      }
      last = match[1];
    }
    content = blocks.map(block => `<p>${block}</p>`).join('\n ');
  }

  return content;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/** | ||
* This tool is only useful once there are Zoom recordings of the breakout | ||
* sessions available. It pulls and renames the recordings from local storage. | ||
* | ||
* To run the tool: | ||
* | ||
* node tools/rename-recordings.mjs | ||
* | ||
* Pre-requisites: | ||
* 1. Zoom recordings must have been downloaded to a local folder, with one | ||
* subfolder per recording. The subfolder name must start with the session | ||
* number followed by a "-", e.g., "10-ecija" (the rest does not matter). | ||
* 2. The local folder must appear in a RECORDING_FOLDER_RAW env variable. | ||
* 3. The prefix to use to rename the recordings must be in a RECORDING_PREFIX | ||
* env variable. | ||
* | ||
* The tool assumes that the video file to use each time has a name that ends | ||
* with "_Recording_wwwwxhhhh.mp4". | ||
* | ||
* The tool also extracts the captions file, provided that its name ends with | ||
* "_Recording.transcript.vtt". | ||
* | ||
* Renamed recordings and captions file are saved at the root of the | ||
* RECORDING_FOLDER_RAW folder. | ||
*/ | ||
|
||
import path from 'path'; | ||
import fs from 'fs/promises'; | ||
import { getEnvKey } from './lib/envkeys.mjs'; | ||
|
||
/**
 * Entry point of the tool: look for Zoom recordings in the subfolders of the
 * RECORDING_FOLDER_RAW folder and copy the video and captions files they
 * contain to the root of that folder, named after the RECORDING_PREFIX
 * prefix and the session number ("<prefix>-<number>.mp4" and
 * "<prefix>-<number>.vtt").
 *
 * The session number is the part of the subfolder name that precedes the
 * first "-".
 *
 * @returns {Promise<void>}
 */
async function main() {
  const RECORDING_FOLDER_RAW = await getEnvKey('RECORDING_FOLDER_RAW');
  const RECORDING_PREFIX = await getEnvKey('RECORDING_PREFIX');
  const entries = await fs.readdir(RECORDING_FOLDER_RAW, { withFileTypes: true });
  for (const entry of entries) {
    const folder = entry.name;
    // Only subfolders contain recordings. Names with a "." are skipped as
    // well: these include the files that the tool itself creates.
    if (!entry.isDirectory() || folder.includes('.')) {
      continue;
    }
    const files = await fs.readdir(path.join(RECORDING_FOLDER_RAW, folder));
    const prefix = `${RECORDING_PREFIX}-${folder.split('-')[0]}`;

    const recording = files.find(f => f.match(/_Recording_\d{3,4}x\d{3,4}\.mp4$/));
    if (recording) {
      await fs.copyFile(
        path.join(RECORDING_FOLDER_RAW, folder, recording),
        path.join(RECORDING_FOLDER_RAW, prefix + '.mp4'));
    }

    const subtitles = files.find(f => f.match(/_Recording\.transcript\.vtt$/));
    if (subtitles) {
      await fs.copyFile(
        path.join(RECORDING_FOLDER_RAW, folder, subtitles),
        path.join(RECORDING_FOLDER_RAW, prefix + '.vtt'));
    }
  }
}
|
||
// Run the tool and terminate the process with exit code 0 once it completes
main().then(() => process.exit(0));
Oops, something went wrong.