Skip to content

Commit

Permalink
Add a few tools to process recordings
Browse files Browse the repository at this point in the history
  • Loading branch information
tidoust committed Sep 20, 2023
1 parent 5b6ef85 commit a8cf527
Show file tree
Hide file tree
Showing 6 changed files with 418 additions and 0 deletions.
7 changes: 7 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"irc": "^0.5.2",
"puppeteer": "^20.5.0",
"seedrandom": "^3.0.5",
"webvtt-parser": "^2.2.0",
"yaml": "^2.3.1"
}
}
155 changes: 155 additions & 0 deletions tools/create-recording-pages.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/**
* This tool is only useful once recordings of breakout sessions have been
* uploaded to Cloudflare. It creates HTML recording pages for each of these
* recordings that contain the video and an HTML rendition of the captions as
* a transcript.
*
* To run the tool:
*
* node tools/create-recording-pages.mjs
*
* Pre-requisites:
* 1. Recordings must have been uploaded to Cloudflare with a name that starts
* with a well-known prefix.
* 2. The well-known prefix must appear in a RECORDING_PREFIX env variable.
* 3. Cloudflare account info must appear in CLOUDFLARE_ACCOUNT and
* CLOUDFLARE_TOKEN env variables.
* 4. The RECORDING_FOLDER env variable must target the local folder to use to
* save recordings pages
* 5. The RECORDING_FOLDER folder must contain a "recording-template.html" page
* that contains the template to use for each recording page, see for example:
* https://www.w3.org/2023/09/breakouts/recording-template.html
*
* The tool assumes that the recordings are named prefix-xx.mp4, where xx is
* the breakout session number. It creates "recording-xx.html" pages in the
* recording folder.
*/

import path from 'path';
import fs from 'fs/promises';
import { convert } from './lib/webvtt2html.mjs';
import { getEnvKey } from './lib/envkeys.mjs';
import { fetchProject } from './lib/project.mjs';
import { validateSession } from './lib/validate.mjs';
import { todoStrings } from './lib/todostrings.mjs';

/**
 * List the recordings stored in Cloudflare Stream that match the given prefix.
 *
 * Recordings are expected to be named "prefix-xx.mp4", where xx is the
 * breakout session number. Videos returned by the search whose name does not
 * follow that pattern are reported and skipped.
 *
 * @param {string} accountId - Cloudflare account identifier
 * @param {string} authToken - Cloudflare API token, sent as a Bearer token
 * @param {string} recordingPrefix - Search term used to find the recordings
 * @returns {Promise<Object[]>} Recordings sorted by name. Each one has
 *   `sessionId`, `name`, `title`, `videoId`, `preview`, `embedUrl` and
 *   `captions` properties.
 * @throws {Error} When the Cloudflare API request fails or returns no list
 */
async function listRecordings(accountId, authToken, recordingPrefix) {
  // Encode the search term: a prefix that contains characters such as "&",
  // "#" or spaces would otherwise alter the query string.
  const search = encodeURIComponent(recordingPrefix);
  const response = await fetch(
    `https://api.cloudflare.com/client/v4/accounts/${accountId}/stream?search=${search}`,
    {
      headers: {
        'Authorization': `Bearer ${authToken}`
      }
    }
  );
  if (!response.ok) {
    throw new Error(`Could not list recordings, Cloudflare returned HTTP status ${response.status}`);
  }
  const json = await response.json();
  if (!Array.isArray(json?.result)) {
    throw new Error('Could not list recordings, Cloudflare response does not contain a list of videos');
  }

  const recordings = [];
  for (const video of json.result) {
    const name = video.meta?.name ?? '';
    const match = name.match(/-(\d+)\.mp4$/);
    if (!match) {
      // The search is a loose match, unrelated videos must not abort the run
      console.warn(`- skip video "${name}" (${video.uid}): name does not end with "-xx.mp4"`);
      continue;
    }
    recordings.push({
      sessionId: match[1],
      name,
      title: name,
      videoId: video.uid,
      preview: video.preview,
      embedUrl: video.preview.replace(/watch$/, 'iframe'),
      captions: video.preview.replace(/watch$/, 'captions/en')
    });
  }

  return recordings.sort((v1, v2) => v1.name.localeCompare(v2.name));
}

/**
 * Escape a value so that it can be used in an HTML attribute value.
 * Null and undefined values are serialized as an empty string.
 */
function escapeHtmlAttribute(value) {
  return String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}

/**
 * Create the "recording-xx.html" page of a recording from the
 * "recording-template.html" template found in the recording folder.
 *
 * The template may reference any property of the recording object through:
 * - `{{{{property}}}}`: replaced by the value serialized as JSON
 * - `{{{property}}}`: replaced by the value escaped for HTML attributes
 * - `{{property}}`: replaced by the raw value
 *
 * Side effect: the function sets `recording.transcript` to the HTML rendition
 * of the captions before it runs the replacements.
 *
 * @param {Object} recording - Recording info; `captions` and `sessionId`
 *   are required
 * @param {string} recordingFolder - Folder that contains the template and
 *   that receives the generated page
 * @returns {Promise<void>}
 */
async function createRecordingPage(recording, recordingFolder) {
  let template = await fs.readFile(path.join(recordingFolder, 'recording-template.html'), 'utf8');

  recording.transcript = await convert(recording.captions, { clean: true });

  // Replacements use literal search strings and replacer functions so that
  // neither property names nor values get interpreted: a "$&" or "$'" in a
  // title or transcript would otherwise be expanded by String.replace.
  const substitute = (open, close, serialize) => {
    for (const [property, value] of Object.entries(recording)) {
      template = template.replaceAll(open + property + close, () => serialize(value));
    }
  };

  // Longest markers first, a shorter marker is a substring of a longer one.
  // Replace content that needs to be serialized as JSON
  substitute('{{{{', '}}}}', value => JSON.stringify(value, null, 2));

  // Replace content that needs to be escaped for use in HTML attributes
  substitute('{{{', '}}}', escapeHtmlAttribute);

  // Replace raw text content
  substitute('{{', '}}', value => String(value));

  // Write resulting recording page
  await fs.writeFile(path.join(recordingFolder, `recording-${recording.sessionId}.html`), template, 'utf8');
}

/**
 * Entry point of the tool: retrieve the project's sessions, list the
 * recordings available in Cloudflare, and create one recording page per
 * recording that maps to a known session.
 *
 * Settings are read through getEnvKey: PROJECT_OWNER, PROJECT_NUMBER,
 * CHAIR_W3CID, CLOUDFLARE_ACCOUNT, CLOUDFLARE_TOKEN, RECORDING_PREFIX and
 * RECORDING_FOLDER.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the project cannot be retrieved
 */
async function main() {
  // URLs come from session descriptions and end up in href attributes
  const escapeAttribute = value => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');

  // First, retrieve known information about the project
  const PROJECT_OWNER = await getEnvKey('PROJECT_OWNER');
  const PROJECT_NUMBER = await getEnvKey('PROJECT_NUMBER');
  const CHAIR_W3CID = await getEnvKey('CHAIR_W3CID', {}, true);
  console.log();
  console.log(`Retrieve project ${PROJECT_OWNER}/${PROJECT_NUMBER}...`);
  const project = await fetchProject(PROJECT_OWNER, PROJECT_NUMBER);
  if (!project) {
    throw new Error(`Project ${PROJECT_OWNER}/${PROJECT_NUMBER} could not be retrieved`);
  }
  project.chairsToW3CID = CHAIR_W3CID;
  console.log(`- ${project.sessions.length} sessions`);
  console.log(`- ${project.rooms.length} rooms`);
  console.log(`- ${project.slots.length} slots`);
  console.log(`Retrieve project ${PROJECT_OWNER}/${PROJECT_NUMBER}... done`);

  console.log();
  console.log('List recordings...');
  const CLOUDFLARE_ACCOUNT = await getEnvKey('CLOUDFLARE_ACCOUNT');
  const CLOUDFLARE_TOKEN = await getEnvKey('CLOUDFLARE_TOKEN');
  const RECORDING_PREFIX = await getEnvKey('RECORDING_PREFIX');
  const RECORDING_FOLDER = await getEnvKey('RECORDING_FOLDER');
  const recordings = await listRecordings(CLOUDFLARE_ACCOUNT, CLOUDFLARE_TOKEN, RECORDING_PREFIX);
  console.log(`- found ${recordings.length} recordings`);
  console.log('List recordings... done');

  console.log();
  console.log('Create recording pages...');
  for (const recording of recordings) {
    const sessionNumber = Number.parseInt(recording.sessionId, 10);
    const session = project.sessions.find(s => s.number === sessionNumber);
    if (!session) {
      // A recording that does not map to a session must not abort the run
      console.warn(`- skip recording ${recording.name}: no session #${recording.sessionId} in project`);
      continue;
    }
    console.log(`- create page for ${recording.sessionId} - ${session.title}`);
    // NOTE(review): session.description is read right after validation,
    // presumably validateSession parses it - confirm against lib/validate.mjs
    await validateSession(session.number, project);
    const desc = session.description;
    recording.title = session.title;
    recording.githubIssue = `https://github.com/${session.repository}/issues/${session.number}`;
    const links = [
      {
        title: 'Session proposal on GitHub',
        url: recording.githubIssue
      }
    ];
    // Only link to materials that are set to something else than a
    // "to be done" placeholder
    const { slides, minutes } = desc.materials;
    if (slides && !todoStrings.includes(slides.toUpperCase())) {
      links.push({ title: 'Slides', url: slides });
    }
    if (minutes && !todoStrings.includes(minutes.toUpperCase())) {
      links.push({ title: 'Session minutes', url: minutes });
    }
    recording.links = links
      .map(l => `<li><a href="${escapeAttribute(l.url)}">${l.title}</a></li>`)
      .join('\n');
    await createRecordingPage(recording, RECORDING_FOLDER);
  }
  console.log('Create recording pages... done');
}

// Run the tool. Exit explicitly once done, and report any failure with a
// non-zero exit code instead of leaving the rejection unhandled.
main()
  .then(() => process.exit(0))
  .catch(err => {
    console.error(err);
    process.exit(1);
  });
121 changes: 121 additions & 0 deletions tools/lib/webvtt2html.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import webvttParser from 'webvtt-parser';

// WebVTT parser instance created once at module load, used by convert()
const parser = new webvttParser.WebVTTParser();

/**
 * Fetch a WebVTT captions file and convert it to an HTML transcript.
 *
 * Cues are merged into paragraphs. A new paragraph starts when a cue has an
 * identifier that starts with "slide-" (which also starts a new slide
 * section), an identifier that ends with "-p", or a text that contains a ":"
 * (taken to be a "Speaker: text" cue).
 *
 * @param {string} vttUrl - URL of the WebVTT file
 * @param {Object} [options]
 * @param {boolean} [options.clean] - Drop "slide N" cues and a few filler
 *   phrases ("next slide", ", you know,"...)
 * @param {boolean} [options.splitPerSlide] - Output one block per slide with
 *   numbered paragraphs, instead of a flat list of paragraphs
 * @param {string} [options.slideset] - With splitPerSlide, URL of the slide
 *   set; wraps each block with an `<i-slide>` reference to the slide
 * @param {string} [options.markupStart] - With splitPerSlide, markup that
 *   opens a block (default: `<div>`)
 * @param {string} [options.markupEnd] - With splitPerSlide, markup that
 *   closes a block (default: `</div>`)
 * @returns {Promise<string>} The HTML transcript
 */
export async function convert(vttUrl, options) {
  const opts = options || {};

  // The speaker name is what precedes the FIRST colon. A greedy match would
  // attribute "Alice: see you at 10:30" to speaker "Alice: see you at 10".
  const speakerRegexp = /^(.*?):\s*(.*)$/;

  function cleanSentence(sentence) {
    if (opts.clean) {
      sentence = sentence.replace(/^slide [a-z0-9]*\.?/i, '');
      sentence = sentence.replace(/^next slide\.?/i, '');
      sentence = sentence.replace(/^next page\.?/i, '');
      sentence = sentence.replace(/^moving to next slide\.?/i, '');
      sentence = sentence.replace(/^moving to next page\.?/i, '');
      sentence = sentence.replace(/, you know, ?/g, ' ');
    }
    return sentence;
  }

  const response = await fetch(vttUrl);
  const vtt = await response.text();

  let cues;
  try {
    ({cues} = parser.parse(vtt));
  } catch (e) {
    console.error(`Could not parse ${vttUrl} as WebVTT: ` + e);
    process.exit(1);
  }

  // Strip all voice tags and double quotes (not just the first occurrence)
  for (const cue of cues) {
    cue.text = cue.text
      .replace(/<v [^>]*>/g, '')
      .replace(/<\/v>/g, '')
      .replaceAll('"', '');
    if (opts.clean) {
      cue.text = cue.text.replace(/^slide [0-9]+$/i, '');
    }
  }

  // Group cues into paragraphs, and paragraphs into slide sections
  const divs = [{
    slide: "1",
    paragraphs: []
  }];
  let p = '';
  const flushParagraph = () => {
    const sentence = cleanSentence(p);
    if (sentence) {
      divs[divs.length - 1].paragraphs.push(sentence);
    }
    p = '';
  };
  for (const c of cues) {
    if (c.id.startsWith("slide-")) {
      flushParagraph();
      divs.push({
        slide: c.id.substring("slide-".length),
        paragraphs: []
      });
    } else if (c.id.endsWith("-p") || c.text.includes(':')) {
      flushParagraph();
    }
    p += (p ? ' ' : '') + c.text;
  }

  // Output final sentence
  flushParagraph();

  if (opts.splitPerSlide) {
    let content = '';
    let pid = 1;
    for (const div of divs) {
      if (opts.slideset) {
        content += `<div id="ts-${div.slide}">`;
        content += `<i-slide src="${opts.slideset}#${div.slide}" class="slide">Slide ${div.slide} of ${divs.length}</i-slide>\n`;
      }
      content += (opts.markupStart || `<div>`) + "\n";

      for (const paragraph of div.paragraphs) {
        const match = paragraph.match(speakerRegexp);
        if (match) {
          content += ` <p id="tp-${pid}"><cite>${match[1]}:</cite> ${match[2]}</p>\n`;
        }
        else {
          content += ` <p id="tp-${pid}">${paragraph}</p>\n`;
        }
        pid += 1;
      }
      content += (opts.markupEnd || '</div>') + "\n\n";
      if (opts.slideset) {
        content += `</div>`;
      }
    }
    return content;
  }

  // Flat rendition: one <p> per paragraph, consecutive paragraphs from the
  // same speaker are merged into one <p>. Paragraphs are collected unclosed
  // and closed at the end so that the output is well-formed (no empty
  // leading <p>, no text outside of <p>, no unclosed final <p>).
  const paragraphs = [];
  let last = '';
  for (const paragraph of divs.flatMap(d => d.paragraphs)) {
    const match = paragraph.match(speakerRegexp);
    if (match) {
      if (last && match[1] === last) {
        paragraphs[paragraphs.length - 1] += `<br/>\n … ${match[2]}`;
      }
      else {
        paragraphs.push(`<p><cite>${match[1]}:</cite> ${match[2]}`);
      }
      last = match[1];
    }
    else {
      paragraphs.push(`<p>${paragraph}`);
    }
  }
  return paragraphs.map(html => `${html}</p>`).join('\n ');
}
58 changes: 58 additions & 0 deletions tools/rename-recordings.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/**
* This tool is only useful once there are Zoom recordings of the breakout
* sessions available. It pulls and renames the recordings from local storage.
*
* To run the tool:
*
* node tools/rename-recordings.mjs
*
* Pre-requisites:
* 1. Zoom recordings must have been downloaded to a local folder, with one
* subfolder per recording. The subfolder name must start with the session
* number followed by a "-", e.g., "10-ecija" (the rest does not matter).
* 2. The local folder must appear in a RECORDING_FOLDER_RAW env variable.
* 3. The prefix to use to rename the recordings must be in a RECORDING_PREFIX
* env variable.
*
* The tool assumes that the video file to use each time has a name that ends
* with "_Recording_wwwwxhhhh.mp4".
*
* The tool also extracts the captions file, provided that its name ends with
* "_Recording.transcript.vtt".
*
* Renamed recordings and captions file are saved at the root of the
* RECORDING_FOLDER_RAW folder.
*/

import path from 'path';
import fs from 'fs/promises';
import { getEnvKey } from './lib/envkeys.mjs';

/**
 * Entry point of the tool: copy the video and captions files found in the
 * recording subfolders to the root of the RECORDING_FOLDER_RAW folder, named
 * "prefix-xx.mp4" and "prefix-xx.vtt", where prefix is the RECORDING_PREFIX
 * setting and xx the part of the subfolder name that precedes the first "-".
 *
 * Entries whose name contains a "." and entries that are not folders are
 * skipped. Subfolders without a matching video or captions file are ignored.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const RECORDING_FOLDER_RAW = await getEnvKey('RECORDING_FOLDER_RAW');
  const RECORDING_PREFIX = await getEnvKey('RECORDING_PREFIX');
  const folders = await fs.readdir(RECORDING_FOLDER_RAW);
  for (const folder of folders) {
    // Skip files, including recordings renamed by a previous run
    if (folder.includes('.')) {
      continue;
    }
    const folderPath = path.join(RECORDING_FOLDER_RAW, folder);
    // A file without extension would make readdir fail
    const stats = await fs.stat(folderPath);
    if (!stats.isDirectory()) {
      continue;
    }
    const files = await fs.readdir(folderPath);
    const prefix = `${RECORDING_PREFIX}-${folder.split('-')[0]}`;

    const recording = files.find(f => f.match(/_Recording_\d{3,4}x\d{3,4}\.mp4$/));
    if (recording) {
      await fs.copyFile(
        path.join(folderPath, recording),
        path.join(RECORDING_FOLDER_RAW, prefix + '.mp4'));
    }

    const subtitles = files.find(f => f.match(/_Recording\.transcript\.vtt$/));
    if (subtitles) {
      await fs.copyFile(
        path.join(folderPath, subtitles),
        path.join(RECORDING_FOLDER_RAW, prefix + '.vtt'));
    }
  }
}

// Run the tool. Exit explicitly once done, and report any failure with a
// non-zero exit code instead of leaving the rejection unhandled.
main()
  .then(() => process.exit(0))
  .catch(err => {
    console.error(err);
    process.exit(1);
  });
Loading

0 comments on commit a8cf527

Please sign in to comment.