Skip to content

Commit

Permalink
ADD kaltura migration script to cli tools
Browse files Browse the repository at this point in the history
We're switching from on-prem hosted Kaltura to cloud Kaltura. When
videos are transferred to the new Kaltura instance, a new entry ID is
generated. We will be given a CSV mapping of old to new entry IDs, and
will have to update our Kaltura data accordingly.

The CSV is expected to be in the following format, where the first row
is a header and can be ignored:

old entry id, new entry id
0_someenid,0_newentid
0_someenid,0_newentid
0_someenid,0_newentid

This assumes that collisions between old and new entry IDs are impossible.

Requires that these Kaltura env vars are set to the new Kaltura
environment:

* KALTURA_SERVICE_URL
* KALTURA_PARTNER_ID
* KALTURA_SECRET
* KALTURA_USER_ID
* KALTURA_PLAYER_ID

Usage options, run in app root:

    python manage.py kaltura migrate /path/to/mappingCsv.csv

-d Do a dry run, without actually making any changes to the database:

    python manage.py kaltura migrate -d /path/to/mappingCsv.csv

-n If present, tells the CSV parser not to skip the first row. By
default, we assume the first row is a header row and skip it:

    python manage.py kaltura migrate -n /path/to/mappingCsv.csv

This will migrate all the normal Kaltura integration uploaded videos.
There's a handful of special cases where people inserted Kaltura video
links into answers that this does NOT deal with. We're not sure how to
deal with that on the Kaltura side yet, as those videos were uploaded to
a different section that wasn't meant for ComPAIR.
  • Loading branch information
ionparticle committed Feb 14, 2023
1 parent 0e4b738 commit a82c1b3
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 0 deletions.
161 changes: 161 additions & 0 deletions compair/manage/kaltura.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""
Migrate Kaltura media to new Kaltura instance. We're switching from on-prem
hosted Kaltura to cloud Kaltura. When videos are transferred to the new Kaltura
instance, a new entry ID is generated. We will be given a CSV mapping of old
to new entry IDs, and will have to update our Kaltura data accordingly.
This assumes that collisions between old and new entry IDs are impossible.
Requires that these Kaltura env vars are set to the new Kaltura environment:
* KALTURA_SERVICE_URL
* KALTURA_PARTNER_ID
* KALTURA_SECRET
* KALTURA_USER_ID
* KALTURA_PLAYER_ID
Usage options, run in app root:
python manage.py kaltura migrate /path/to/mappingCsv.csv
-d Do a dry run, without actually making any changes to the database:
python manage.py kaltura migrate -d /path/to/mappingCsv.csv
-n If present, tells the CSV parser not to skip the first row. By default, we
assume the first row is a header row and skip it:
python manage.py kaltura migrate -n /path/to/mappingCsv.csv
"""

import csv
from datetime import datetime
import re

from KalturaClient import KalturaClient, KalturaConfiguration
from KalturaClient.Plugins.Core import (KalturaSessionType, KalturaMediaEntry,
KalturaMediaType)
from flask_script import Manager

from compair.core import db
from compair.kaltura.core import KalturaCore
from compair.models import Answer, KalturaMedia, File
from flask import current_app

# Sub-manager that collects the commands defined in this module; the top-level
# manage.py imports it and registers it under the "kaltura" command name.
manager = Manager(usage="Kaltura Migration")

def readMappingCsv(mappingCsv, noHeader):
    """Load the old-to-new Kaltura entry ID mapping from a CSV file.

    Each data row must hold the old entry ID in its first column and the new
    entry ID in its second column. Extra columns are ignored, as are blank
    rows. Surrounding whitespace is stripped from both IDs before they are
    validated and stored.

    Args:
        mappingCsv: Path of the CSV file to read.
        noHeader: When false, the first row is treated as a header and
            skipped. When true, the first row is parsed as data.

    Returns:
        A dict mapping each old entry ID to its new entry ID.

    Raises:
        ValueError: If a row has fewer than two columns, if either ID is not
            in entry ID format (a digit, an underscore, then exactly eight
            word characters), or if the file contains no mappings at all.
    """
    oldToNewEntryIds = {}
    idRe = re.compile(r"\d_\w{8}")
    # newline='' lets the csv module do its own newline handling, as the
    # csv documentation recommends for files passed to csv.reader.
    with open(mappingCsv, 'r', newline='') as csvFile:
        csvReader = csv.reader(csvFile, skipinitialspace=True)
        if not noHeader:
            # Discard the header row; the default guards against an empty file.
            next(csvReader, None)
        for row in csvReader:
            # csv.reader yields an empty list for a blank line; tolerate
            # those (and whitespace-only rows) instead of crashing on row[0].
            if not any(cell.strip() for cell in row):
                continue
            if len(row) < 2:
                raise ValueError(
                    f"Mapping file line {csvReader.line_num} does not have "
                    "both an old and a new entry ID.")
            oldEntryId = row[0].strip()
            newEntryId = row[1].strip()
            # fullmatch rejects values that merely start with a valid ID.
            if not (idRe.fullmatch(oldEntryId) and idRe.fullmatch(newEntryId)):
                raise ValueError(f"Mapping file line {csvReader.line_num} has a value not in entry ID format.")
            oldToNewEntryIds[oldEntryId] = newEntryId
    if not oldToNewEntryIds:
        raise ValueError("Mapping file is empty")
    return oldToNewEntryIds


def msg(msg, logfile):
    """Echo a message to stdout and append it to the log file.

    Args:
        msg: The message to report.
        logfile: Writable text file object; the message is written followed
            by a newline and the file is flushed immediately.
    """
    logLine = f'{msg}\n'
    print(msg)
    logfile.write(logLine)
    logfile.flush()


def summarize(numToMigrate, numInvalid, numMigrated, numNoMapping, numTotal,
              logfile):
    """Report the counts gathered while classifying the Kaltura media rows.

    Args:
        numToMigrate: Number of rows that will be migrated.
        numInvalid: Number of rows that will be deleted.
        numMigrated: Number of rows that are already migrated.
        numNoMapping: Number of rows with no mapping.
        numTotal: Total number of rows examined.
        logfile: Log file object handed through to msg().
    """
    reportLines = (
        '-------- Summary --------',
        f' To be Migrated: {numToMigrate}',
        f' To be Deleted: {numInvalid}',
        f'Already Migrated: {numMigrated}',
        f' No Mapping: {numNoMapping}',
        f' Total: {numTotal}',
        '-------- ------- --------',
    )
    for reportLine in reportLines:
        msg(reportLine, logfile)


def deleteInvalidKalturaMedias(medias, logfile):
    """Log each given media row and mark it for deletion in the db session.

    Nothing is committed here; committing is left to the caller.

    Args:
        medias: Iterable of media objects to delete; each must expose ``id``.
        logfile: Log file object handed through to msg().
    """
    for invalidMedia in medias:
        notice = f'Deleting invalid kaltura media id {invalidMedia.id}'
        msg(notice, logfile)
        db.session.delete(invalidMedia)


def migrateKalturaMedias(medias, oldToNewEntryIds, logfile):
    """Point each given media row at its entry on the new Kaltura instance.

    Opens an admin session against the Kaltura API, fetches each new entry,
    and overwrites the row's download URL, partner ID, service URL, player ID
    and entry ID. The media objects are modified in place and nothing is
    committed here; committing is left to the caller.

    Args:
        medias: Iterable of media objects to update. The ``entry_id`` of each
            one must be a key of ``oldToNewEntryIds``.
        oldToNewEntryIds: Dict mapping old entry IDs to new entry IDs.
        logfile: Log file object handed through to msg().

    Raises:
        KeyError: If a media's current entry ID has no mapping.
    """
    # These settings are the same for every media, so read them once.
    serviceUrl = KalturaCore.service_url()
    playerId = KalturaCore.player_id()

    # connect to the Kaltura API
    kClient = KalturaClient(KalturaConfiguration(serviceUrl=serviceUrl))
    kSession = kClient.session.start(
        KalturaCore.secret(),
        KalturaCore.user_id(),
        KalturaSessionType.ADMIN,
        KalturaCore.partner_id(),
        86400,  # session expiry in seconds, i.e. 24 hours
        "appID:compair"
    )
    kClient.setKs(kSession)

    for media in medias:
        oldEntryId = media.entry_id
        newEntryId = oldToNewEntryIds[oldEntryId]
        msg(f'Processing id {media.id}: Old {oldEntryId} to New {newEntryId}',
            logfile)
        # NOTE(review): the second argument is presumably the entry version,
        # with -1 meaning "latest" -- confirm against the Kaltura API docs.
        newInfo = kClient.media.get(newEntryId, -1)
        media.download_url = newInfo.getDownloadUrl()
        media.partner_id = newInfo.getPartnerId()
        media.service_url = serviceUrl
        media.player_id = playerId
        media.entry_id = newEntryId


@manager.command
def migrate(mappingCsv, noHeader=False, dryRun=False):
    """Migrate Kaltura media rows to the entry IDs of the new Kaltura instance.

    Reads the old-to-new entry ID mapping, classifies every KalturaMedia row
    (needs migration, already migrated, invalid, or no mapping), logs a
    summary and, unless this is a dry run, deletes the invalid rows, updates
    the rows needing migration and commits. Progress is printed and also
    appended to a timestamped ``kaltura-migration-log-<timestamp>.log`` file
    in the current directory.

    Args:
        mappingCsv: Path of the CSV file mapping old to new entry IDs.
        noHeader: If true, the first CSV row is parsed as data instead of
            being skipped as a header.
        dryRun: If true, only report what would be done; make no changes.

    Raises:
        ValueError: If the mapping file is empty or malformed.
    """
    ts = datetime.now().isoformat(timespec='seconds')
    # The context manager closes the log even when a step below raises.
    with open(f'kaltura-migration-log-{ts}.log', 'a') as logfile:
        msg('Starting Kaltura migration', logfile)
        oldToNewEntryIds = readMappingCsv(mappingCsv, noHeader)
        # Only membership is needed to recognise already-migrated rows.
        newEntryIds = set(oldToNewEntryIds.values())
        invalidKalturaMedias = []  # can't be migrated, might as well delete
        needMigrationMedias = []  # needs to be migrated
        numAlreadyMigrated = 0
        numNoMapping = 0
        kalturaMedias = KalturaMedia.query.all()
        # find out how much work needs to be done
        for kalturaMedia in kalturaMedias:
            mediaId = kalturaMedia.id
            entryId = kalturaMedia.entry_id
            if not entryId:
                msg(f'Empty entry ID for id {mediaId}', logfile)
                invalidKalturaMedias.append(kalturaMedia)
            elif entryId in oldToNewEntryIds:
                msg(f"Migration needed for id {mediaId}: Entry {entryId}",
                    logfile)
                needMigrationMedias.append(kalturaMedia)
            elif entryId in newEntryIds:
                msg(f"Already migrated id {mediaId}: Entry {entryId}", logfile)
                numAlreadyMigrated += 1
            else:
                # didn't find a mapping, perhaps missing from migration?
                msg(f'No mapping for id {mediaId}: Entry {entryId}', logfile)
                numNoMapping += 1
        # summarize what needs to be done
        summarize(len(needMigrationMedias), len(invalidKalturaMedias),
                  numAlreadyMigrated, numNoMapping, len(kalturaMedias),
                  logfile)
        if dryRun:
            msg('*** Dry run completed, no changes were made ***', logfile)
            return
        # do the actual work in a transaction
        msg('Starting database session', logfile)
        try:
            deleteInvalidKalturaMedias(invalidKalturaMedias, logfile)
            migrateKalturaMedias(needMigrationMedias, oldToNewEntryIds,
                                 logfile)
            msg('Committing to database', logfile)
            db.session.commit()
        except Exception:
            # Discard the partial changes and record the failure, then let
            # the original error propagate to the caller.
            db.session.rollback()
            msg('*** Migration failed, database changes were rolled back ***',
                logfile)
            raise
2 changes: 2 additions & 0 deletions manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from compair.manage.score import manager as score_generator
from compair.manage.user import manager as user_manager
from compair.manage.utils import manager as util_manager
from compair.manage.kaltura import manager as kaltura_manager
from compair import create_app

manager = Manager(create_app(skip_assets=True))
Expand All @@ -20,6 +21,7 @@
manager.add_command("runserver", Server(port=8080))
manager.add_command("user", user_manager)
manager.add_command("util", util_manager)
manager.add_command("kaltura", kaltura_manager)


@manager.command
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,5 @@ markupsafe==2.0.1
# json api was deprecated in latest version, so need older version, can be
# removed once upgraded to flask 2
itsdangerous==2.0.1
# for the kaltura migration script
kalturaapiclient==19.1.0

0 comments on commit a82c1b3

Please sign in to comment.