Skip to content

Commit

Permalink
Merge pull request #126 from edx/mrehan/refactor-val-api-utils
Browse files Browse the repository at this point in the history
Refactor/add transcript api utils.
  • Loading branch information
Qubad786 authored Feb 16, 2018
2 parents 7dc65bc + bba6213 commit 776b243
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 53 deletions.
96 changes: 65 additions & 31 deletions edxval/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""
import logging
from enum import Enum
from uuid import uuid4

from django.core.exceptions import ObjectDoesNotExist, ValidationError
from lxml import etree
Expand Down Expand Up @@ -39,6 +40,13 @@ class SortDirection(Enum):
desc = "desc"


def generate_video_id():
    """
    Generate a new, random video ID.

    Returns:
        A text string (``unicode`` on Python 2, ``str`` on Python 3) holding
        the canonical hyphenated form of a freshly generated version-4 UUID.
    """
    # Formatting into a unicode literal yields text on both Python 2 and 3 and
    # avoids the `unicode` builtin, which does not exist on Python 3.
    return u'{}'.format(uuid4())


def create_video(video_data):
"""
Called on to create Video objects in the database
Expand Down Expand Up @@ -78,6 +86,23 @@ def create_video(video_data):
raise ValCannotCreateError(serializer.errors)


def create_external_video(display_name):
    """
    Create a video record flagged as external and return the result of `create_video`.

    The record gets a freshly generated edx_video_id, a status of 'external',
    zero duration, and no encoded videos or courses.

    Arguments:
        display_name(unicode): Client title for the external video
    """
    external_video_data = {
        'edx_video_id': generate_video_id(),
        'status': 'external',
        'client_video_id': display_name,
        'duration': 0,
        'encoded_videos': [],
        'courses': [],
    }
    return create_video(external_video_data)


def update_video(video_data):
"""
Called on to update Video objects in the database
Expand Down Expand Up @@ -213,30 +238,21 @@ def get_video_transcript(video_id, language_code):
return TranscriptSerializer(transcript).data if transcript else None


def get_video_transcript_data(video_ids, language_code):
def get_video_transcript_data(video_id, language_code):
"""
Get video transcript data
Arguments:
video_ids(list): list containing edx_video_id and external video ids extracted from
external sources from a video component.
video_id(unicode): An id identifying the Video.
language_code(unicode): it will be the language code of the requested transcript.
Returns:
A dict containing transcript file name and its content. It will be for a video whose transcript
found first while iterating the video ids.
A dict containing transcript file name and its content.
"""
transcript_data = None
for video_id in video_ids:
video_transcript = VideoTranscript.get_or_none(video_id, language_code)
if video_transcript:
try:
video_transcript = VideoTranscript.objects.get(video__edx_video_id=video_id, language_code=language_code)
transcript_data = dict(
file_name=video_transcript.filename,
content=video_transcript.transcript.file.read()
)
break
except VideoTranscript.DoesNotExist:
continue
return dict(file_name=video_transcript.filename, content=video_transcript.transcript.file.read())
except Exception:
logger.exception(
'[edx-val] Error while retrieving transcript for video=%s -- language_code=%s',
Expand All @@ -245,26 +261,23 @@ def get_video_transcript_data(video_ids, language_code):
)
raise

return transcript_data


def get_available_transcript_languages(video_ids):
def get_available_transcript_languages(video_id):
"""
Get available transcript languages
Arguments:
video_ids(list): list containing edx_video_id and external video ids extracted from
external sources of a video component.
video_id(unicode): An id identifying the Video.
Returns:
A list containing unique transcript language codes for the video ids.
A list containing transcript language codes for the Video.
"""
available_languages = VideoTranscript.objects.filter(
video__edx_video_id__in=video_ids
video__edx_video_id=video_id
).values_list(
'language_code', flat=True
)
return list(set(available_languages))
return list(available_languages)


def get_video_transcript_url(video_id, language_code):
Expand All @@ -280,6 +293,28 @@ def get_video_transcript_url(video_id, language_code):
return video_transcript.url()


def create_video_transcript(video_id, language_code, file_format, content, provider=TranscriptProviderType.CUSTOM):
    """
    Create a transcript for a video through `TranscriptSerializer`.

    Arguments:
        video_id(unicode): An Id identifying the Video data model object.
        language_code(unicode): A language code.
        file_format(unicode): Transcript file format.
        content(InMemoryUploadedFile): Transcript content.
        provider(unicode): Transcript provider (defaults to TranscriptProviderType.CUSTOM).

    Returns:
        The serializer's `data` for the saved transcript.

    Raises:
        ValCannotCreateError: if the serializer rejects the supplied data.
    """
    serializer = TranscriptSerializer(
        data=dict(provider=provider, language_code=language_code, file_format=file_format),
        context=dict(video_id=video_id),
    )
    # Bail out early on invalid input so the success path reads straight through.
    if not serializer.is_valid():
        raise ValCannotCreateError(serializer.errors)

    # The transcript content is not part of the validated data; it is handed to save() directly.
    serializer.save(content=content)
    return serializer.data


def create_or_update_video_transcript(video_id, language_code, metadata, file_data=None):
"""
Create or Update video transcript for an existing video.
Expand Down Expand Up @@ -323,17 +358,16 @@ def delete_video_transcript(video_id, language_code):
Delete transcript for an existing video.
Arguments:
video_id: id of the video with which transcript is associated
language_code: language code of a video transcript
video_id: id identifying the video to which the transcript is associated.
language_code: language code of a video transcript.
"""
try:
video_transcript = VideoTranscript.objects.get(video__edx_video_id=video_id, language_code=language_code)
# delete the actual transcript file from storage
video_transcript = VideoTranscript.get_or_none(video_id, language_code)
if video_transcript:
# delete the transcript content from storage.
video_transcript.transcript.delete()
# delete the record from db
# delete the transcript metadata from db.
video_transcript.delete()
except VideoTranscript.DoesNotExist:
pass
logger.info('Transcript is removed for video "%s" and language code "%s"', video_id, language_code)


def get_3rd_party_transcription_plans():
Expand Down
46 changes: 45 additions & 1 deletion edxval/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,18 @@ def get_absolute_url(self):
def __str__(self):
    """
    Use the edX video id as the string form of this object.
    """
    edx_video_id = self.edx_video_id
    return edx_video_id

@classmethod
def get_or_none(cls, **filter_kwargs):
    """
    Fetch the single object matching `filter_kwargs`.

    Returns:
        The object returned by `cls.objects.get(**filter_kwargs)`, or None
        when that lookup raises `cls.DoesNotExist`.
    """
    try:
        return cls.objects.get(**filter_kwargs)
    except cls.DoesNotExist:
        return None

@classmethod
def by_youtube_id(cls, youtube_id):
"""
Expand Down Expand Up @@ -447,6 +459,34 @@ def get_or_none(cls, video_id, language_code):

return transcript

@classmethod
def create(cls, video, language_code, file_format, content, provider):
    """
    Build a transcript record for `video`, store its content and save it.

    The content is stored under a random '<uuid hex>.<file format>' name.
    Any failure while storing or saving is logged and re-raised.

    Arguments:
        video(Video): Video data model object
        language_code(unicode): A language code.
        file_format(unicode): Transcript file format.
        content(InMemoryUploadedFile): Transcript content.
        provider(unicode): Transcript provider.

    Returns:
        The newly created transcript object.
    """
    new_transcript = cls(
        video=video,
        language_code=language_code,
        file_format=file_format,
        provider=provider,
    )
    # `closing` ensures the supplied content is closed whether or not the save succeeds.
    with closing(content) as open_content:
        try:
            storage_name = '{uuid}.{ext}'.format(uuid=uuid4().hex, ext=new_transcript.file_format)
            new_transcript.transcript.save(storage_name, open_content)
            new_transcript.save()
        except Exception:
            logger.exception(
                '[VAL] Transcript save failed to storage for video_id "%s" language code "%s"',
                video.edx_video_id,
                language_code
            )
            raise

    return new_transcript

@classmethod
def create_or_update(cls, video, language_code, metadata, file_data=None):
"""
Expand Down Expand Up @@ -481,7 +521,11 @@ def create_or_update(cls, video, language_code, metadata, file_data=None):
try:
video_transcript.transcript.save(file_name, transcript_file_data)
except Exception:
logger.exception('VAL: Transcript save failed to storage for video_id [%s]', video.edx_video_id)
logger.exception(
'[VAL] Transcript save failed to storage for video_id "%s" language code "%s"',
video.edx_video_id,
language_code
)
raise

video_transcript.save()
Expand Down
18 changes: 18 additions & 0 deletions edxval/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,24 @@ def get_url(self, transcript):
"""
return transcript.url()

def validate(self, data):
    """
    Check that the video named in the serializer context exists.

    Reads 'video_id' from `self.context`, looks the video up by its
    edx_video_id and, when found, adds it to `data` under the 'video' key.

    Returns:
        The same `data` mapping, updated with the matched video.

    Raises:
        serializers.ValidationError: if no matching video is found.
    """
    edx_video_id = self.context.get('video_id')
    matched_video = Video.get_or_none(edx_video_id=edx_video_id)
    if matched_video:
        data.update(video=matched_video)
        return data

    raise serializers.ValidationError('Video "{video_id}" is not valid.'.format(video_id=edx_video_id))

def create(self, validated_data):
    """
    Create the video transcript by passing the validated fields to `VideoTranscript.create`.

    Returns:
        Whatever `VideoTranscript.create` returns for the given data.
    """
    created_transcript = VideoTranscript.create(**validated_data)
    return created_transcript


class CourseSerializer(serializers.RelatedField):
"""
Expand Down
Loading

0 comments on commit 776b243

Please sign in to comment.