diff --git a/edxval/api.py b/edxval/api.py index 7176c45d..14bc11c8 100644 --- a/edxval/api.py +++ b/edxval/api.py @@ -5,6 +5,7 @@ """ import logging from enum import Enum +from uuid import uuid4 from django.core.exceptions import ObjectDoesNotExist, ValidationError from lxml import etree @@ -39,6 +40,13 @@ class SortDirection(Enum): desc = "desc" +def generate_video_id(): + """ + Generates a video ID. + """ + return unicode(uuid4()) + + def create_video(video_data): """ Called on to create Video objects in the database @@ -78,6 +86,23 @@ def create_video(video_data): raise ValCannotCreateError(serializer.errors) +def create_external_video(display_name): + """ + Create an external video. + + Arguments: + display_name(unicode): Client title for the external video + """ + return create_video({ + 'edx_video_id': generate_video_id(), + 'status': 'external', + 'client_video_id': display_name, + 'duration': 0, + 'encoded_videos': [], + 'courses': [] + }) + + def update_video(video_data): """ Called on to update Video objects in the database @@ -213,30 +238,21 @@ def get_video_transcript(video_id, language_code): return TranscriptSerializer(transcript).data if transcript else None -def get_video_transcript_data(video_ids, language_code): +def get_video_transcript_data(video_id, language_code): """ Get video transcript data Arguments: - video_ids(list): list containing edx_video_id and external video ids extracted from - external sources from a video component. + video_id(unicode): An id identifying the Video. language_code(unicode): it will be the language code of the requested transcript. Returns: - A dict containing transcript file name and its content. It will be for a video whose transcript - found first while iterating the video ids. + A dict containing transcript file name and its content. """ - transcript_data = None - for video_id in video_ids: + video_transcript = VideoTranscript.get_or_none(video_id, language_code) + if video_transcript: try: - video_transcript = VideoTranscript.objects.get(video__edx_video_id=video_id, language_code=language_code) - transcript_data = dict( - file_name=video_transcript.filename, - content=video_transcript.transcript.file.read() - ) - break - except VideoTranscript.DoesNotExist: - continue + return dict(file_name=video_transcript.filename, content=video_transcript.transcript.file.read()) except Exception: logger.exception( '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s', @@ -245,26 +261,23 @@ def get_video_transcript_data(video_ids, language_code): ) raise - return transcript_data - -def get_available_transcript_languages(video_ids): +def get_available_transcript_languages(video_id): """ Get available transcript languages Arguments: - video_ids(list): list containing edx_video_id and external video ids extracted from - external sources of a video component. + video_id(unicode): An id identifying the Video. Returns: - A list containing unique transcript language codes for the video ids. + A list containing transcript language codes for the Video. """ available_languages = VideoTranscript.objects.filter( - video__edx_video_id__in=video_ids + video__edx_video_id=video_id ).values_list( 'language_code', flat=True ) - return list(set(available_languages)) + return list(available_languages) def get_video_transcript_url(video_id, language_code): @@ -280,6 +293,28 @@ def get_video_transcript_url(video_id, language_code): return video_transcript.url() +def create_video_transcript(video_id, language_code, file_format, content, provider=TranscriptProviderType.CUSTOM): + """ + Create a video transcript. + + Arguments: + video_id(unicode): An Id identifying the Video data model object. + language_code(unicode): A language code. + file_format(unicode): Transcript file format. + content(InMemoryUploadedFile): Transcript content. + provider(unicode): Transcript provider (it will be 'custom' by default if not selected). + """ + transcript_serializer = TranscriptSerializer( + data=dict(provider=provider, language_code=language_code, file_format=file_format), + context=dict(video_id=video_id), + ) + if transcript_serializer.is_valid(): + transcript_serializer.save(content=content) + return transcript_serializer.data + else: + raise ValCannotCreateError(transcript_serializer.errors) + + def create_or_update_video_transcript(video_id, language_code, metadata, file_data=None): """ Create or Update video transcript for an existing video. @@ -323,17 +358,16 @@ def delete_video_transcript(video_id, language_code): Delete transcript for an existing video. Arguments: - video_id: id of the video with which transcript is associated - language_code: language code of a video transcript + video_id: id identifying the video to which the transcript is associated. + language_code: language code of a video transcript. """ - try: - video_transcript = VideoTranscript.objects.get(video__edx_video_id=video_id, language_code=language_code) - # delete the actual transcript file from storage + video_transcript = VideoTranscript.get_or_none(video_id, language_code) + if video_transcript: + # delete the transcript content from storage. video_transcript.transcript.delete() - # delete the record from db + # delete the transcript metadata from db. video_transcript.delete() - except VideoTranscript.DoesNotExist: - pass + logger.info('Transcript is removed for video "%s" and language code "%s"', video_id, language_code) def get_3rd_party_transcription_plans(): diff --git a/edxval/models.py b/edxval/models.py index dad91747..6626ca5f 100644 --- a/edxval/models.py +++ b/edxval/models.py @@ -122,6 +122,18 @@ def get_absolute_url(self): def __str__(self): return self.edx_video_id + @classmethod + def get_or_none(cls, **filter_kwargs): + """ + Returns a video or None. + """ + try: + video = cls.objects.get(**filter_kwargs) + except cls.DoesNotExist: + video = None + + return video + @classmethod def by_youtube_id(cls, youtube_id): """ @@ -447,6 +459,34 @@ def get_or_none(cls, video_id, language_code): return transcript + @classmethod + def create(cls, video, language_code, file_format, content, provider): + """ + Create a Video Transcript. + + Arguments: + video(Video): Video data model object + language_code(unicode): A language code. + file_format(unicode): Transcript file format. + content(InMemoryUploadedFile): Transcript content. + provider(unicode): Transcript provider. + """ + video_transcript = cls(video=video, language_code=language_code, file_format=file_format, provider=provider) + with closing(content) as transcript_content: + try: + file_name = '{uuid}.{ext}'.format(uuid=uuid4().hex, ext=video_transcript.file_format) + video_transcript.transcript.save(file_name, transcript_content) + video_transcript.save() + except Exception: + logger.exception( + '[VAL] Transcript save failed to storage for video_id "%s" language code "%s"', + video.edx_video_id, + language_code + ) + raise + + return video_transcript + @classmethod def create_or_update(cls, video, language_code, metadata, file_data=None): """ @@ -481,7 +521,11 @@ def create_or_update(cls, video, language_code, metadata, file_data=None): try: video_transcript.transcript.save(file_name, transcript_file_data) except Exception: - logger.exception('VAL: Transcript save failed to storage for video_id [%s]', video.edx_video_id) + logger.exception( + '[VAL] Transcript save failed to storage for video_id "%s" language code "%s"', + video.edx_video_id, + language_code + ) raise video_transcript.save() diff --git a/edxval/serializers.py b/edxval/serializers.py index e74cd015..8b13708a 100644 --- a/edxval/serializers.py +++ b/edxval/serializers.py @@ -74,6 +74,24 @@ def get_url(self, transcript): """ return transcript.url() + def validate(self, data): + """ + Validates the transcript data. + """ + video_id = self.context.get('video_id') + video = Video.get_or_none(edx_video_id=video_id) + if not video: + raise serializers.ValidationError('Video "{video_id}" is not valid.'.format(video_id=video_id)) + + data.update(video=video) + return data + + def create(self, validated_data): + """ + Create the video transcript. + """ + return VideoTranscript.create(**validated_data) + class CourseSerializer(serializers.RelatedField): """ diff --git a/edxval/tests/test_api.py b/edxval/tests/test_api.py index eb180f79..ad5c6a0d 100644 --- a/edxval/tests/test_api.py +++ b/edxval/tests/test_api.py @@ -30,6 +30,7 @@ TranscriptFormat, TranscriptPreference, TranscriptProviderType, Video, VideoImage, VideoTranscript) +from edxval.serializers import VideoSerializer from edxval.tests import APIAuthTestCase, constants @@ -139,6 +140,21 @@ def test_create_invalid_video(self, data): # pylint: disable=W0621 with self.assertRaises(ValCannotCreateError): api.create_video(data) + def test_create_external_video(self): + """ + Tests the creation of an external video. + """ + expected_video = { + 'status': u'external', + 'client_video_id': u'Test Video', + 'duration': 0, + 'encoded_videos': [], + 'courses': [] + } + edx_video_id = api.create_external_video(display_name=expected_video['client_video_id']) + video = VideoSerializer(Video.objects.get(edx_video_id=edx_video_id)).data + self.assertDictContainsSubset(expected_video, video) + @ddt class UpdateVideoTest(TestCase): @@ -1762,26 +1778,23 @@ def test_get_video_transcript_data_exception(self, mock_logger): """ Verify that `get_video_transcript_data` logs and raises an exception. """ + video_id = u'medium-soaker' + language_code = u'zh' with self.assertRaises(IOError): - api.get_video_transcript_data(video_ids=['medium-soaker'], language_code=u'zh') + api.get_video_transcript_data(video_id, language_code) mock_logger.exception.assert_called_with( '[edx-val] Error while retrieving transcript for video=%s -- language_code=%s', - 'medium-soaker', - 'zh', + video_id, + language_code, ) - @data( - {'video_ids': ['non-existant-video', 'another-non-existant-id'], 'language_code': 'en', 'result': None}, - {'video_ids': ['non-existant-video', 'super-soaker'], 'language_code': 'zh', 'result': None}, - ) - @unpack - def test_get_video_transcript_data_not_found(self, video_ids, language_code, result): + def test_get_video_transcript_data_not_found(self): """ - Verify that `get_video_transcript_data` api function works as expected. + Verify the `get_video_transcript_data` returns none if transcript is not present for a video. """ - transcript = api.get_video_transcript_data(video_ids, language_code) - self.assertEqual(transcript, result) + transcript = api.get_video_transcript_data(u'non-existant-video', u'en') + self.assertIsNone(transcript) @data( ('super-soaker', 'en', 'Shallow Swordfish-en.srt', 'edxval/tests/data/The_Flash.srt'), @@ -1796,10 +1809,7 @@ def test_get_video_transcript_data(self, video_id, language_code, expected_file_ 'file_name': expected_file_name, 'content': File(open(expected_transcript_path)).read() } - transcript = api.get_video_transcript_data( - video_ids=[video_id, '0987654321'], - language_code=language_code - ) + transcript = api.get_video_transcript_data(video_id=video_id, language_code=language_code) self.assertDictEqual(transcript, expected_transcript) def test_get_video_transcript_url(self): @@ -1894,6 +1904,67 @@ def test_create_or_update_video_exceptions(self, video_id, file_format, provider self.assertEqual(transcript_exception.exception.message, exception_message) + def test_create_video_transcript(self): + """ + Verify that `create_video_transcript` api function creates transcript as expected. + """ + edx_video_id = u'1234' + language_code = u'en' + transcript_props = dict( + video_id=edx_video_id, + language_code=language_code, + provider=TranscriptProviderType.THREE_PLAY_MEDIA, + file_format=TranscriptFormat.SRT, + content=ContentFile(FILE_DATA) + ) + + # setup video with the `edx_video_id` above. + self.setup_video_with_transcripts( + video_data=dict(constants.VIDEO_DICT_DIFFERENT_ID_FISH, edx_video_id=edx_video_id), + transcripts_data=[] + ) + + # Assert that 'en' transcript is not already present. + video_transcript = VideoTranscript.get_or_none(edx_video_id, language_code) + self.assertIsNone(video_transcript) + + # Create the transcript + api.create_video_transcript(**transcript_props) + + # Assert the transcript object and its content + video_transcript = VideoTranscript.get_or_none(edx_video_id, language_code) + self.assertIsNotNone(video_transcript) + self.assertEqual(video_transcript.file_format, transcript_props['file_format']) + self.assertEqual(video_transcript.provider, transcript_props['provider']) + with open(video_transcript.transcript.name) as created_transcript: + self.assertEqual(created_transcript.read(), FILE_DATA) + + @data( + { + 'video_id': 'super-soaker', + 'language_code': 'en', + 'file_format': '123', + 'provider': TranscriptProviderType.CIELO24, + 'exception_msg': '"123" is not a valid choice.' + }, + { + 'video_id': 'medium-soaker', + 'language_code': 'en', + 'file_format': TranscriptFormat.SRT, + 'provider': 'unknown provider', + 'exception_msg': '"unknown provider" is not a valid choice.' + } + ) + @unpack + def test_create_video_transcript_exceptions(self, video_id, language_code, file_format, provider, exception_msg): + """ + Verify that `create_video_transcript` api function raise exceptions on invalid values. + """ + with self.assertRaises(ValCannotCreateError) as transcript_exception: + api.create_video_transcript(video_id, language_code, file_format, ContentFile(FILE_DATA), provider) + + self.assertIn(exception_msg, unicode(transcript_exception.exception.message)) + def test_video_transcript_deletion(self): """ Test video transcript deletion works as expected. @@ -1930,12 +2001,11 @@ def test_get_available_transcript_languages(self): Verify that `get_available_transcript_languages` works as expected. """ # `super-soaker` has got 'en' and 'fr' transcripts - # `non_existent_video_id` that does not have transcript - video_ids = ['super-soaker', 'non_existent_video_id'] - transcript_languages = api.get_available_transcript_languages(video_ids=video_ids) + transcript_languages = api.get_available_transcript_languages(video_id=u'super-soaker') self.assertItemsEqual(transcript_languages, ['en', 'fr']) - def test_delete_video_transcript(self): + @patch('edxval.api.logger') + def test_delete_video_transcript(self, mock_logger): """ Verify that `delete_video_transcript` works as expected. """ @@ -1954,6 +2024,11 @@ def test_delete_video_transcript(self): # assert that the transcript does not exist on the path anymore. self.assertFalse(os.path.exists(transcript_path)) self.assertEqual(VideoTranscript.objects.filter(**query_filter).count(), 0) + mock_logger.info.assert_called_with( + 'Transcript is removed for video "%s" and language code "%s"', + query_filter['video__edx_video_id'], + query_filter['language_code'] + ) @ddt diff --git a/setup.py b/setup.py index 2c459de8..16aadd07 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def load_requirements(*requirements_paths): setup( name='edxval', - version='0.1.10', + version='0.1.11', author='edX', url='http://github.com/edx/edx-val', description='edx-val',