Merge pull request #6 from mediacatch/5-switch-to-ffprobe-for-more-robust-duration-calculations

5 switch to ffprobe for more robust duration calculations
mediacatch · Apr 3, 2023 · 9c8fbe1 · 9c8fbe1
2 parents d26cf4a + 32ba198
commit 9c8fbe1
Show file tree

Hide file tree

Showing 5 changed files with 51 additions and 26 deletions.
diff --git a/.github/workflows/github-action.yml b/.github/workflows/github-action.yml
@@ -20,6 +20,8 @@ jobs:
           pip install flake8
           pip install -e .
           pip install pytest-cov responses
+      - uses: FedericoCarboni/setup-ffmpeg@v2
+        id: setup-ffmpeg
       - name: Lint with flake8
         run: |
           # stop the build if there are Python syntax errors or undefined names

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "mediacatch-s2t"
-version = "0.0.3"
+version = "0.0.4"
 description = "Upload a media file and get the transcription link."
 readme = "README.md"
 authors = [{ name = "MediaCatch", email = "[email protected]" }]

diff --git a/src/mediacatch_s2t/__init__.py b/src/mediacatch_s2t/__init__.py
@@ -3,7 +3,7 @@
 """
 
 # Version of the mc-s2t-mediacatch_s2t
-__version__ = "0.0.3"
+__version__ = "0.0.4"
 
 import os
 

diff --git a/src/mediacatch_s2t/uploader.py b/src/mediacatch_s2t/uploader.py
@@ -2,13 +2,20 @@
 import os
 import pathlib
 
-import pymediainfo
 import requests
+import subprocess
+import json
+from typing import NamedTuple
+
 
 from mediacatch_s2t import (
     URL, PRESIGNED_ENDPOINT, TRANSCRIPT_ENDPOINT, UPDATE_STATUS_ENDPOINT, PROCESSING_TIME_RATIO
 )
 
+class FFProbeResult(NamedTuple):
+    return_code: int
+    json: str
+    error: str
 
 class UploaderException(Exception):
     pass
@@ -44,21 +51,37 @@ def _make_post_request(self, *args, **kwargs):
     def _transcript_link(self):
         return f"{URL}{TRANSCRIPT_ENDPOINT}?id={self.file_id}&api_key={self.api_key}"
 
+    @staticmethod
+    def _ffprobe(file_path) -> FFProbeResult:
+        command_array = ["ffprobe",
+                        "-v", "quiet",
+                        "-print_format", "json",
+                        "-show_format",
+                        "-show_streams",
+                        file_path]
+        result = subprocess.run(command_array, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+        return FFProbeResult(return_code=result.returncode,
+                            json=json.loads(result.stdout),
+                            error=result.stderr)
+
     def get_duration(self):
         """Get audio track duration of a file.
 
         :return
-        tuple: (bool, duration_in_miliseconds)
+        tuple: (duration_in_miliseconds, stream_json | error_msg)
         """
         try:
-            mi = pymediainfo.MediaInfo.parse(self.file)
-            if not mi.audio_tracks:
-                return True, 0
-            return True, mi.audio_tracks[0].duration
-        except OSError:
-            return False, 0
-        except Exception:
-            return False, 0
+            probe = self._ffprobe(self.file)
+            if probe.return_code:
+                return 0, probe.error
+            else:
+                for stream in probe.json['streams']:
+                    if stream['codec_type'] == 'audio':
+                        return int(float(stream['duration']) * 1000), stream
+                else:
+                    return 0, "The file doesn't have an audio track"
+        except OSError as e:
+            return 0, 'FFmpeg not installed (sudo apt install ffmpeg)'
 
     def estimated_result_time(self, audio_length=0):
         """Estimated processing time in seconds"""
@@ -119,10 +142,10 @@ def upload_file(self):
             result["message"] = "The file doesn't exist"
             return result
 
-        is_having_duration, file_duration = self.get_duration()
-        if is_having_duration and not file_duration:
+        file_duration, msg = self.get_duration()
+        if not file_duration:
             result["status"] = "error"
-            result["message"] = "The file doesn't have an audio track"
+            result["message"] = msg
             return result
 
         mime_file = {

diff --git a/tests/test_uploader.py b/tests/test_uploader.py
@@ -10,12 +10,12 @@ def test_is_file_exist_mocked_return_true(mock_is_file):
     assert Uploader('fake file', 'fake key')._is_file_exist() is True
 
 
-@mock.patch("pymediainfo.MediaInfo.parse")
-def test_get_duration_mocked_return_value(mock_pymedia):
-    class MockDuration:
-        duration = 1000
-    mock_pymedia.return_value.audio_tracks = [MockDuration]
-    assert Uploader('fake file', 'fake key').get_duration() == (True, 1000)
+@mock.patch("subprocess.run")
+def test_get_duration_mocked_return_value(mock_subprocess):
+    mock_subprocess.return_value.returncode = 0
+    mock_subprocess.return_value.stdout = '{"streams": [{"codec_type": "audio", "duration": 1}]}'
+    mock_subprocess.return_value.stderr = None
+    assert Uploader('fake file', 'fake key').get_duration() == (1000, {'codec_type': 'audio', 'duration': 1})
 
 
 def test_estimated_result_time():
@@ -26,8 +26,8 @@ def test_estimated_result_time():
             read_data="bytes of data")
 @mock.patch("pathlib.Path")
 @mock.patch("os.path.getsize", return_value=100)
-@mock.patch("pymediainfo.MediaInfo.parse")
-def test_upload_succeed(mock_pymedia, mock_getsize, mock_Path, mock_open):
+@mock.patch("subprocess.run")
+def test_upload_succeed(mock_subprocess, mock_getsize, mock_Path, mock_open):
     URL_EXAMPLE = 'http://url-for-upload.example.com'
 
     def side_effect():
@@ -36,9 +36,9 @@ def side_effect():
     mock_Path.return_value.suffix = '.avi'
     mock_Path.return_value.is_file.side_effect = side_effect
 
-    class MockDuration:
-        duration = 100000
-    mock_pymedia.return_value.audio_tracks = [MockDuration]
+    mock_subprocess.return_value.returncode = 0
+    mock_subprocess.return_value.stdout = '{"streams": [{"codec_type": "audio", "duration": 100}]}'
+    mock_subprocess.return_value.stderr = None
 
     responses.add(
         responses.POST, f'{URL}{PRESIGNED_ENDPOINT}', status=200,