From f54ef7a292d7a704589f2239c7fb2a52424952a2 Mon Sep 17 00:00:00 2001
From: Joe Todd <59872435+joetoddsonos@users.noreply.github.com>
Date: Tue, 16 Apr 2024 14:45:09 +0100
Subject: [PATCH] Fix bug in the shutdown behaviour of the decoder (#26)

- This fixes a bug where decoder threads are left hanging waiting for more data to process, when the stream has already finished.
- Switching polling for thread signalling with `Event`s, and also added a `Lock` when reading/writing the buffer.
- I also updated the `FileEncoder` to automatically detect the bit depth of the input file, and use this for encoding. An error is raised if it is not 16 or 32 bit PCM. Since this made the `dtype` variable redundant, I updated the version to v3.
- Added @GOAE 's suggestion of a `OneShotDecoder` for anyone that just wants to decode a buffer once, rather than in real time.
- Added official support for Python3.12
---
 .github/workflows/lint.yml |   2 +-
 .github/workflows/test.yml |  14 ++---
 CHANGELOG.rst              |   8 +++
 docs/index.rst             |   2 +
 examples/passthrough.py    |  11 +++-
 pyflac/__init__.py         |   6 +-
 pyflac/__main__.py         |   2 -
 pyflac/decoder.py          | 118 +++++++++++++++++++++++++++++++++----
 pyflac/encoder.py          |  16 +++--
 pyproject.toml             |   5 +-
 tests/test_decoder.py      |  30 +++++++++-
 tests/test_encoder.py      |   9 +--
 tox.ini                    |   2 +-
 13 files changed, 184 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 38920b6..1813b15 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -14,7 +14,7 @@ jobs:
   flake8:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Lint the Python code
         uses: TrueBrain/actions-flake8@master
         with:
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a5d095b..7c22ea9 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -15,22 +15,22 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Install dependencies
         run: |
           sudo apt-get update -y
           sudo apt-get install libsndfile1
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install coverage
+          pip install coverage setuptools
       - name: Run tests
         run: coverage run setup.py test
       - name: Run coveralls
@@ -42,9 +42,9 @@ jobs:
   macos:
     runs-on: macos-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: 3.11
       - name: Install dependencies
@@ -58,7 +58,7 @@ jobs:
   windows:
     runs-on: windows-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           submodules: recursive
       - name: Check install
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index eca7004..cd9b68a 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,14 @@
 pyFLAC Changelog
 ----------------
 
+**v3.0.0**
+
+* Fixed bug in the shutdown behaviour of the `StreamDecoder` (see #22 and #23).
+* Automatically detect bit depth of input data in the `FileEncoder`, and
+  raise an error if not 16-bit or 32-bit PCM (see #24).
+* Added a new `OneShotDecoder` to decode a buffer of FLAC data in a single
+  blocking operation, without the use of threads. Courtesy of @GOAE.
+
 **v2.2.0**
 
 * Updated FLAC library to v1.4.3.
diff --git a/docs/index.rst b/docs/index.rst
index 2108372..0cec563 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -68,6 +68,8 @@ data directly from a file or process in real-time.
    :undoc-members:
    :inherited-members:
 
+.. autoclass:: pyflac.OneShotDecoder
+
 State
 -----
 
diff --git a/examples/passthrough.py b/examples/passthrough.py
index eb329ae..b9d1983 100755
--- a/examples/passthrough.py
+++ b/examples/passthrough.py
@@ -29,7 +29,16 @@ def __init__(self, args):
         self.idx = 0
         self.total_bytes = 0
         self.queue = queue.SimpleQueue()
-        self.data, self.sr = sf.read(args.input_file, dtype='int16', always_2d=True)
+
+        info = sf.info(str(args.input_file))
+        if info.subtype == 'PCM_16':
+            dtype = 'int16'
+        elif info.subtype == 'PCM_32':
+            dtype = 'int32'
+        else:
+            raise ValueError(f'WAV input data type must be either PCM_16 or PCM_32: Got {info.subtype}')
+
+        self.data, self.sr = sf.read(args.input_file, dtype=dtype, always_2d=True)
 
         self.encoder = pyflac.StreamEncoder(
             write_callback=self.encoder_callback,
diff --git a/pyflac/__init__.py b/pyflac/__init__.py
index fb72601..dd02e70 100644
--- a/pyflac/__init__.py
+++ b/pyflac/__init__.py
@@ -4,13 +4,13 @@
 #
 #  pyFLAC
 #
-#  Copyright (c) 2020-2021, Sonos, Inc.
+#  Copyright (c) 2020-2024, Sonos, Inc.
 #  All rights reserved.
 #
 # ------------------------------------------------------------------------------
 
 __title__ = 'pyFLAC'
-__version__ = '2.2.0'
+__version__ = '3.0.0'
 __all__ = [
     'StreamEncoder',
     'FileEncoder',
@@ -19,6 +19,7 @@
     'EncoderProcessException',
     'StreamDecoder',
     'FileDecoder',
+    'OneShotDecoder',
     'DecoderState',
     'DecoderInitException',
     'DecoderProcessException'
@@ -55,6 +56,7 @@
 from .decoder import (
     StreamDecoder,
     FileDecoder,
+    OneShotDecoder,
     DecoderState,
     DecoderInitException,
     DecoderProcessException
diff --git a/pyflac/__main__.py b/pyflac/__main__.py
index 2a14dd7..fdbe233 100644
--- a/pyflac/__main__.py
+++ b/pyflac/__main__.py
@@ -27,7 +27,6 @@ def get_args():
     parser.add_argument('-c', '--compression-level', type=int, choices=range(0, 9), default=5,
                         help='0 is the fastest compression, 5 is the default, 8 is the highest compression')
     parser.add_argument('-b', '--block-size', type=int, default=0, help='The block size')
-    parser.add_argument('-d', '--dtype', default='int16', help='The encoded data type (int16 or int32)')
     parser.add_argument('-v', '--verify', action='store_false', default=True, help='Verify the compressed data')
     args = parser.parse_args()
     return args
@@ -45,7 +44,6 @@ def main():
             input_file=args.input_file,
             output_file=args.output_file,
             blocksize=args.block_size,
-            dtype=args.dtype,
             compression_level=args.compression_level,
             verify=args.verify
         )
diff --git a/pyflac/decoder.py b/pyflac/decoder.py
index d848981..ae6e572 100644
--- a/pyflac/decoder.py
+++ b/pyflac/decoder.py
@@ -4,7 +4,7 @@
 #
 #  pyFLAC decoder
 #
-#  Copyright (c) 2020-2021, Sonos, Inc.
+#  Copyright (c) 2020-2024, Sonos, Inc.
 #  All rights reserved.
 #
 # ------------------------------------------------------------------------------
@@ -93,7 +93,8 @@ def finish(self):
         Flushes the decoding buffer, releases resources, resets the decoder
         settings to their defaults, and returns the decoder state to `DecoderState.UNINITIALIZED`.
 
-        A well behaved program should always call this at the end.
+        A well behaved program should always call this at the end, otherwise the processing
+        thread will be left running, awaiting more data.
         """
         _lib.FLAC__stream_decoder_finish(self._decoder)
 
@@ -121,6 +122,9 @@ class StreamDecoder(_Decoder):
     blocks of raw uncompressed audio is passed back to the user via
     the `callback`.
 
+    The `finish` method must be called at the end of the decoding process,
+    otherwise the processing thread will be left running.
+
     Args:
         write_callback (fn): Function to call when there is uncompressed
             audio data ready, see the example below for more information.
@@ -159,6 +163,8 @@ def __init__(self,
 
         self._done = False
         self._buffer = deque()
+        self._event = threading.Event()
+        self._lock = threading.Lock()
         self.write_callback = write_callback
 
         rc = _lib.FLAC__stream_decoder_init_stream(
@@ -200,7 +206,10 @@ def process(self, data: bytes):
         Args:
             data (bytes): Bytes of FLAC data
         """
+        self._lock.acquire()
         self._buffer.append(data)
+        self._lock.release()
+        self._event.set()
 
     def finish(self):
         """
@@ -225,8 +234,9 @@ def finish(self):
         # Instruct the decoder to finish up and wait until it is done
         # --------------------------------------------------------------
         self._done = True
+        self._event.set()
+        self._thread.join()
         super().finish()
-        self._thread.join(timeout=3)
         if self._error:
             raise DecoderProcessException(self._error)
 
@@ -303,6 +313,84 @@ def _write_callback(self, data: np.ndarray, sample_rate: int, num_channels: int,
         self.__output.write(data)
 
 
+class OneShotDecoder(_Decoder):
+    """
+    A pyFLAC one-shot decoder converts a buffer of FLAC encoded
+    bytes back to raw audio data. Unlike the `StreamDecoder` class,
+    the one-shot decoder operates on a single block of data, and
+    runs in a blocking manner, as opposed to in a background thread.
+
+    The compressed data is passed in via the constructor, and
+    blocks of raw uncompressed audio is passed back to the user via
+    the `callback`.
+
+    Args:
+        write_callback (fn): Function to call when there is uncompressed
+            audio data ready, see the example below for more information.
+        buffer (bytes): The FLAC encoded audio data
+
+    Examples:
+        An example callback which writes the audio data to file
+        using SoundFile.
+
+        .. code-block:: python
+            :linenos:
+
+            import soundfile as sf
+
+            def callback(self,
+                         audio: np.ndarray,
+                         sample_rate: int,
+                         num_channels: int,
+                         num_samples: int):
+
+                # ------------------------------------------------------
+                # Note: num_samples is the number of samples per channel
+                # ------------------------------------------------------
+                if self.output is None:
+                    self.output = sf.SoundFile(
+                        'output.wav', mode='w', channels=num_channels,
+                        samplerate=sample_rate
+                    )
+                self.output.write(audio)
+
+    Raises:
+        DecoderInitException: If initialisation of the decoder fails
+    """
+    def __init__(self,
+                 write_callback: Callable[[np.ndarray, int, int, int], None],
+                 buffer: bytes):
+        super().__init__()
+        self._done = False
+        self._buffer = deque()
+        self._buffer.append(buffer)
+        self._event = threading.Event()
+        self._event.set()
+        self._lock = threading.Lock()
+        self.write_callback = write_callback
+
+        rc = _lib.FLAC__stream_decoder_init_stream(
+            self._decoder,
+            _lib._read_callback,
+            _ffi.NULL,
+            _ffi.NULL,
+            _ffi.NULL,
+            _ffi.NULL,
+            _lib._write_callback,
+            _ffi.NULL,
+            _lib._error_callback,
+            self._decoder_handle
+        )
+        if rc != _lib.FLAC__STREAM_DECODER_INIT_STATUS_OK:
+            raise DecoderInitException(rc)
+
+        while len(self._buffer) > 0:
+            _lib.FLAC__stream_decoder_process_single(self._decoder)
+
+        self._done = True
+        super().finish()
+
+
 @_ffi.def_extern(error=_lib.FLAC__STREAM_DECODER_READ_STATUS_ABORT)
 def _read_callback(_decoder,
                    byte_buffer,
@@ -314,6 +402,12 @@ def _read_callback(_decoder,
     If an exception is raised here, the abort status is returned.
     """
     decoder = _ffi.from_handle(client_data)
+
+    # ----------------------------------------------------------
+    # Wait until there is something in the buffer, or an error
+    # occurs, or the end of the stream is reached.
+    # ----------------------------------------------------------
+    decoder._event.wait()
     if decoder._error:
         # ----------------------------------------------------------
         # If an error has been issued via the error callback, then
@@ -329,18 +423,13 @@ def _read_callback(_decoder,
         num_bytes[0] = 0
         return _lib.FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM
 
-    maximum_bytes = int(num_bytes[0])
-    while len(decoder._buffer) == 0:
-        # ----------------------------------------------------------
-        # Wait until there is something in the buffer
-        # ----------------------------------------------------------
-        time.sleep(0.01)
-
     # --------------------------------------------------------------
     # Ensure only the maximum bytes or less is taken from
     # the thread safe queue.
     # --------------------------------------------------------------
     data = bytes()
+    maximum_bytes = int(num_bytes[0])
+    decoder._lock.acquire()
     if len(decoder._buffer[0]) <= maximum_bytes:
         data = decoder._buffer.popleft()
         maximum_bytes -= len(data)
@@ -349,6 +438,14 @@ def _read_callback(_decoder,
         data += decoder._buffer[0][0:maximum_bytes]
         decoder._buffer[0] = decoder._buffer[0][maximum_bytes:]
 
+    # --------------------------------------------------------------
+    # If there is no more data to process from the buffer, then
+    # clear the event, the thread will await more data to process.
+    # --------------------------------------------------------------
+    if len(decoder._buffer) == 0 or (len(decoder._buffer) > 0 and len(decoder._buffer[0]) == 0):
+        decoder._event.clear()
+    decoder._lock.release()
+
     actual_bytes = len(data)
     num_bytes[0] = actual_bytes
     _ffi.memmove(byte_buffer, data, actual_bytes)
@@ -449,3 +546,4 @@ def _error_callback(_decoder,
         _lib.FLAC__StreamDecoderErrorStatusString[status]).decode()
     decoder.logger.error(f'Error in libFLAC decoder: {message}')
     decoder._error = message
+    decoder._event.set()
diff --git a/pyflac/encoder.py b/pyflac/encoder.py
index fde1797..5261f75 100644
--- a/pyflac/encoder.py
+++ b/pyflac/encoder.py
@@ -4,7 +4,7 @@
 #
 #  pyFLAC encoder
 #
-#  Copyright (c) 2020-2021, Sonos, Inc.
+#  Copyright (c) 2020-2024, Sonos, Inc.
 #  All rights reserved.
 #
 # ------------------------------------------------------------------------------
@@ -335,6 +335,8 @@ class FileEncoder(_Encoder):
     The pyFLAC file encoder reads the raw audio data from the WAV file and
     writes the encoded audio data to a FLAC file.
 
+    Note that the input WAV file must be either PCM_16 or PCM_32.
+
     Args:
         input_file (pathlib.Path): Path to the input WAV file
         output_file (pathlib.Path): Path to the output FLAC file, a temporary
@@ -345,8 +347,6 @@ class FileEncoder(_Encoder):
         blocksize (int): The size of the block to be returned in the
             callback. The default is 0 which allows libFLAC to determine
             the best block size.
-        dtype (str): The data type to use in the FLAC encoder, either int16 or int32,
-            defaults to int16.
         streamable_subset (bool): Whether to use the streamable subset for encoding.
             If true the encoder will check settings for compatibility. If false,
             the settings may take advantage of the full range that the format allows.
@@ -365,13 +365,17 @@ def __init__(self,
                  output_file: Path = None,
                  compression_level: int = 5,
                  blocksize: int = 0,
-                 dtype: str = 'int16',
                  streamable_subset: bool = True,
                  verify: bool = False):
         super().__init__()
 
-        if dtype not in ('int16', 'int32'):
-            raise ValueError('FLAC encoding data type must be either int16 or int32')
+        info = sf.info(str(input_file))
+        if info.subtype == 'PCM_16':
+            dtype = 'int16'
+        elif info.subtype == 'PCM_32':
+            dtype = 'int32'
+        else:
+            raise ValueError(f'WAV input data type must be either PCM_16 or PCM_32: Got {info.subtype}')
 
         self.__raw_audio, sample_rate = sf.read(str(input_file), dtype=dtype)
         if output_file:
diff --git a/pyproject.toml b/pyproject.toml
index ae42b9e..23a63f2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 #
 #  pyFLAC
 #
-#  Copyright (c) 2020-2023, Sonos, Inc.
+#  Copyright (c) 2020-2024, Sonos, Inc.
 #  All rights reserved.
 #
 # ------------------------------------------------------------------------------
@@ -12,7 +12,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "pyFLAC"
-version = "2.2.0"
+version = "3.0.0"
 description = "A Python wrapper for libFLAC"
 readme = "README.rst"
 authors = [{ name = "Joe Todd", email = "joe.todd@sonos.com" }]
@@ -29,6 +29,7 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Topic :: Multimedia :: Sound/Audio",
 ]
 dependencies = ["cffi>=1.4.0", "numpy>=1.22", "SoundFile>=0.11"]
diff --git a/tests/test_decoder.py b/tests/test_decoder.py
index acfdc88..61b2d43 100644
--- a/tests/test_decoder.py
+++ b/tests/test_decoder.py
@@ -4,7 +4,7 @@
 #
 #  pyFLAC decoder test suite
 #
-#  Copyright (c) 2020-2021, Sonos, Inc.
+#  Copyright (c) 2020-2024, Sonos, Inc.
 #  All rights reserved.
 #
 # ------------------------------------------------------------------------------
@@ -20,6 +20,7 @@
 from pyflac import (
     FileDecoder,
     StreamDecoder,
+    OneShotDecoder,
     DecoderState,
     DecoderInitException,
     DecoderProcessException
@@ -76,6 +77,7 @@ def test_process(self):
         self.decoder.process(test_data)
         self.decoder.finish()
         self.assertTrue(self.write_callback_called)
+        self.assertFalse(self.decoder._thread.is_alive())
 
     def test_process_blocks(self):
         """ Test that FLAC data can be decoded in blocks """
@@ -149,5 +151,31 @@ def test_process_32_bit_file(self):
         self.assertIsNotNone(self.decoder.process())
 
 
+class TestOneShotDecoder(unittest.TestCase):
+    """
+    Test suite for the one-shot decoder class.
+    """
+    def setUp(self):
+        self.decoder = None
+        self.write_callback_called = False
+        self.tests_path = pathlib.Path(__file__).parent.absolute()
+
+    def _write_callback(self, data, rate, channels, samples):
+        assert isinstance(data, np.ndarray)
+        assert isinstance(rate, int)
+        assert isinstance(channels, int)
+        assert isinstance(samples, int)
+        self.write_callback_called = True
+
+    def test_process(self):
+        """ Test that FLAC data can be decoded """
+        test_path = self.tests_path / 'data/stereo.flac'
+        with open(test_path, 'rb') as flac:
+            test_data = flac.read()
+
+        self.decoder = OneShotDecoder(write_callback=self._write_callback, buffer=test_data)
+        self.assertTrue(self.write_callback_called)
+
+
 if __name__ == '__main__':
     unittest.main(failfast=True)
diff --git a/tests/test_encoder.py b/tests/test_encoder.py
index f84dca7..ac876e0 100644
--- a/tests/test_encoder.py
+++ b/tests/test_encoder.py
@@ -4,7 +4,7 @@
 #
 #  pyFLAC encoder test suite
 #
-#  Copyright (c) 2020-2021, Sonos, Inc.
+#  Copyright (c) 2020-2024, Sonos, Inc.
 #  All rights reserved.
 #
 # ------------------------------------------------------------------------------
@@ -237,12 +237,6 @@ def test_invalid_blocksize(self):
         with self.assertRaisesRegex(EncoderInitException, 'FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_BLOCK_SIZE'):
             self.encoder._init()
 
-    def test_invalid_dtype(self):
-        """ Test than an exception is raised if given an invalid dtype """
-        self.default_kwargs['dtype'] = 'int24'
-        with self.assertRaisesRegex(ValueError, 'FLAC encoding data type must be either int16 or int32'):
-            self.encoder = FileEncoder(**self.default_kwargs)
-
     def test_blocksize_streamable_subset(self):
         """ Test that an exception is raised if blocksize is outside the streamable subset """
         self.default_kwargs['blocksize'] = 65535
@@ -291,7 +285,6 @@ def test_process_32_bit_file(self):
         test_path = pathlib.Path(__file__).parent.absolute() / 'data/32bit.wav'
         self.default_kwargs['input_file'] = test_path
         self.default_kwargs['output_file'] = pathlib.Path(self.temp_file.name)
-        self.default_kwargs['dtype'] = 'int32'
         self.encoder = FileEncoder(**self.default_kwargs)
         self.encoder.process()
 
diff --git a/tox.ini b/tox.ini
index 7d87a70..9dfabc9 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py38, py39, py310, py311
+envlist = py38, py39, py310, py311, py312
 
 [testenv]
 deps = pytest==7.4.0