Merge branch 'feature-smpl' into maint-poetry

iluvcapra · Nov 24, 2024 · 7104f3c · 7104f3c
2 parents 28e0532 + f04c563
commit 7104f3c
Show file tree

Hide file tree

Showing 5 changed files with 137 additions and 6 deletions.
diff --git a/docs/source/references.rst b/docs/source/references.rst
@@ -36,6 +36,11 @@ iXML
 * `Gallery Software iXML Specification <http://www.gallery.co.uk/ixml/>`_
 
 
+Sampler Metadata
+----------------
+
+* `RecordingBlogs.com — Sample chunk (of a Wave file) <https://www.recordingblogs.com/wiki/sample-chunk-of-a-wave-file>`_
+
 RIFF Metadata
 -------------
 * `1991. Multimedia Programming Interface and Data Specifications 1.0 <https://www.aelius.com/njh/wavemetatools/doc/riffmci.pdf>`_

diff --git a/tests/test_files/smpl/alarm_citizen_loop1.wav b/tests/test_files/smpl/alarm_citizen_loop1.wav
diff --git a/wavinfo/__main__.py b/wavinfo/__main__.py
@@ -7,12 +7,15 @@
 import json
 from enum import Enum
 import importlib.metadata
+from base64 import b64encode
 
 
 class MyJSONEncoder(json.JSONEncoder):
     def default(self, o):
+        # Fallback hook for values the stock JSON encoder cannot serialize:
+        # Enum members are emitted as their member name, bytes are emitted as
+        # base64 text, and anything else is deferred to the base class.
         if isinstance(o, Enum):
             return o._name_
+        elif isinstance(o, bytes):
+            return b64encode(o).decode('ascii')
         else:
             return super().default(o)
 

diff --git a/wavinfo/wave_reader.py b/wavinfo/wave_reader.py
@@ -5,16 +5,19 @@
 
 import pathlib
 
+
 from .riff_parser import parse_chunk, ChunkDescriptor, ListChunkDescriptor
 from .wave_ixml_reader import WavIXMLFormat
 from .wave_bext_reader import WavBextReader
 from .wave_info_reader import WavInfoChunkReader
 from .wave_adm_reader import WavADMReader
 from .wave_dbmd_reader import WavDolbyMetadataReader
 from .wave_cues_reader import WavCuesReader
-
+from .wave_smpl_reader import WavSmplReader
 
 #: Calculated statistics about the audio data.
+
+
 class WavDataDescriptor(NamedTuple):
     byte_count: int
     frame_count: int
@@ -80,6 +83,9 @@ def __init__(self, path, info_encoding='latin_1', bext_encoding='ascii'):
         #: RIFF cues markers, labels, and notes.
         self.cues: Optional[WavCuesReader] = None
 
+        #: Sampler `smpl` metadata
+        self.smpl: Optional[WavSmplReader] = None
+
         if hasattr(path, 'read'):
             self.get_wav_info(path)
             self.url = 'about:blank'
@@ -110,6 +116,7 @@ def get_wav_info(self, wavfile):
         self.info = self._get_info(wavfile, encoding=self.info_encoding)
         self.dolby = self._get_dbmd(wavfile)
         self.cues = self._get_cue(wavfile)
+        self.smpl = self._get_sampler_loops(wavfile)
         self.data = self._describe_data()
 
     def _find_chunk_data(self, ident, from_stream,
@@ -203,18 +210,23 @@ def _get_cue(self, f):
         return WavCuesReader.read_all(f, cue, labls, ltxts, notes,
                                       fallback_encoding=self.info_encoding)
 
+    def _get_sampler_loops(self, f):
+        """
+        Build a reader for the `smpl` chunk of the stream *f*.
+
+        :returns: a :class:`WavSmplReader` wrapping the chunk data, or
+            ``None`` when the lookup yields nothing (or an empty payload).
+        """
+        # NOTE(review): relies on _find_chunk_data(..., default_none=True)
+        # handing back a falsy value when no `smpl` chunk exists -- confirm.
+        smpl_payload = self._find_chunk_data(b'smpl', f, default_none=True)
+        if not smpl_payload:
+            return None
+        return WavSmplReader(smpl_payload)
+
     # FIXME: this should probably be named "iter()"
     def walk(self) -> Generator[str, str, Any]:
         """
         Walk all of the available metadata fields.
 
         :yields: tuples of the *scope*, *key*, and *value* of
             each metadatum. The *scope* value will be one of
-            "fmt", "data", "ixml", "bext", "info", "dolby", "cues" or "adm".
+            "fmt", "data", "ixml", "bext", "info", "dolby", "cues", "adm" or
+            "smpl".
         """
 
         scopes = ('fmt', 'data', 'ixml', 'bext', 'info', 'adm', 'cues',
-                  'dolby')
+                  'dolby', 'smpl')
 
         for scope in scopes:
             if scope in ['fmt', 'data']:
@@ -223,10 +235,10 @@ def walk(self) -> Generator[str, str, Any]:
                     yield scope, field, attr.__getattribute__(field)
 
             else:
-                dict = self.__getattribute__(scope).to_dict(
+                mdict = self.__getattribute__(scope).to_dict(
                 ) if self.__getattribute__(scope) else {}
-                for key in dict.keys():
-                    yield scope, key, dict[key]
+                for key in mdict.keys():
+                    yield scope, key, mdict[key]
 
     def __repr__(self):
         return 'WavInfoReader({}, {}, {})'.format(self.path,

diff --git a/wavinfo/wave_smpl_reader.py b/wavinfo/wave_smpl_reader.py
@@ -0,0 +1,111 @@
+import struct
+
+from typing import Tuple, NamedTuple, List
+
+
+class WaveSmplLoop(NamedTuple):
+    """
+    One loop record of a `smpl` chunk, holding its six integer fields in
+    the order they are unpacked from the chunk.
+    """
+    ident: int
+    loop_type: int
+    start: int
+    end: int
+    fraction: int
+    repetition_count: int
+
+    def loop_type_desc(self):
+        """
+        Give a symbolic name for :attr:`loop_type`.
+
+        :returns: ``'FORWARD'``, ``'FORWARD_BACKWARD'`` or ``'BACKWARD'``
+            for the codes 0, 1 and 2, ``'RESERVED'`` for codes 3 through
+            31, and ``'VENDOR'`` for every other value.
+        """
+        well_known = {0: 'FORWARD', 1: 'FORWARD_BACKWARD', 2: 'BACKWARD'}
+        name = well_known.get(self.loop_type)
+        if name is not None:
+            return name
+        return 'RESERVED' if 3 <= self.loop_type <= 31 else 'VENDOR'
+
+    def to_dict(self):
+        """
+        Export the loop as a plain dictionary, adding the symbolic loop
+        type under ``'loop_type_description'``.
+        """
+        ident, loop_type, start, end, fraction, repetitions = self
+        exported = {'ident': ident, 'loop_type': loop_type}
+        exported['loop_type_description'] = self.loop_type_desc()
+        exported['start_samples'] = start
+        exported['end_samples'] = end
+        exported['fraction'] = fraction
+        exported['repetition_count'] = repetitions
+        return exported
+
+
+class WavSmplReader:
+    """
+    Sampler metadata decoded from the data of a `smpl` chunk: a fixed
+    header, a list of loop records, and optional sampler-specific bytes.
+    """
+
+    def __init__(self, smpl_data: bytes):
+        """
+        Read sampler metadata from smpl chunk.
+
+        :param smpl_data: the bytes of the `smpl` chunk, beginning at the
+            first header field.
+        :raises struct.error: if `smpl_data` is shorter than the fixed
+            header.
+        """
+
+        header_struct = struct.Struct("<IIIIIIbbbbII")
+        loop_struct = struct.Struct("<IIIIII")
+
+        header = header_struct.unpack(smpl_data[0:header_struct.size])
+
+        #: The MIDI Manufacturer's Association code for the sampler
+        #: manufacturer, or 0 if not specific.
+        self.manufacturer: int = header[0]
+
+        #: The manufacturer-assigned code for their specific sampler model, or
+        #: 0 if not specific.
+        self.product: int = header[1]
+
+        #: The number of nanoseconds in one audio frame.
+        self.sample_period_ns: int = header[2]
+
+        #: The MIDI note number for the loops in this sample
+        self.midi_note: int = header[3]
+
+        #: Fine-tuning offset above the MIDI note the loops tune for.
+        # NOTE(review): the smpl specification appears to define this field
+        # as a 32-bit fraction of ONE semitone (0x80000000 = 50 cents), not
+        # a count of semitones -- confirm before relying on the name.
+        self.midi_pitch_fraction_semis: int = header[4]
+
+        #: SMPTE timecode format, one of (0, 24, 25, 29, 30)
+        self.smpte_format: int = header[5]
+
+        #: The SMPTE offset to apply, as a tuple of four ints representing
+        #: hh, mm, ss, ff
+        # NOTE(review): the four bytes are taken in file order; confirm
+        # against the specification that this order really is hh, mm, ss, ff.
+        self.smpte_offset: Tuple[int, int, int, int] = header[6:10]
+
+        declared_loop_count = header[10]
+        sampler_udata_length = header[11]
+
+        loops_start = header_struct.size
+        loops_end = loops_start + loop_struct.size * declared_loop_count
+
+        # A truncated chunk, or a loop count larger than the data actually
+        # present, would leave a partial record at the end of the slice and
+        # make iter_unpack() raise struct.error. Decode only the whole loop
+        # records that are really there.
+        available = min(loops_end, len(smpl_data)) - loops_start
+        whole_bytes = available - available % loop_struct.size
+        loop_buffer = smpl_data[loops_start:loops_start + whole_bytes]
+
+        #: List of loops in the file.
+        self.sample_loops: List[WaveSmplLoop] = [
+            WaveSmplLoop(*loop_fields)
+            for loop_fields in loop_struct.iter_unpack(loop_buffer)]
+
+        #: Sampler-specific user data.
+        # Located after the DECLARED loop area, so a short chunk yields empty
+        # bytes here instead of misreading a partial loop record.
+        self.sampler_udata: bytes = smpl_data[
+            loops_end:loops_end + sampler_udata_length]
+
+    def to_dict(self):
+        """
+        Export the sampler metadata as a plain dictionary; each loop is
+        exported through its own ``to_dict()``.
+        """
+        return {
+            # NOTE(review): this key is misspelled ("manufactuer"). It is
+            # left as-is because renaming it changes the exported schema.
+            'manufactuer': self.manufacturer,
+            'product': self.product,
+            'sample_period_ns': self.sample_period_ns,
+            'midi_note': self.midi_note,
+            'midi_pitch_fraction_semis': self.midi_pitch_fraction_semis,
+            'smpte_format': self.smpte_format,
+            'smpte_offset': "%02i:%02i:%02i:%02i" % self.smpte_offset,
+            'loops': [x.to_dict() for x in self.sample_loops],
+            'sampler_user_data': self.sampler_udata,
+        }