diff --git a/src/audio_metadata/api.py b/src/audio_metadata/api.py
index fe0e09f..5fff163 100644
--- a/src/audio_metadata/api.py
+++ b/src/audio_metadata/api.py
@@ -19,6 +19,7 @@
 from .formats import (
     FLAC,
     MP3,
+    MP4,
     WAV,
     ID3v2,
     MP3StreamInfo,
@@ -61,6 +62,12 @@ def determine_format(data):
     if d.startswith(b'RIFF'):
         return WAV
 
+    if (
+        d[4:8].lower() == b'ftyp'
+        and d[8:12].lower() in (b'dash', b'm4a ')
+    ):
+        return MP4
+
     if d.startswith(b'ID3'):
         ID3v2.load(data)
 
diff --git a/src/audio_metadata/exceptions.py b/src/audio_metadata/exceptions.py
index 3cecc12..15e747e 100644
--- a/src/audio_metadata/exceptions.py
+++ b/src/audio_metadata/exceptions.py
@@ -1,5 +1,6 @@
 __all__ = [
     'AudioMetadataException',
+    'InvalidAtom',
     'InvalidBlock',
     'InvalidChunk',
     'InvalidComment',
@@ -16,6 +17,12 @@ class AudioMetadataException(Exception):
     pass
 
 
+class InvalidAtom(AudioMetadataException):
+    """Exception raised when an MPEG atom is invalid."""
+
+    pass
+
+
 class InvalidBlock(AudioMetadataException):
     """Exception raised when a FLAC metadata block is invalid."""
 
diff --git a/src/audio_metadata/formats/__init__.py b/src/audio_metadata/formats/__init__.py
index 1d61f15..1df6d6c 100644
--- a/src/audio_metadata/formats/__init__.py
+++ b/src/audio_metadata/formats/__init__.py
@@ -3,6 +3,7 @@
 from .id3v2 import *
 from .id3v2_frames import *
 from .mp3 import *
+from .mp4 import *
 from .ogg import *
 from .oggopus import *
 from .tables import *
@@ -16,6 +17,7 @@
     *id3v2_frames.__all__,
     *id3v2.__all__,
     *mp3.__all__,
+    *mp4.__all__,
     *ogg.__all__,
     *oggopus.__all__,
     *tables.__all__,
diff --git a/src/audio_metadata/formats/mp4.py b/src/audio_metadata/formats/mp4.py
new file mode 100644
index 0000000..8c2f0cd
--- /dev/null
+++ b/src/audio_metadata/formats/mp4.py
@@ -0,0 +1,628 @@
+__all__ = ['MP4']
+
+import os
+import struct
+
+from attr import attrib, attrs
+from bidict import frozenbidict
+from tbm_utils import (
+    AttrMapping,
+    LabelList,
+    datareader,
+)
+
+from .mp4_tags import MP4NumberTag, MP4Tag
+from .tables import MP4AudioObjectTypes, MP4SamplingFrequencies
+from ..exceptions import InvalidAtom, UnsupportedFormat
+from ..models import Format, StreamInfo, Tags
+
+try:  # pragma: nocover
+    import bitstruct.c as bitstruct
+except ImportError:  # pragma: nocover
+    import bitstruct
+
+PARENT_ATOMS = {
+    'ilst',
+    'mdia',
+    'meta',
+    'minf',
+    'moof',
+    'moov',
+    'stbl',
+    'traf',
+    'trak',
+    'udta'
+}
+EXT_DESCRIPTOR_TYPES = {b'\x80', b'\x81', b'\xFE'}
+
+
+# TODO: Custom type?
+@attrs(repr=False)
+class Atom(AttrMapping):
+    """One MP4 atom: start offset, total size, payload offset, type and child atoms."""
+
+    _start = attrib()
+    _size = attrib()
+    _data_start = attrib()
+    type = attrib()  # noqa
+    # A factory gives every instance its own list; a literal ``[]`` default would be shared.
+    _children = attrib(factory=list)
+
+    def __repr__(self):
+        """Return a repr that leaves out the underscore-prefixed attributes."""
+        repr_dict = {k: v for k, v in self.__dict__.items() if not k.startswith('_')}
+
+        return super().__repr__(repr_dict=repr_dict)
+
+    @datareader
+    @classmethod
+    def load(cls, data, level=0):
+        """Read one atom at the current position of ``data``, recursing into ``PARENT_ATOMS``; raise ``InvalidAtom`` on a bad header or size."""
+        children = []
+
+        atom_start = data_start = data.tell()
+        atom_header = data.read(8)
+
+        try:
+            atom_size, atom_type = struct.unpack('>I4s', atom_header[0:8])
+            data_start += 8
+        except struct.error:
+            raise InvalidAtom("Not a valid MP4 atom.")
+
+        atom_type = atom_type.decode('iso-8859-1')
+
+        if atom_size == 1:
+            try:
+                atom_size = struct.unpack('>Q', data.read(8))[0]
+                data_start += 8
+            except struct.error:
+                raise InvalidAtom("Not a valid MP4 atom.")
+
+            if atom_size < 16:
+                raise InvalidAtom("Not a valid MP4 atom.")
+        elif atom_size == 0:
+            if level != 0:
+                raise InvalidAtom("Not a valid MP4 atom.")
+
+            data.seek(0, os.SEEK_END)
+            atom_size = data.tell() - atom_start
+            data.seek(atom_start + 8, os.SEEK_SET)
+        elif atom_size < 8:
+            raise InvalidAtom("Not a valid MP4 atom.")
+
+        if atom_type in PARENT_ATOMS:
+            if atom_type == 'meta':
+                data.seek(4, os.SEEK_CUR)
+
+            while data.tell() < atom_start + atom_size:
+                children.append(Atom.load(data, level + 1))
+        else:
+            data.seek(atom_start + atom_size, os.SEEK_SET)
+
+        return cls(atom_start, atom_size, data_start, atom_type, children)
+
+    def get_child(self, path):
+        """Return the descendant at ``path`` (dotted string or list of atom types), or ``self`` if ``path`` is empty; raise ``KeyError`` if missing."""
+        if not path:
+            return self
+
+        if not self._children:
+            raise KeyError("No children found.")
+
+        if isinstance(path, str):
+            path = path.split('.')
+
+        for child in self._children:
+            if child.type == path[0]:
+                return child.get_child(path[1:])
+        else:
+            raise KeyError('Path not found.')
+
+    @datareader
+    def read_data(self, data):
+        """Return the bytes of this atom that follow its header, read from ``data``."""
+        data.seek(self._data_start, os.SEEK_SET)
+        atom_data = data.read(self._size - (self._data_start - self._start))
+
+        return atom_data
+
+
+# TODO: Custom type?
+class Atoms(LabelList):
+    """List of top-level atoms that can also be indexed by a dotted atom path."""
+
+    item_label = ('atom', 'atoms')
+
+    def __init__(self, items):
+        super().__init__(items)
+
+    def __getitem__(self, path):
+        """Return the atom at a dotted string ``path``; any other key is passed to list indexing."""
+        if isinstance(path, str):
+            path = path.split('.')
+
+            for atom in self.data:
+                if atom.type == path[0]:
+                    return atom.get_child(path[1:])
+            else:
+                raise KeyError(f'No atom of type {path[0]} found.')
+
+        return list.__getitem__(self.data, path)
+
+    @datareader
+    @classmethod
+    def load(cls, data):
+        """Read atoms from ``data`` one after another until one fails to load."""
+        atoms = []
+        while True:
+            try:
+                atoms.append(Atom.load(data, level=0))
+            except (InvalidAtom, struct.error):
+                break
+
+        return cls(atoms)
+
+
+class MP4Tags(Tags):
+    """Tags read from the children of an ``ilst`` atom."""
+
+    FIELD_MAP = frozenbidict({
+        'album': '©alb',
+        'albumsort': 'soal',
+        'albumartist': 'aART',
+        'albumartistsort': 'soaa',
+        'artist': '©ART',
+        'artistsort': 'soar',
+        'bpm': 'tmpo',
+        'category': 'catg',
+        'comment': '©cmt',
+        'compilation': 'cpil',
+        'composer': '©wrt',
+        'composersort': 'soco',
+        'copyright': 'cprt',
+        'date': '©day',
+        'description': 'desc',
+        'discnumber': 'disk',
+        'encodedby': '©too',
+        'genre': '©gen',
+        'genre_id3': 'gnre',
+        'grouping': '©grp',
+        'keyword': 'keyw',
+        'lyrics': '©lyr',
+        'pictures': 'covr',
+        'podcast': 'pcst',
+        'podcasturl': 'purl',
+        'rating': 'rtng',
+        'title': '©nam',
+        'titlesort': 'sonm',
+        'tracknumber': 'trkn'
+    })
+
+    @datareader
+    @classmethod
+    def load(cls, data, ilst):
+        """Build tags from the ``ilst`` children whose type is in ``FIELD_MAP``; each value is a one-item list keyed by atom type."""
+        fields = {}
+
+        for child in ilst._children:
+            if child.type in cls.FIELD_MAP.values():
+                tag = MP4Tag.load(data, child)
+                if tag is None:  # TODO
+                    continue
+
+                if isinstance(tag, MP4NumberTag):
+                    fields[tag.id] = [f'{tag.number}/{tag.total}']
+                else:
+                    fields[tag.id] = [tag.value]
+
+        return cls(**fields)
+
+
+@attrs(repr=False)
+class MP4StreamInfo(StreamInfo):
+    """Stream information gathered from the ``moov`` and ``mdat`` atoms."""
+
+    _start = attrib()
+    _size = attrib()
+    bit_depth = attrib()
+    bitrate = attrib()
+    channels = attrib()
+    codec = attrib()
+    codec_description = attrib()
+    duration = attrib()
+    sample_rate = attrib()
+
+    @staticmethod
+    def _parse_audio_sample_entry(ase_data):
+        """Return ``(channels, bit_depth, sample_rate)`` unpacked from fixed offsets of ``ase_data``."""
+        channels = struct.unpack(
+            '>H',
+            ase_data[16:18]
+        )[0]
+
+        bit_depth = struct.unpack(
+            '>H',
+            ase_data[18:20]
+        )[0]
+
+        sample_rate = struct.unpack(
+            '>I',
+            ase_data[22:26]
+        )[0]
+
+        return channels, bit_depth, sample_rate
+
+    @datareader
+    @staticmethod
+    def _parse_esds(data):
+        """Return ``(average_bitrate, sample_rate, codec_description)`` parsed from ``esds`` atom data."""
+        def _parse_audio_object_type(data):
+            audio_object_type_index = data.readbits(5)
+            audio_object_type_ext = None
+
+            if audio_object_type_index == 31:
+                data.readbits(5)
+
+                audio_object_type_ext = data.readbits(6)
+                audio_object_type_index = 32 + audio_object_type_ext
+
+            return audio_object_type_index, audio_object_type_ext
+
+        def _parse_sample_rate(data):
+            sampling_frequency_index = data.readbits(4)
+            if sampling_frequency_index == 15:
+                sample_rate = data.readbits(24)
+            else:
+                try:
+                    sample_rate = MP4SamplingFrequencies[sampling_frequency_index]
+                except IndexError:
+                    sample_rate = None
+
+            return sample_rate
+        version = data.readbits(8)
+        if version != 0:
+            raise Exception
+
+        data.seek(3, os.SEEK_CUR)
+        if data.readbits(8) == 3:
+            while True:
+                b = data.read(1)
+                if b not in EXT_DESCRIPTOR_TYPES:
+                    break
+
+        descriptor_type_length = b
+        es_id = data.readbits(16)
+
+        stream_dependence_flag, url_flag, ocr_stream_flag = bitstruct.unpack(
+            'b1 b1 b1',
+            data.read(1)
+        )  # TODO: Stream priority
+
+        if stream_dependence_flag:
+            stream_dependence = data.readbits(16)
+        if url_flag:
+            url_length = data.readbits(8)
+            url = data.read(url_length)
+        if ocr_stream_flag:
+            ocr = data.readbits(16)
+
+        if data.readbits(8) == 4:
+            while True:
+                b = data.read(1)
+                if b not in EXT_DESCRIPTOR_TYPES:
+                    break
+
+        object_type_indication = data.readbits(8)
+        stream_type, up_stream, reserved = bitstruct.unpack(
+            'u6 b1 u1',
+            data.read(1)
+        )
+
+        if (
+            object_type_indication != 64
+            or stream_type != 5
+        ):
+            raise Exception
+
+        buffer_size = data.readbits(24)
+        max_bitrate = data.readbits(32)
+        average_bitrate = data.readbits(32)
+
+        if data.readbits(8) == 5:
+            while True:
+                b = data.read(1)
+                if b not in EXT_DESCRIPTOR_TYPES:
+                    break
+
+        audio_object_type_index, audio_object_type_ext = _parse_audio_object_type(data)
+
+        try:
+            codec_description = MP4AudioObjectTypes[audio_object_type_index]
+        except IndexError:
+            codec_description = None
+
+        sample_rate = _parse_sample_rate(data)
+
+        channel_config = data.readbits(4)  # TODO: Channels
+
+        sbr_present = False
+        ps_present = False
+        ext_sample_rate = None
+
+        if audio_object_type_index in [5, 29]:
+            audio_object_type_ext = 5
+            sbr_present = True
+
+            if audio_object_type_index == 29:
+                ps_present = True
+
+            ext_sample_rate = _parse_sample_rate(data)
+            audio_object_type_index, _ = _parse_audio_object_type(data)
+
+            if audio_object_type_index == 22:
+                ext_channel_config = data.readbits(4)
+        else:
+            audio_object_type_ext = None
+
+        if audio_object_type_index in [1, 2, 3, 4, 6, 7, 17, 19, 20, 21, 22, 23]:
+            try:
+                data.read(1)
+
+                core_coder_dependence = data.readbits(1)
+                if core_coder_dependence:
+                    data.readbits(14)
+
+                extension_flag = data.readbits(1)
+
+                if not channel_config:
+                    pass  # TODO: AAC Program Config Element
+
+                if audio_object_type_index in [6, 20]:
+                    data.readbits(3)
+
+                if extension_flag:
+                    if audio_object_type_index == 22:
+                        data.readbits(16)
+
+                    if audio_object_type_index in [17, 19, 20, 23]:
+                        data.readbits(3)
+
+                    extension_flag_3 = data.readbits(1)
+                    if extension_flag_3 != 0:
+                        raise Exception  # TODO
+            except Exception:
+                raise
+        else:
+            raise UnsupportedFormat("Not a supported MP4 audio object type.")
+
+        if audio_object_type_index in [17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 39]:
+            ep_config = data.readbits(2)
+            if ep_config in [2, 3]:
+                raise UnsupportedFormat
+
+        # TODO: Finish/check.
+        if (
+            audio_object_type_ext != 5
+            and (len(data.peek()) * 8) - data.bit_count >= 16
+        ):
+            sync_extension_type = data.readbits(11)
+            if sync_extension_type == 695:
+                audio_object_type_ext, _ = _parse_audio_object_type(data)
+
+                if audio_object_type_ext == 5:
+                    sbr_present = bool(data.readbits(1))
+                    if sbr_present:
+                        ext_sample_rate = _parse_sample_rate(data)
+
+                        if (len(data.peek()) * 8) - data.bit_count >= 12:
+                            sync_extension_type = data.readbits(11)
+                            if sync_extension_type == 1352:
+                                ps_present = bool(data.readbits(1))
+
+                if audio_object_type_ext == 22:
+                    sbr_present = bool(data.readbits(1))
+                    if sbr_present:
+                        ext_sample_rate = _parse_sample_rate(data)
+                    ext_channel_config = data.readbits(4)
+
+        return average_bitrate, sample_rate, codec_description
+
+    @staticmethod
+    def _parse_alac(alac_data):
+        """Return ``(bit_depth, channels, bitrate, sample_rate)`` read from fixed offsets of ``alac`` atom data."""
+        version = alac_data[0]
+        if version != 0:
+            raise Exception
+
+        compatible_version = alac_data[8]
+        if compatible_version != 0:
+            raise UnsupportedFormat
+
+        bit_depth = alac_data[9]
+        channels = alac_data[13]
+        bitrate = bitstruct.unpack('u32', alac_data[20:24])[0]
+        sample_rate = bitstruct.unpack('u32', alac_data[24:28])[0]
+
+        return bit_depth, channels, bitrate, sample_rate
+
+    @staticmethod
+    def _parse_ac3(ac3_data):
+        """Return ``(channels, bitrate)`` from the first three bytes of ``dac3`` atom data; ``bitrate`` is ``None`` for an unknown index."""
+        _, _, _, acmod, lfeon, bitrate_index = bitstruct.unpack(
+            'u2 u5 u3 u3 u1 u5',
+            ac3_data[0:3]
+        )
+
+        channels = [2, 1, 2, 3, 3, 4, 4, 5][acmod] + lfeon
+
+        try:
+            bitrate = [
+                32,
+                40,
+                48,
+                56,
+                64,
+                80,
+                96,
+                112,
+                128,
+                160,
+                192,
+                224,
+                256,
+                320,
+                384,
+                448,
+                512,
+                576,
+                640
+            ][bitrate_index] * 1000
+        except IndexError:
+            bitrate = None
+
+        return channels, bitrate
+
+    @staticmethod
+    def _parse_mdhd(mdhd_data):
+        """Return ``(version, flags, size, duration)`` from ``mdhd`` atom data; ``duration`` is ``size / unit``, or 0 when ``unit`` is 0."""
+        if len(mdhd_data) < 4:
+            raise Exception
+
+        version = mdhd_data[0]
+        flags = int.from_bytes(mdhd_data[1:4], 'big')
+
+        if version == 0:
+            offset = 8
+            struct_pattern = '>2I'
+        elif version == 1:
+            offset = 16
+            struct_pattern = '>IQ'
+        else:
+            raise Exception
+
+        end = offset + struct.calcsize(struct_pattern)
+        unit, size = struct.unpack(struct_pattern, mdhd_data[4:][offset:end])
+
+        try:
+            duration = size / unit
+        except ZeroDivisionError:
+            duration = 0
+
+        return version, flags, size, duration
+
+    @datareader
+    @classmethod
+    def load(cls, data, atoms):
+        """Build stream info from the first ``trak`` under ``moov`` whose ``hdlr`` data holds ``soun`` at bytes 8-12."""
+        try:
+            moov = atoms['moov']
+        except KeyError:
+            raise
+
+        for child in moov._children:
+            if child.type == 'trak':
+                trak = child
+
+                hdlr = trak.get_child('mdia.hdlr')
+                hdlr_data = hdlr.read_data(data)
+
+                if hdlr_data[8:12] == b'soun':
+                    break
+        else:
+            raise Exception
+
+        mdhd = trak.get_child('mdia.mdhd')
+        version, flags, size, duration = cls._parse_mdhd(mdhd.read_data(data))
+
+        if version != 0:
+            raise Exception
+
+        try:
+            stsd = trak.get_child('mdia.minf.stbl.stsd')
+        except KeyError:
+            raise
+        else:
+            stsd_data = stsd.read_data(data)
+
+        num_entries = struct.unpack(
+            '>I',
+            stsd_data[4:8]
+        )[0]
+
+        if num_entries == 0:
+            raise Exception
+
+        audio_sample_entry = Atom.load(stsd_data[8:])
+        ase_data = audio_sample_entry.read_data(stsd_data[8:])
+        codec = audio_sample_entry.type
+
+        channels, bit_depth, sample_rate = (
+            cls._parse_audio_sample_entry(ase_data)
+        )
+
+        extra = Atom.load(ase_data[28:])
+
+        bitrate = None
+        # TODO: Other formats.
+        if codec == 'mp4a' and extra.type == 'esds':
+            esds_data = extra.read_data(ase_data[28:])
+            bitrate, sample_rate, codec_description = cls._parse_esds(esds_data)
+        elif codec == 'alac' and extra.type == 'alac':
+            alac_data = extra.read_data(ase_data[28:])
+            bit_depth, channels, bitrate, sample_rate = cls._parse_alac(alac_data)
+            codec_description = 'ALAC'
+        elif codec == 'ac-3' and extra.type == 'dac3':
+            ac3_data = extra.read_data(ase_data[28:])
+            channels, bitrate_ = cls._parse_ac3(ac3_data)
+
+            if bitrate_:
+                bitrate = bitrate_
+
+            codec_description = 'AC-3'
+        else:
+            raise UnsupportedFormat("Not a supported MP4 audio codec.")
+
+        audio_start = atoms['mdat']._start
+        audio_size = atoms['mdat']._size
+
+        # ``_parse_mdhd`` reports a duration of 0 when the time unit is 0; never divide by it.
+        if not bitrate:
+            bitrate = audio_size * 8 / duration if duration else 0
+
+        return cls(
+            audio_start,
+            audio_size,
+            bit_depth,
+            bitrate,
+            channels,
+            codec,
+            codec_description,
+            duration,
+            sample_rate
+        )
+
+
+class MP4(Format):
+    """MP4 file format: stream info, tags and pictures."""
+
+    @classmethod
+    def load(cls, data):
+        """Load stream info, tags and pictures from ``data``, then close the underlying file object."""
+        self = super()._load(data)
+
+        atoms = Atoms.load(self._obj)
+
+        self.streaminfo = MP4StreamInfo.load(data, atoms)
+
+        try:
+            ilst = atoms['moov.udta.meta.ilst']
+        except KeyError:
+            self.tags = MP4Tags()
+        else:
+            self.tags = MP4Tags.load(data, ilst)
+
+        self.pictures = self.tags.pop('pictures', [])
+
+        self._obj.close()
+
+        return self
diff --git a/src/audio_metadata/formats/mp4_tags.py b/src/audio_metadata/formats/mp4_tags.py
new file mode 100644
index 0000000..6f662b3
--- /dev/null
+++ b/src/audio_metadata/formats/mp4_tags.py
@@ -0,0 +1,143 @@
+__all__ = [
+]
+
+import struct
+
+from attr import attrib, attrs
+from tbm_utils import (
+    AttrMapping,
+    datareader,
+)
+
+from .tables import MP4CoverFormat
+from ..models import Picture
+from ..utils import get_image_size
+
+
+class MP4Cover(Picture):
+    """Picture built from ``covr`` atom data."""
+
+    @datareader
+    @classmethod
+    def load(cls, data):
+        """Build a cover from ``covr`` atom data: size at bytes 0-4, format code at bytes 8-12, image bytes from offset 16."""
+        data = data.read()
+
+        size = struct.unpack('>I', data[0:4])[0]
+        format_ = MP4CoverFormat(struct.unpack('>I', data[8:12])[0])
+        image_data = data[16:size]
+        width, height = get_image_size(image_data)
+
+        return cls(format=format_, width=width, height=height, data=image_data)
+
+
+@attrs(repr=False)
+class MP4BaseTag(AttrMapping):
+    """Base class for MP4 tags; ``id`` holds the atom type."""
+
+    id = attrib()  # noqa
+
+
+@attrs(repr=False)
+class MP4CoverTag(MP4BaseTag):
+    """Tag whose value is converted with ``MP4Cover.load``."""
+
+    value = attrib(converter=MP4Cover.load)
+
+
+@attrs(repr=False)
+class MP4FloatTag(MP4BaseTag):
+    """Tag whose value is converted to ``float``."""
+
+    value = attrib(converter=float)
+
+
+@attrs(repr=False)
+class MP4IntegerTag(MP4BaseTag):
+    """Tag whose value is converted to ``int``."""
+
+    value = attrib(converter=int)
+
+
+@attrs(repr=False)
+class MP4NumberTag(MP4BaseTag):
+    """Tag holding a ``number`` and a ``total``, used for ``disk`` and ``trkn`` atoms."""
+
+    number = attrib(converter=int)
+    total = attrib(converter=int)
+
+
+@attrs(repr=False)
+class MP4TextTag(MP4BaseTag):
+    """Tag whose value is converted to ``str``."""
+
+    value = attrib(converter=str)
+
+
+@attrs(repr=False)
+class MP4Tag(MP4BaseTag):
+    """Generic tag plus the loader that picks a concrete tag class per atom."""
+
+    value = attrib()
+
+    # https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/Metadata/Metadata.html#//apple_ref/doc/uid/TP40000939-CH1-SW34
+    decoders = {
+        0: lambda d: d,
+        1: lambda d: d.decode('utf-8', 'replace'),
+        2: lambda d: d.decode('utf-16', 'replace'),
+        3: lambda d: d.decode('s/jis', 'replace'),
+        4: lambda d: d.decode('utf-8', 'replace'),
+        5: lambda d: d.decode('utf-16', 'replace'),
+        13: lambda d: d,
+        14: lambda d: d,
+        # 21/22 are big-endian integers of varying width, so a fixed struct format cannot decode them.
+        21: lambda d: int.from_bytes(d, 'big', signed=True),
+        22: lambda d: int.from_bytes(d, 'big', signed=False),
+        23: lambda d: struct.unpack('>f', d)[0],
+        24: lambda d: struct.unpack('>d', d)[0],
+        27: lambda d: d,
+        28: lambda d: d,
+        65: lambda d: struct.unpack('b', d)[0],
+        66: lambda d: struct.unpack('>h', d)[0],
+        67: lambda d: struct.unpack('>i', d)[0],
+        74: lambda d: struct.unpack('>q', d)[0],
+        75: lambda d: struct.unpack('B', d)[0],
+        76: lambda d: struct.unpack('>H', d)[0],
+        77: lambda d: struct.unpack('>I', d)[0],
+        78: lambda d: struct.unpack('>Q', d)[0]
+    }
+
+    @datareader
+    @classmethod
+    def load(cls, data, atom):
+        """Return the tag object for ``atom``, or ``None`` (after a warning) when its data type is not handled."""
+        if atom.type in ['disk', 'trkn']:
+            track_number, track_total = struct.unpack('>HH', atom.read_data(data)[18:22])
+
+            return MP4NumberTag(atom.type, track_number, track_total)
+        elif atom.type == 'covr':
+            return MP4CoverTag(atom.type, atom.read_data(data))
+        else:
+            data_type = struct.unpack('>I', atom.read_data(data)[8:12])[0]
+
+            try:
+                value = cls.decoders[data_type](atom.read_data(data)[16:])
+            except KeyError:
+                value = atom.read_data(data)[16:]
+
+            if data_type in [0, 1, 2, 3]:
+                return MP4TextTag(atom.type, value)
+            elif (
+                data_type in range(21, 23)
+                or data_type in range(65, 68)
+                or data_type in range(74, 79)
+            ):
+                return MP4IntegerTag(atom.type, value)
+            elif data_type in range(23, 25):
+                return MP4FloatTag(atom.type, value)
+            else:  # TODO: Handle other types.
+                import warnings
+                warnings.warn(
+                    f"Unsupported data type found ('{data_type}') in "
+                    f"'{atom.type}' atom with value: '{value}'"
+                )
diff --git a/src/audio_metadata/formats/tables.py b/src/audio_metadata/formats/tables.py
index 2cd6f72..d501cd2 100644
--- a/src/audio_metadata/formats/tables.py
+++ b/src/audio_metadata/formats/tables.py
@@ -16,6 +16,9 @@
     'MP3ChannelMode',
     'MP3SampleRates',
     'MP3SamplesPerFrame',
+    'MP4AudioObjectTypes',
+    'MP4CoverFormat',
+    'MP4SamplingFrequencies'
 ]
 
 from enum import (
@@ -457,3 +460,79 @@ class MP3ChannelMode(_BaseIntEnum):
     (2.5, 2): (1152, 1),
     (2.5, 3): (576, 1),
 }
+
+# Audio object type names; the MP4 stream info parser looks them up by audio object type index.
+MP4AudioObjectTypes = [
+    None,
+    "AAC MAIN",
+    "AAC LC",
+    "AAC SSR",
+    "AAC LTP",
+    "SBR",
+    "AAC Scalable",
+    "TwinVQ",
+    "CELP",
+    "HVXC",
+    None,
+    None,
+    "TTSI",
+    "Main synthetic",
+    "Wavetable sample-based synthesis",
+    "General MIDI",
+    "Algorithmic Synthesis and Audio Effects",
+    "ER AAC LC",
+    None,
+    "ER AAC LTP",
+    "ER AAC Scalable",
+    "ER Twin VQ",
+    "ER BSAC",
+    "ER AAC LD",
+    "ER CELP",
+    "ER HVXC",
+    "ER HILN",
+    "ER Parametric",
+    "SSC",
+    "PS",
+    "MPEG Surround",
+    None,
+    "MPEG-1/2 Layer-1",
+    "MPEG-1/2 Layer-2",
+    "MPEG-1/2 Layer-3",
+    "DST",
+    "ALS",
+    "SLS",
+    "SLS non-core",
+    "ER AAC ELD",
+    "SMR Simple",
+    "SMR Main",
+    "USAC",
+    "SAOC",
+    "LD MPEG Surround",
+    "USAC"
+]
+
+
+class MP4CoverFormat(_BaseIntEnum):
+    """Format codes that ``MP4Cover.load`` reads from ``covr`` atom data."""
+
+    JPEG = 13
+    PNG = 14
+    BMP = 27
+
+
+# Sample rates; the MP4 stream info parser looks them up by sampling frequency index.
+MP4SamplingFrequencies = [
+    96000,
+    88200,
+    64000,
+    48000,
+    44100,
+    32000,
+    24000,
+    22050,
+    16000,
+    12000,
+    11025,
+    8000,
+    7350
+]
diff --git a/tests/files/audio/test-mp4-aac.m4a b/tests/files/audio/test-mp4-aac.m4a
new file mode 100644
index 0000000..194c230
Binary files /dev/null and b/tests/files/audio/test-mp4-aac.m4a differ
diff --git a/tests/files/audio/test-mp4-ac3.m4a b/tests/files/audio/test-mp4-ac3.m4a
new file mode 100644
index 0000000..97d465f
Binary files /dev/null and b/tests/files/audio/test-mp4-ac3.m4a differ
diff --git a/tests/files/audio/test-mp4-alac.m4a b/tests/files/audio/test-mp4-alac.m4a
new file mode 100644
index 0000000..341ec81
Binary files /dev/null and b/tests/files/audio/test-mp4-alac.m4a differ