Merge branch 'base_class_structure' into main

Natooz · Aug 24, 2021 · ff501a3 · ff501a3
2 parents da85c35 + a21ff75
commit ff501a3
Show file tree

Hide file tree

Showing 13 changed files with 377 additions and 488 deletions.
diff --git a/README.md b/README.md
@@ -83,7 +83,7 @@ NOTES:
 
 ### Create your own
 
-You can easily create your own encoding strategy and benefit from the MidiTok framework. Just create a class inheriting from the [MIDITokenizer](miditok/midi_tokenizer_base.py#L34) base class, and override the ```events_to_tokens```, ```tokens_to_event``` and ```create_vocabulary``` methods with your tokenization strategy.
+You can easily create your own encoding strategy and benefit from the MidiTok framework. Just create a class inheriting from the [MIDITokenizer](miditok/midi_tokenizer_base.py#L34) base class, and override the ```track_to_tokens```, ```tokens_to_track``` and ```_create_vocabulary``` methods with your tokenization strategy.
 
 ## Features
 
@@ -121,7 +121,7 @@ These tokens bring additional information about the structure and content of MID
 ### Tokenize a MIDI
 
 ```python
-from miditok import REMIEncoding
+from miditok import REMIEncoding, get_midi_programs
 from miditoolkit import MidiFile
 
 # Our parameters
@@ -138,15 +138,16 @@ additional_tokens = {'Chord': True,
 remi_enc = REMIEncoding(pitch_range, beat_res, nb_velocities, additional_tokens)
 midi = MidiFile('path/to/your_midi.mid')
 
-# Converts MIDI to tokens
+# Converts MIDI to tokens, and back to a MIDI
 tokens = remi_enc.midi_to_tokens(midi)
+converted_back_midi = remi_enc.tokens_to_midi(tokens, get_midi_programs(midi))
 
 # Converts just a selected track
 remi_enc.current_midi_metadata = {'time_division': midi.ticks_per_beat, 'tempo_changes': midi.tempo_changes}
 piano_tokens = remi_enc.track_to_tokens(midi.instruments[0])
 
 # And convert it back (the last arg stands for (program number, is drum))
-converted_back_track = remi_enc.tokens_to_track(piano_tokens, midi.ticks_per_beat, (0, False))
+converted_back_track, tempo_changes = remi_enc.tokens_to_track(piano_tokens, midi.ticks_per_beat, (0, False))
 ```
 
 ### Tokenize a dataset

diff --git a/miditok/__init__.py b/miditok/__init__.py
@@ -5,6 +5,6 @@
 from .mumidi import MuMIDIEncoding
 from .octuple import OctupleEncoding
 from .octuple_mono import OctupleMonoEncoding
-from .midi_tokenizer_base import quantize_note_times, detect_chords, merge_tracks, MIDITokenizer
+from .midi_tokenizer_base import get_midi_programs, quantize_note_times, detect_chords, merge_tracks, MIDITokenizer
 from .constants import MIDI_INSTRUMENTS, INSTRUMENT_CLASSES, INSTRUMENT_CLASSES_RANGES, CHORD_MAPS, DRUM_SETS,\
     CONTROL_CHANGES
diff --git a/miditok/cp_word.py b/miditok/cp_word.py
diff --git a/miditok/midi_like.py b/miditok/midi_like.py
@@ -3,11 +3,10 @@
 
 """
 
-from pathlib import Path
 from typing import List, Tuple, Dict, Optional
 
 import numpy as np
-from miditoolkit import MidiFile, Instrument, Note, TempoChange
+from miditoolkit import Instrument, Note, TempoChange
 
 from .midi_tokenizer_base import MIDITokenizer, Event, detect_chords
 from .constants import *
@@ -44,15 +43,15 @@ def __init__(self, pitch_range: range = PITCH_RANGE, beat_res: Dict[Tuple[int, i
         additional_tokens['Empty'] = False  # Incompatible additional tokens
         super().__init__(pitch_range, beat_res, nb_velocities, additional_tokens, program_tokens, params)
 
-    def track_to_events(self, track: Instrument) -> List[Event]:
-        """ Converts a track (list of Note objects) into Event objects
+    def track_to_tokens(self, track: Instrument) -> List[int]:
+        """ Converts a track (miditoolkit.Instrument object) into a sequence of tokens
         (can probably be achieved faster with Mido objects)
 
-        :param track: track object to convert
-        :return: list of events
+        :param track: MIDI track to convert
+        :return: sequence of corresponding tokens
         """
         # Make sure the notes are sorted first by their onset (start) times, second by pitch
-        # notes.sort(key=lambda x: (x.start, x.pitch))  # it should have been done in quantization function
+        # notes.sort(key=lambda x: (x.start, x.pitch))  # done in midi_to_tokens
         events = []
 
         # Creates the Note On, Note Off and Velocity events
@@ -111,54 +110,23 @@ def track_to_events(self, track: Instrument) -> List[Event]:
 
         events.sort(key=lambda x: (x.time, self._order(x)))
 
-        return events
-
-    def tokens_to_midi(self, tokens: List[List[int]], programs: Optional[List[Tuple[int, bool]]] = None,
-                       output_path: Optional[str] = None, time_division: Optional[int] = TIME_DIVISION) -> MidiFile:
-        """ Override the parent class method
-        Convert multiple sequences of tokens into a multitrack MIDI and save it.
-        The tokens will be converted to event objects and then to a miditoolkit.MidiFile object.
-        NOTE: for multitrack with tempo, only the tempo tokens of the first
-            decoded track will be used for the MIDI
-
-        :param tokens: list of lists of tokens to convert, each list inside the
-                       first list corresponds to a track
-        :param programs: programs of the tracks
-        :param output_path: path to save the file (with its name, e.g. music.mid),
-                        leave None to not save the file
-        :param time_division: MIDI time division / resolution, in ticks/beat (of the MIDI to create)
-        :return: the midi object (miditoolkit.MidiFile)
-        """
-        midi = MidiFile(ticks_per_beat=time_division)
-        for i, track_tokens in enumerate(tokens):
-            if programs is not None:
-                track, tempos = self.tokens_to_track(track_tokens, time_division, programs[i])
-            else:
-                track, tempos = self.tokens_to_track(track_tokens, time_division)
-            midi.instruments.append(track)
-            if i == 0:  # only keep tempo changes of the first track
-                midi.tempo_changes = tempos
-                midi.tempo_changes[0].time = 0
-
-        # Write MIDI file
-        if output_path:
-            Path(output_path).mkdir(parents=True, exist_ok=True)
-            midi.dump(output_path)
-        return midi
-
-    def events_to_track(self, events: List[Event], time_division: int, program: Optional[Tuple[int, bool]] = (0, False),
-                        default_duration: int = None) -> Tuple[Instrument, List[TempoChange]]:
-        """ Transform a list of Event objects into an instrument object
-
-        :param events: list of Event objects to convert to a track
+        return self.events_to_tokens(events)
+
+    def tokens_to_track(self, tokens: List[int], time_division: Optional[int] = TIME_DIVISION,
+                        program: Optional[Tuple[int, bool]] = (0, False), default_duration: int = None) \
+            -> Tuple[Instrument, List[TempoChange]]:
+        """ Converts a sequence of tokens into a track object
+
+        :param tokens: sequence of tokens to convert
         :param time_division: MIDI time division / resolution, in ticks/beat (of the MIDI to create)
         :param program: the MIDI program of the produced track and whether it is a drum track (default (0, False), piano)
-        :param default_duration: default duration in case a Note On event occurs without its associated
+        :param default_duration: default duration (in ticks) in case a Note On event occurs without its associated
                                 note off event. Leave None to discard Note On with no Note Off event.
-        :return: the miditoolkit instrument object
+        :return: the miditoolkit instrument object and tempo changes
         """
-        max_duration = (self.durations[-1][0] + self.durations[-1][1]) * time_division
+        events = self.tokens_to_events(tokens)
 
+        max_duration = (self.durations[-1][0] + self.durations[-1][1]) * time_division
         name = 'Drums' if program[1] else MIDI_INSTRUMENTS[program[0]]['name']
         instrument = Instrument(program[0], is_drum=program[1], name=name)
         if self.additional_tokens['Tempo']:
@@ -209,7 +177,7 @@ def events_to_track(self, events: List[Event], time_division: int, program: Opti
         del tempo_changes[0]
         return instrument, tempo_changes
 
-    def create_vocabulary(self, program_tokens: bool) -> Tuple[dict, dict, dict]:
+    def _create_vocabulary(self, program_tokens: bool) -> Tuple[dict, dict, dict]:
         """ Create the tokens <-> event dictionaries
         These dictionaries are created arbitrary according to constants defined
         at the top of this file.
@@ -276,7 +244,7 @@ def create_vocabulary(self, program_tokens: bool) -> Tuple[dict, dict, dict]:
         token_to_event = {v: k for k, v in event_to_token.items()}  # inversion
         return event_to_token, token_to_event, token_type_indices
 
-    def create_token_types_graph(self) -> Dict[str, List[str]]:
+    def _create_token_types_graph(self) -> Dict[str, List[str]]:
         dic = dict()
 
         if 'Program' in self.token_types_indices:

diff --git a/miditok/midi_tokenizer_base.py b/miditok/midi_tokenizer_base.py
@@ -15,7 +15,12 @@
 
 
 class Event:
-    """ Event class, representing a token and its characteristics"""
+    """ Event class, representing a token and its characteristics
+    The name corresponds to the token type (e.g. Pitch, Position ...);
+    The value to its value.
+    These two attributes are used in events_to_tokens and tokens_to_events
+    methods (see below) to convert an Event object to its corresponding integer.
+    """
 
     def __init__(self, name, time, value, text):
         self.name = name
@@ -53,7 +58,7 @@ def __init__(self, pitch_range: range, beat_res: Dict[Tuple[int, int], int], nb_
         else:
             self.load_params(params)
 
-        self.durations = self.create_durations_tuples()
+        self.durations = self._create_durations_tuples()
         self.velocity_bins = np.linspace(0, 127, self.nb_velocities + 1, dtype=np.intc)
         np.delete(self.velocity_bins, 0)  # removes velocity 0
         if additional_tokens['Tempo']:
@@ -62,7 +67,7 @@ def __init__(self, pitch_range: range, beat_res: Dict[Tuple[int, int], int], nb_
         else:
             self.tempo_bins = np.zeros(1)
 
-        self.event2token, self.token2event, self.token_types_indices = self.create_vocabulary(program_tokens)
+        self.event2token, self.token2event, self.token_types_indices = self._create_vocabulary(program_tokens)
 
         # Keep in memory durations in ticks for seen time divisions so these values
         # are not calculated each time a MIDI is processed
@@ -72,53 +77,47 @@ def __init__(self, pitch_range: range, beat_res: Dict[Tuple[int, int], int], nb_
         # MIDI (being parsed) so that methods processing tracks can access them
         self.current_midi_metadata = {}  # needs to be updated each time a MIDI is read
 
-    def midi_to_tokens(self, midi: MidiFile) -> Tuple[List[List[int]], List[Tuple[int, bool]]]:
-        """ Converts a MIDI file in a tokens representation
+    def midi_to_tokens(self, midi: MidiFile) -> List[List[Union[int, List[int]]]]:
+        """ Converts a MIDI file into a token representation.
+        NOTE: if you override this method, be sure to keep every line of code below until
+        the "Convert track to token" comment in the for loop
 
         :param midi: the MIDI object to convert
-        :return: the token representation :
-                  1. tracks converted into sequences of tokens
-                  2. program numbers and if it is drums, for each track
+        :return: the token representation, i.e. tracks converted into sequences of tokens
         """
+        # Check if the durations values have been calculated before for this time division
         try:
             _ = self.durations_ticks[midi.ticks_per_beat]
         except KeyError:
             self.durations_ticks[midi.ticks_per_beat] = [(beat * res + pos) * midi.ticks_per_beat // res
                                                          for beat, pos, res in self.durations]
 
+        # Register MIDI metadata
         self.current_midi_metadata = {'time_division': midi.ticks_per_beat,
                                       'tempo_changes': midi.tempo_changes,
                                       'time_sig_changes': midi.time_signature_changes,
                                       'key_sig_changes': midi.key_signature_changes}
+
+        # Quantize tempo changes times
         quantize_tempos(midi.tempo_changes, midi.ticks_per_beat, max(self.beat_res.values()))
 
         tokens = []
         for track in midi.instruments:
+            quantize_note_times(track.notes, self.current_midi_metadata['time_division'], max(self.beat_res.values()))
+            track.notes.sort(key=lambda x: (x.start, x.pitch))  # sort notes
+            remove_duplicated_notes(track.notes)  # remove possible duplicated notes
+
+            # Convert track to tokens
             tokens.append(self.track_to_tokens(track))
 
-        track_info = [(int(track.program), track.is_drum) for track in midi.instruments]
-        return tokens, track_info
+        return tokens
 
-    def track_to_tokens(self, track: Instrument) -> List[int]:
+    def track_to_tokens(self, track: Instrument) -> List[Union[int, List[int]]]:
         """ Converts a track (miditoolkit.Instrument object) into a sequence of tokens
 
         :param track: MIDI track to convert
         :return: sequence of corresponding tokens
         """
-        quantize_note_times(track.notes, self.current_midi_metadata['time_division'], max(self.beat_res.values()))
-        track.notes.sort(key=lambda x: (x.start, x.pitch))  # sort notes
-        remove_duplicated_notes(track.notes)  # remove possible duplicated notes
-        events = self.track_to_events(track)  # get distinct events
-        return self.events_to_tokens(events)
-
-    def track_to_events(self, track: Instrument) -> List[Event]:
-        """ Converts a track (list of Note objects) into Event objects
-        NOTE: this method must take care of chord or other types of tokens, if specified
-        And to sort every events in the right order!
-
-        :param track: track object to convert
-        :return: list of events
-        """
         raise NotImplementedError
 
     def events_to_tokens(self, events: List[Event]) -> List[int]:
@@ -143,10 +142,13 @@ def tokens_to_events(self, tokens: List[int]) -> List[Event]:
             events.append(Event(name, None, val, None))
         return events
 
-    def tokens_to_midi(self, tokens: List[List[int]], programs: Optional[List[Tuple[int, bool]]] = None,
-                       output_path: Optional[str] = None, time_division: Optional[int] = TIME_DIVISION) -> MidiFile:
+    def tokens_to_midi(self, tokens: List[List[Union[int, List[int]]]],
+                       programs: Optional[List[Tuple[int, bool]]] = None, output_path: Optional[str] = None,
+                       time_division: Optional[int] = TIME_DIVISION) -> MidiFile:
         """ Convert multiple sequences of tokens into a multitrack MIDI and save it.
         The tokens will be converted to event objects and then to a miditoolkit.MidiFile object.
+        NOTE: With Remi, MIDI-Like, CP Word or other encoding methods that process tracks
+        independently, only the tempo changes of the first track in tokens will be used
 
         :param tokens: list of lists of tokens to convert, each list inside the
                        first list corresponds to a track
@@ -159,39 +161,32 @@ def tokens_to_midi(self, tokens: List[List[int]], programs: Optional[List[Tuple[
         midi = MidiFile(ticks_per_beat=time_division)
         for i, track_tokens in enumerate(tokens):
             if programs is not None:
-                midi.instruments.append(self.tokens_to_track(track_tokens, time_division, programs[i]))
+                track, tempo_changes = self.tokens_to_track(track_tokens, time_division, programs[i])
             else:
-                midi.instruments.append(self.tokens_to_track(track_tokens, time_division))
+                track, tempo_changes = self.tokens_to_track(track_tokens, time_division)
+            midi.instruments.append(track)
+            if i == 0:  # only keep tempo changes of the first track
+                midi.tempo_changes = tempo_changes
+                midi.tempo_changes[0].time = 0
 
         # Write MIDI file
         if output_path:
             Path(output_path).mkdir(parents=True, exist_ok=True)
             midi.dump(output_path)
         return midi
 
-    def tokens_to_track(self, tokens: List[int], time_division: Optional[int] = TIME_DIVISION,
-                        program: Optional[Tuple[int, bool]] = (0, False)):
+    def tokens_to_track(self, tokens: List[Union[int, List[int]]], time_division: Optional[int] = TIME_DIVISION,
+                        program: Optional[Tuple[int, bool]] = (0, False)) -> Tuple[Instrument, List[TempoChange]]:
         """ Converts a sequence of tokens into a track object
 
         :param tokens: sequence of tokens to convert
         :param time_division: MIDI time division / resolution, in ticks/beat (of the MIDI to create)
         :param program: the MIDI program of the produced track and whether it is a drum track (default (0, False), piano)
-        :return:
-        """
-        events = self.tokens_to_events(tokens)
-        return self.events_to_track(events, time_division, program)
-
-    def events_to_track(self, events: List[Event], time_division: int, program: Optional[int] = 0) -> Instrument:
-        """ Transform a list of Event objects into an instrument object
-
-        :param events: list of Event objects to convert to a track
-        :param time_division: MIDI time division / resolution, in ticks/beat (of the MIDI to create)
-        :param program: the MIDI program of the produced track, use -1 for drums (default 0, piano)
-        :return: the miditoolkit instrument object
+        :return: the miditoolkit instrument object and the possible tempo changes
         """
         raise NotImplementedError
 
-    def create_vocabulary(self, program_tokens: bool) -> Tuple[dict, dict, dict]:
+    def _create_vocabulary(self, program_tokens: bool) -> Tuple[dict, dict, dict]:
         """ Create the tokens <-> event dictionaries
         These dictionaries are created arbitrary according to constants defined
         at the top of this file.
@@ -205,11 +200,11 @@ def create_vocabulary(self, program_tokens: bool) -> Tuple[dict, dict, dict]:
         """
         raise NotImplementedError
 
-    def create_token_types_graph(self) -> Dict[str, List[str]]:
+    def _create_token_types_graph(self) -> Dict[str, List[str]]:
         """ Creates a dictionary for the directions of the token types of the encoding"""
         raise NotImplementedError
 
-    def create_durations_tuples(self) -> List[Tuple]:
+    def _create_durations_tuples(self) -> List[Tuple]:
         """ Creates the possible durations in bar / beat units, as tuple of the form:
         (beat, pos, res) where beat is the number of beats, pos the number of "frames"
         and res the beat resolution considered (frames per beat)
@@ -298,6 +293,16 @@ def load_params(self, params: Union[str, Path, PurePath, Dict[str, Any]]):
             setattr(self, key, value)
 
 
+def get_midi_programs(midi: MidiFile) -> List[Tuple[int, bool]]:
+    """ Returns the list of programs of the tracks of a MIDI, keeping the
+    same order. It returns it as a list of tuples (program, is_drum).
+
+    :param midi: the MIDI object to extract tracks programs
+    :return: the list of track programs, as a list of tuples (program, is_drum)
+    """
+    return [(int(track.program), track.is_drum) for track in midi.instruments]
+
+
 def quantize_note_times(notes: List[Note], time_division: int, beat_res: int):
     """ Quantize the notes items start and end values.
     It shifts the notes so they start at times that match the quantization (e.g. 16 frames per bar)