-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_extraction.py
70 lines (56 loc) · 2.13 KB
/
data_extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import numpy as np
import music21
from music21 import note, chord
from typing import List, Dict, Tuple
def run_data_extraction(path: str) -> List:
    """
    Recursively collect the notes of every MIDI file found under ``path``.

    Directory entries are visited in sorted name order so the resulting list
    is reproducible (``os.listdir`` returns entries in arbitrary order).
    Sub-directories are descended into; files whose extension is ``.mid`` or
    ``.midi`` (compared case-insensitively) are parsed with
    ``music21.converter.parse`` and handed to ``extract_notes_from_midi``.
    Every other file is ignored.

    :param path: directory to scan
    :return: flat list of unvectorized note entries, in traversal order
    """
    notes_data = []
    for entry in sorted(os.listdir(path)):
        file_p = os.path.join(path, entry)
        if os.path.isdir(file_p):
            # Nested folder: splice its notes in at this position.
            notes_data.extend(run_data_extraction(file_p))
        elif os.path.splitext(file_p)[1].lower() in ('.mid', '.midi'):
            midi_file = music21.converter.parse(file_p)
            notes_data.extend(extract_notes_from_midi(midi_file))
    return notes_data
def get_notes_mapping_dict(notes_list: List) -> Tuple[Dict, np.ndarray]:
    """
    Build the note <-> index lookup tables for a list of notes.

    The distinct notes are sorted and each one is assigned its position in
    that sorted order.

    :param notes_list: non-empty list of notes (duplicates allowed)
    :return: ``(notes2idx, idx2note)`` where ``notes2idx`` maps each distinct
        note to its index and ``idx2note`` is a numpy array of the distinct
        notes, so that ``idx2note[notes2idx[n]] == n``
    :raises AssertionError: if ``notes_list`` is empty
    """
    # Raised explicitly rather than via ``assert`` so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if len(notes_list) == 0:
        raise AssertionError('Empty notes list !!')
    full_list = sorted(set(notes_list))
    notes2idx = {note_e: i for i, note_e in enumerate(full_list)}
    idx2note = np.array(full_list)
    return notes2idx, idx2note
def extract_notes_from_midi(midi_file: music21.stream.Stream) -> List:
    """
    Collect the notes and chords of a parsed midi stream as strings.

    The stream is first partitioned by instrument. When that yields a truthy
    result, only its first part is walked (recursively); otherwise the flat
    ``notes`` of the stream itself are used.

    :param midi_file: parsed midi stream
    :return: list of strings, one per note or chord encountered: a note is
        represented by ``str`` of its pitch, a chord by its ``normalOrder``
        values joined with ``'.'``; any other element is skipped
    """
    by_instrument = music21.instrument.partitionByInstrument(midi_file)
    elements = (
        by_instrument.parts[0].recurse()  # file has instrument parts
        if by_instrument
        else midi_file.flat.notes  # file has notes in a flat structure
    )

    tokens = []
    for item in elements:
        if isinstance(item, note.Note):
            tokens.append(str(item.pitch))
            continue
        if isinstance(item, chord.Chord):
            tokens.append('.'.join(map(str, item.normalOrder)))
    return tokens
def vectorize_notes_by_mapping(notes_list: List, mapping: Dict) -> np.array:
    """
    Translate each note into its mapped value and return them as a numpy array.

    :param notes_list: notes to translate, in order
    :param mapping: lookup from note to value; it is indexed with every note
    :return: numpy array holding one mapped value per input note, same order
    """
    indices = []
    for entry in notes_list:
        indices.append(mapping[entry])
    return np.array(indices)