forked from f90/Wave-U-Net
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Metadata.py
98 lines (85 loc) · 3.38 KB
/
Metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import librosa
import os, re, subprocess
if False:
from soundfile import SoundFile
import scikits.audiolab
else:
from pysndfile import sndio
import Utils
class AudioReadError(EnvironmentError):
    """Raised when an audio file could not be read."""
# Patterns applied to the text printed by "ffmpeg -i <file>" (see get_mp3_metadata).
# Sampling rate: the number directly in front of " Hz,"
_find_sampling_rate = re.compile(r".* ([0-9:]+) Hz,", flags=re.MULTILINE)
# Channel layout: the comma-delimited field that follows "Hz,"
_find_channels = re.compile(r".*Hz,( .*?),", flags=re.MULTILINE)
# Duration: the digits-and-colons timestamp after "Duration: "
_find_duration = re.compile(r".*Duration: ([0-9:]+)", flags=re.MULTILINE)
def timestamp_to_seconds( ms ):
    """
    Convert an "hours:minutes:seconds" string into the corresponding number of seconds.

    :param ms: Timestamp string with exactly three colon-separated fields
    :return: Time in seconds as float
    """
    fields = ms.split(':')
    assert 3 == len( fields )
    hours, minutes, seconds = (float(field) for field in fields)
    return hours * 3600 + minutes * 60 + seconds
def seconds_to_min_sec( secs ):
    """
    Return a minutes:seconds string representation of the given number of seconds.

    Fractions of a second are dropped; the seconds part is zero-padded to two digits.

    :param secs: Number of seconds (int or float)
    :return: String of the form "M:SS"
    """
    # Floor division keeps the minute count integral on both Python 2 and Python 3.
    # With "/" Python 3 yields a fractional minute value, which makes the remaining
    # seconds computed below come out wrong (e.g. 125 -> "2:00").
    mins = int(secs) // 60
    remaining = int(secs - (mins * 60))
    return "%d:%02d" % (mins, remaining)
def get_metadata_by_loading(audio_path):
    """
    Determine the metadata of an audio file by loading all of its samples via Utils.load.

    :param audio_path: Path of the audio file to load
    :return: Tuple (sampling rate, number of channels, duration in seconds as float)
    :raises AudioReadError: If Utils.load reports no sampling rate for the file
    """
    print("Reading metadata for file " + audio_path + " by loading file completely")
    audio, sr = Utils.load(audio_path, sr=None, mono=False)
    if sr is None: # Error reading file
        raise AudioReadError("Could not load file " + audio_path)

    if len(audio.shape) == 1:
        # One-dimensional array: treated as a single channel whose length is the sample count
        return sr, 1, float(audio.shape[0]) / float(sr)
    # Otherwise the first axis is taken as the channel count and the second as the sample count
    return sr, audio.shape[0], float(audio.shape[1]) / float(sr)
def get_mp3_metadata(audio_path):
    """
    Read sampling rate, channel count and duration of an audio file from ffmpeg's output.

    Runs "ffmpeg -i <audio_path>" and parses the printed description with the
    module-level regular expressions. If ffmpeg cannot be started, or any of the
    three fields is not found in its output, the values are obtained with
    get_metadata_by_loading() instead.

    :param audio_path: Path of the audio file to inspect
    :return: Tuple (sampling rate, number of channels, duration in seconds)
    """
    # The arguments are passed as a list without a shell, so quotes, "$" or
    # backticks inside the file name cannot break or alter the command.
    try:
        process = subprocess.Popen(
            ["ffmpeg", "-i", audio_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT)
        raw_output, _ = process.communicate()
    except OSError:
        # ffmpeg is not available or not executable: use the slow but reliable path
        return get_metadata_by_loading(audio_path)
    # ffmpeg is only asked to describe its input here, so the exit status is ignored.
    # The output arrives as bytes on Python 3 and must be decoded before the
    # str-based patterns can be applied to it.
    ffmpeg = raw_output.decode("utf-8", "replace")

    # Get sampling rate
    match = _find_sampling_rate.search(ffmpeg)
    if not match:
        return get_metadata_by_loading(audio_path)
    sampling_rate = int(match.group(1))

    # Get channels
    match = _find_channels.search(ffmpeg)
    if not match:
        return get_metadata_by_loading(audio_path)
    # Anything that is not labelled "stereo" is counted as a single channel
    channels = 2 if "stereo" in match.group(1) else 1

    # Get duration
    match = _find_duration.search(ffmpeg)
    if not match:
        return get_metadata_by_loading(audio_path)
    duration = timestamp_to_seconds(match.group(1))

    return sampling_rate, channels, duration
def get_audio_metadata(audioPath, sphereType=False):
    '''
    Returns sampling rate, number of channels and duration of an audio file
    :param audioPath: Path to the audio file; its lower-cased extension decides how the metadata is read
    :param sphereType: Only consulted by the disabled audiolab/soundfile backend below; ignored by the active pysndfile backend
    :return: Tuple (sampling rate as int, number of channels as int, duration in seconds as float)
    '''
    ext = os.path.splitext(audioPath)[1][1:].lower()

    if False:
        # Disabled alternative backend (scikits.audiolab / soundfile), kept as a manual switch
        if ext=="aiff" or sphereType: # SPHERE headers for the TIMIT dataset
            audio = scikits.audiolab.Sndfile(audioPath)
            sr = audio.samplerate
            channels = audio.channels
            duration = float(audio.nframes) / float(audio.samplerate)
        elif ext=="mp3": # Use ffmpeg/ffprobe
            sr, channels, duration = get_mp3_metadata(audioPath)
        else:
            snd_file = SoundFile(audioPath, mode='r')
            inf = snd_file._info
            sr = inf.samplerate
            channels = inf.channels
            duration = float(inf.frames) / float(inf.samplerate)
    else:
        # Active backend: pysndfile, with ffmpeg for mp3 files
        if ext=="mp3": # Use ffmpeg/ffprobe
            sr, channels, duration = get_mp3_metadata(audioPath)
        else:
            sr, _, _, channels, num_frames = sndio.get_info(audioPath, extended_info=True)
            # Explicit float division, as in the disabled branch above, so the
            # duration is never truncated by integer division
            duration = float(num_frames) / float(sr)
    return int(sr), int(channels), float(duration)