# utils.py (89 lines, 81 loc, 3.31 KB)
# Forked from x4nth055/gender-recognition-by-voice.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import pandas as pd
import numpy as np
import os
import tqdm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.model_selection import train_test_split
# integer class id assigned to each gender label: "male" -> 1, "female" -> 0
label2int = dict(male=1, female=0)
def load_data(vector_length=128):
    """Load the gender recognition dataset as feature and label arrays.

    On the first run, every feature file listed in the `filename` column of
    `balanced-all.csv` is loaded individually, and the bundled arrays are
    saved to `results/features.npy` and `results/labels.npy`. From the second
    run on, those two files are loaded directly, as it is much faster.

    Args:
        vector_length: number of values in each sample's feature vector.
            Only used when the cache has to be built; a cache hit returns
            the saved arrays as they are, whatever their shape.

    Returns:
        A tuple `(X, y)`. When built here, `X` has shape
        `(n_samples, vector_length)` and `y` has shape `(n_samples, 1)`,
        holding 1 for male and 0 for female.
    """
    features_path = "results/features.npy"
    labels_path = "results/labels.npy"

    # make sure results folder exists; exist_ok avoids the check-then-create
    # race when the folder appears between the test and the mkdir call
    os.makedirs("results", exist_ok=True)

    # if features & labels were already bundled by a previous run, reuse them
    if os.path.isfile(features_path) and os.path.isfile(labels_path):
        return np.load(features_path), np.load(labels_path)

    # read dataframe
    df = pd.read_csv("balanced-all.csv")
    n_samples = len(df)
    n_male_samples = int((df["gender"] == "male").sum())
    n_female_samples = int((df["gender"] == "female").sum())
    print("Total samples:", n_samples)
    print("Total male samples:", n_male_samples)
    print("Total female samples:", n_female_samples)

    # preallocate the arrays for all audio features and labels
    # (1 for male and 0 for female)
    X = np.zeros((n_samples, vector_length))
    y = np.zeros((n_samples, 1))
    samples = enumerate(zip(df["filename"], df["gender"]))
    for i, (filename, gender) in tqdm.tqdm(samples, "Loading data", total=n_samples):
        # each row of the CSV points to a saved feature vector for one sample
        X[i] = np.load(filename)
        y[i] = label2int[gender]

    # save the audio features and labels into files
    # so we won't load each one of them next run
    np.save(features_path, X)
    np.save(labels_path, y)
    return X, y
def split_data(X, y, test_size=0.1, valid_size=0.1):
    """Partition the samples into training, validation and testing subsets.

    The testing subset is split off first. The validation subset is then
    taken from what remains, so `valid_size` is applied to the samples left
    after the test split, not to the full dataset. Both splits use the same
    fixed `random_state` (7).

    Returns:
        A dictionary with the keys `X_train`, `X_valid`, `X_test`,
        `y_train`, `y_valid` and `y_test`.
    """
    seed = 7
    # first cut: hold out the testing set
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed
    )
    # second cut: divide the remainder into training and validation sets
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_rest, y_rest, test_size=valid_size, random_state=seed
    )
    return dict(
        X_train=X_train,
        X_valid=X_valid,
        X_test=X_test,
        y_train=y_train,
        y_valid=y_valid,
        y_test=y_test,
    )
def create_model(vector_length=128):
    """Build and compile the dense gender classifier.

    The network is a stack of five Dense layers (256, 256, 128, 128, 64
    units), each followed by Dropout(0.3), and a single sigmoid output
    neuron where 0 means female and 1 means male. Not the best model,
    but not bad.

    Args:
        vector_length: size of the input feature vector.

    Returns:
        The compiled model; its summary is printed before returning.
    """
    dropout_rate = 0.3
    model = Sequential()

    # input layer: no activation argument is passed here, unlike the
    # remaining hidden layers which all use relu
    model.add(Dense(256, input_shape=(vector_length,)))
    model.add(Dropout(dropout_rate))

    # remaining hidden layers, each paired with a dropout layer
    for units in (256, 128, 128, 64):
        model.add(Dense(units, activation="relu"))
        model.add(Dropout(dropout_rate))

    # single sigmoid output: 0 means female, 1 means male
    model.add(Dense(1, activation="sigmoid"))

    # binary crossentropy, since this is a two-class (male/female) problem
    model.compile(loss="binary_crossentropy", metrics=["accuracy"], optimizer="adam")
    model.summary()
    return model