new trains
dil-shana committed Jul 17, 2024
1 parent fdca870 commit 5899d92
Showing 35 changed files with 5,420 additions and 1,018 deletions.
Binary file removed dfpl/.single_label_model.py.swp
239 changes: 102 additions & 137 deletions dfpl/plot.py
@@ -1,53 +1,29 @@
from typing import List
import array

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import wandb
from matplotlib.axes import Axes

# for NN model functions
from tensorflow.keras.callbacks import History
from matplotlib.axes import Axes


def get_max_validation_accuracy(history: History) -> str:
validation = smooth_curve(history.history["val_accuracy"])
y_max: float = max(validation)
return "Max validation accuracy ≈ " + str(round(y_max, 3) * 100) + "%"


def get_max_validation_balanced_accuracy(history: History) -> str:
validation_bal_acc = smooth_curve(history.history["val_balanced_accuracy"])
y_max: float = max(validation_bal_acc)
return "Max validation balanced accuracy ≈ " + str(round(y_max, 3) * 100) + "%"


def get_max_training_balanced_accuracy(history: History) -> str:
training_bal_acc = smooth_curve(history.history["balanced_accuracy"])
y_max: float = max(training_bal_acc)
return "Training balanced accuracy ≈ " + str(round(y_max, 3) * 100) + "%"


def get_max_training_auc(history: History) -> str:
training_auc = smooth_curve(history.history["auc"])
y_max: float = max(training_auc)
return "Validation AUC ≈ " + str(round(y_max, 3) * 100) + "%"
# for testing in Weights & Biases


def get_max_validation_auc(history: History) -> str:
validation_auc = smooth_curve(history.history["val_auc"])
y_max: float = max(validation_auc)
return "Validation AUC ≈ " + str(round(y_max, 3) * 100) + "%"
def get_max_validation_accuracy(history: History) -> str:
validation = smooth_curve(history.history['val_accuracy'])
y_max = max(validation)
return 'Max validation accuracy ≈ ' + str(round(y_max, 3) * 100) + '%'


def get_max_training_accuracy(history: History) -> str:
training = smooth_curve(history.history["accuracy"])
y_max: float = max(training)
return "Max training accuracy ≈ " + str(round(y_max, 3) * 100) + "%"
training = smooth_curve(history.history['accuracy'])
y_max = max(training)
return 'Max training accuracy ≈ ' + str(round(y_max, 3) * 100) + '%'


def smooth_curve(points: np.ndarray, factor: float = 0.75) -> np.ndarray:
smoothed_points: List[float] = []
def smooth_curve(points: array, factor: float = 0.75) -> array:
smoothed_points = []
for point in points:
if smoothed_points:
previous = smoothed_points[-1]
@@ -60,13 +36,13 @@ def smooth_curve(points: np.ndarray, factor: float = 0.75) -> np.ndarray:
def set_plot_history_data(ax: Axes, history: History, which_graph: str) -> None:
(train, valid) = (None, None)

if which_graph == "acc":
train = smooth_curve(history.history["accuracy"])
valid = smooth_curve(history.history["val_accuracy"])
if which_graph == 'acc':
train = smooth_curve(history.history['accuracy'])
valid = smooth_curve(history.history['val_accuracy'])

if which_graph == "loss":
train = smooth_curve(history.history["loss"])
valid = smooth_curve(history.history["val_loss"])
if which_graph == 'loss':
train = smooth_curve(history.history['loss'])
valid = smooth_curve(history.history['val_loss'])

# plt.xkcd() # make plots look like xkcd

@@ -75,69 +51,78 @@ def set_plot_history_data(ax: Axes, history: History, which_graph: str) -> None:
trim = 0 # remove first 5 epochs
# when graphing loss the first few epochs may skew the (loss) graph

ax.plot(epochs[trim:], train[trim:], "dodgerblue", linewidth=15, alpha=0.1)
ax.plot(epochs[trim:], train[trim:], "dodgerblue", label="Training")
ax.plot(epochs[trim:], train[trim:], 'dodgerblue', linewidth=15, alpha=0.1)
ax.plot(epochs[trim:], train[trim:], 'dodgerblue', label='Training')

ax.plot(epochs[trim:], valid[trim:], 'g', linewidth=15, alpha=0.1)
ax.plot(epochs[trim:], valid[trim:], 'g', label='Validation')

ax.plot(epochs[trim:], valid[trim:], "g", linewidth=15, alpha=0.1)
ax.plot(epochs[trim:], valid[trim:], "g", label="Validation")

def plot_loss(hist: History, file: str) -> None:
fig, ax = plt.subplots(1)
fig.suptitle('History of loss values (regression model)')
ax.plot(hist.epoch, hist.history['loss'], 'dodgerblue', linewidth=15, alpha=0.1)
ax.plot(hist.epoch, hist.history['loss'], 'dodgerblue', label='Training')
ax.plot(hist.epoch, hist.history['val_loss'], 'g', linewidth=15, alpha=0.1)
ax.plot(hist.epoch, hist.history['val_loss'], 'g', label='Validation')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend(loc="upper right")
plt.tight_layout()
plt.savefig(fname=file, format='jpg')
plt.close()


def plot_history(history: History, file: str) -> None:
fig, (ax1, ax2) = plt.subplots(
nrows=2,
ncols=1,
figsize=(10, 6),
sharex="all",
gridspec_kw={"height_ratios": [5, 2]},
)
fig, (ax1, ax2) = plt.subplots(nrows=2,
ncols=1,
figsize=(10, 6),
sharex='all',
gridspec_kw={'height_ratios': [5, 2]})

set_plot_history_data(ax1, history, "acc")
set_plot_history_data(ax1, history, 'acc')

set_plot_history_data(ax2, history, "loss")
set_plot_history_data(ax2, history, 'loss')

# Accuracy graph
ax1.set_ylabel("Accuracy")
ax1.set_ylabel('Accuracy')
ax1.set_ylim(bottom=0.5, top=1)
ax1.legend(loc="lower right")
ax1.spines["top"].set_visible(False)
ax1.spines["right"].set_visible(False)
ax1.xaxis.set_ticks_position("none")
ax1.spines["bottom"].set_visible(False)
ax1.spines['top'].set_visible(False)
ax1.spines['right'].set_visible(False)
ax1.xaxis.set_ticks_position('none')
ax1.spines['bottom'].set_visible(False)

# max accuracy text
plt.text(
0.5,
0.6,
get_max_validation_balanced_accuracy(history),
horizontalalignment="right",
verticalalignment="top",
transform=ax1.transAxes,
fontsize=12,
)
plt.text(
0.5,
0.8,
get_max_training_balanced_accuracy(history),
horizontalalignment="right",
verticalalignment="top",
transform=ax1.transAxes,
fontsize=12,
)
plt.text(0.5,
0.6,
get_max_validation_accuracy(history),
horizontalalignment='right',
verticalalignment='top',
transform=ax1.transAxes,
fontsize=12)
plt.text(0.5,
0.8,
get_max_training_accuracy(history),
horizontalalignment='right',
verticalalignment='top',
transform=ax1.transAxes,
fontsize=12)

# Loss graph
ax2.set_ylabel("Loss")
ax2.set_ylabel('Loss')
ax2.set_yticks([])
ax2.plot(legend=False)
ax2.set_xlabel("Epochs")
ax2.spines["top"].set_visible(False)
ax2.spines["right"].set_visible(False)
ax2.set_xlabel('Epochs')
ax2.spines['top'].set_visible(False)
ax2.spines['right'].set_visible(False)

plt.tight_layout()
plt.savefig(fname=file, format="svg")
plt.savefig(fname=file, format='svg')
plt.close()


def plot_train_history(hist, target, file_accuracy, file_loss):
def plotTrainHistory(hist, target, file_accuracy, file_loss):
"""
Plot the training performance in terms of accuracy and loss values for each epoch.
:param hist: The history returned by model.fit function
@@ -149,60 +134,44 @@ def plot_train_history(hist, target, file_accuracy, file_loss):

# plot accuracy
plt.figure()
plt.plot(hist.history["accuracy"])
if "val_accuracy" in hist.history.keys():
plt.plot(hist.history["val_accuracy"])
plt.title("Model accuracy - " + target)
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
if "val_accuracy" in hist.history.keys():
plt.legend(["Train", "Test"], loc="upper left")
plt.plot(hist.history['accuracy'])
if 'val_accuracy' in hist.history.keys():
plt.plot(hist.history['val_accuracy'])
plt.title('Model accuracy - ' + target)
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
if 'val_accuracy' in hist.history.keys():
plt.legend(['Train', 'Test'], loc='upper left')
else:
plt.legend(["Train"], loc="upper_left")
plt.savefig(fname=file_accuracy, format="svg")
plt.legend(['Train'], loc='upper_left')
plt.savefig(fname=file_accuracy, format='svg')

# Plot training & validation loss values
plt.figure()
plt.plot(hist.history["loss"])
plt.plot(hist.history["val_loss"])
plt.title("Model loss - " + target)
plt.ylabel("Loss")
plt.xlabel("Epoch")
plt.legend(["Train", "Test"], loc="upper left")
plt.plot(hist.history['loss'])
plt.plot(hist.history['val_loss'])
plt.title('Model loss - ' + target)
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
# plt.show()
plt.savefig(fname=file_loss, format="svg")
plt.savefig(fname=file_loss, format='svg')
plt.close()


def plot_history_vis(
hist: History,
model_hist_plot_path: str,
model_hist_csv_path: str,
model_hist_plot_path_a: str,
model_hist_plot_path_l: str,
target: str,
) -> None:
def plot_history_vis(hist: History, model_hist_plot_path: str, model_hist_csv_path: str,
model_hist_plot_path_a: str, model_hist_plot_path_l: str, target: str) -> None:
plot_history(history=hist, file=model_hist_plot_path)
histDF = pd.DataFrame(hist.history)
histDF.to_csv(model_hist_csv_path)

# plot accuracy and loss for the training and validation during training
plot_train_history(
hist=hist,
target=target,
file_accuracy=model_hist_plot_path_a,
file_loss=model_hist_plot_path_l,
)


def plot_auc(
fpr: np.ndarray,
tpr: np.ndarray,
auc_value: float,
target: str,
filename: str,
wandb_logging: bool = False,
) -> None:
plotTrainHistory(hist=hist, target=target,
file_accuracy=model_hist_plot_path_a,
file_loss=model_hist_plot_path_l)


def plot_auc(fpr: array, tpr: array, auc_value: float, target: str, filename: str) -> None:
"""
Plot the area under the curve to the provided file
@@ -211,18 +180,14 @@ def plot_auc(
:param auc_value: The value of the area under the curve
:param target: The name of the training target
:param filename: The filename to which the plot should be stored
:param wandb_logging: Whether to log the plot to wandb
:rtype: None
"""
# Create a boolean mask to filter out zero values
plt.figure()
plt.plot([0, 1], [0, 1], "k--")
plt.plot(fpr, tpr, label=f"Keras (area = {auc_value:.3f})")
plt.xlabel("False positive rate")
plt.ylabel("True positive rate")
plt.title("ROC curve " + target)
plt.legend(loc="best")
plt.savefig(fname=filename, format="png")
if wandb_logging:
wandb.log({"roc_plot": plt})
plt.close()
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr, tpr, label='Keras (area = {:.3f})'.format(auc_value))
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve ' + target)
plt.legend(loc='best')
plt.savefig(fname=filename, format='svg')
plt.close()
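
For reference, here is a minimal, self-contained sketch of the exponential smoothing that smooth_curve applies to every metric before its maximum is reported. It is not part of this commit: the helper name ema_smooth and the sample accuracy values are invented for illustration, and the loop body elided in the hunk above is assumed to follow the standard exponential-moving-average form.

from typing import List


def ema_smooth(points: List[float], factor: float = 0.75) -> List[float]:
    # Blend each point with the previous smoothed value (EMA with weight `factor`).
    smoothed: List[float] = []
    for point in points:
        if smoothed:
            smoothed.append(smoothed[-1] * factor + point * (1 - factor))
        else:
            smoothed.append(point)  # the first point is kept unchanged
    return smoothed


# A noisy validation-accuracy curve and its smoothed version:
val_acc = [0.60, 0.72, 0.68, 0.80, 0.76, 0.83]
print(ema_smooth(val_acc))       # ≈ [0.6, 0.63, 0.6425, 0.681875, ...]
print(max(ema_smooth(val_acc)))  # the value the get_max_* helpers round and report

Because each point is blended with earlier ones, the smoothed maximum sits slightly below the raw maximum, which keeps a single lucky epoch from dominating the reported accuracy.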
3 changes: 2 additions & 1 deletion dfpl/settings.py
@@ -34,6 +34,7 @@
nn_fp_numpy_type = np.float32
nn_fp_compressed_numpy_type = np.float32
nn_target_numpy_type = np.short
nn_target_numpy_type_regression = np.float32

nn_multi_fp_numpy_type = np.float32
nn_multi_fp_compressed_numpy_type = np.float32
@@ -49,4 +50,4 @@
# Training settings of the FNN that were magic numbers in the code before.
nn_train_min_delta = 0.0001
nn_train_check_period = 10
nn_train_patience = 20
nn_train_patience = 20
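
As a quick illustration of the new constant, the snippet below shows how a regression target array could be cast with nn_target_numpy_type_regression alongside the existing classification dtype. This is an assumption about usage -- the actual call sites live in the other files changed by this commit and are not shown here.

import numpy as np

from dfpl import settings

# Classification targets stay integral (np.short); regression targets become float32.
y_cls = np.asarray([0, 1, 1, 0], dtype=settings.nn_target_numpy_type)
y_reg = np.asarray([1.2, 0.7, 3.4], dtype=settings.nn_target_numpy_type_regression)
print(y_cls.dtype, y_reg.dtype)  # e.g. int16 float32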