Skip to content

Commit

Permalink
wandb r2
Browse files Browse the repository at this point in the history
  • Loading branch information
dil-shana committed Dec 3, 2024
1 parent e29b10e commit 3026dfb
Show file tree
Hide file tree
Showing 9 changed files with 117 additions and 22 deletions.
1 change: 1 addition & 0 deletions dfpl/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score

project_directory = pathlib.Path(".").parent.parent.absolute()
test_train_opts = options.Options(
Expand Down
61 changes: 50 additions & 11 deletions dfpl/single_label_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,13 @@
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
import tensorflow.keras.backend as K
from verstack import stratified_continuous_split

from dfpl import callbacks as cb
from dfpl import options
from dfpl import plot as pl
from dfpl import settings

import wandb

def sample_down_data(opts: options.Options, df: pd.DataFrame, target: str, column: str) -> (np.ndarray,
np.ndarray):
Expand Down Expand Up @@ -315,7 +316,27 @@ def acper(y_true, y_pred, t: float = 0.02):
yield True
else:
yield False
#def calculate_r2(y_true, y_pred,t: float = 0.02 ):
# ss_res = K.sum(K.square(y_true - y_pred))
# ss_tot = K.sum(K.square(y_true - K.mean(y_true)))
# return 1 - ss_res / (ss_tot + K.epsilon())
def calculate_r2(y_true, y_pred):
    """
    Calculate R² (coefficient of determination) manually.

    Both inputs are flattened to 1-D float arrays before they are compared.
    Without this, a column vector of shape (n, 1) subtracted from a flat
    vector of shape (n,) is broadcast to an (n, n) matrix and the resulting
    R² is silently wrong.

    :param y_true: Array-like of true values
    :param y_pred: Array-like of predicted values
    :return: R² value (1 - SS_res / SS_tot)
    """
    # Normalise shape and dtype so the subtraction below is element-wise.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    # Residual sum of squares (SS_res)
    ss_res = np.sum((y_true - y_pred) ** 2)

    # Total sum of squares (SS_tot)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)

    # Add machine epsilon to the denominator to avoid division by zero
    # when y_true is constant.
    return 1 - (ss_res / (ss_tot + np.finfo(float).eps))

def evaluate_regression_model(x_test: np.ndarray, y_test: np.ndarray,file_prefix: str, model: Model,
target: str, fold: int, threshold: float = 0.05) -> pd.DataFrame:
Expand Down Expand Up @@ -346,9 +367,10 @@ def evaluate_regression_model(x_test: np.ndarray, y_test: np.ndarray,file_prefix
abs_error = abs(error)

# Compute R² (coefficient of determination)
ss_res = np.sum((y_test - y_predict) ** 2) # Residual sum of squares
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2) # Total sum of squares
r2 = 1 - (ss_res / (ss_tot + np.finfo(float).eps)) # Add small epsilon to avoid division by zero
# ss_res = np.sum((y_test - y_predict) ** 2) # Residual sum of squares
# ss_tot = np.sum((y_test - np.mean(y_test)) ** 2) # Total sum of squares
# r2 = 1 - (ss_res / (ss_tot + np.finfo(float).eps)) # Add small epsilon to avoid division by zero
r2=calculate_r2(y_test, y_predict)

regression_metrics = ['MSE', 'MAE', 'MdAE', 'ACPER', 'MAPE', 'RMSE','R2']
metric_values = [
Expand Down Expand Up @@ -500,13 +522,14 @@ def fit_and_evaluate_model(x_train: np.ndarray, x_test: np.ndarray, y_train: np.
rmse_test = np.sqrt(np.mean((y_test - y_test_pred) ** 2))

# R² calculations
ss_res_train = np.sum((y_train - y_train_pred) ** 2)
ss_tot_train = np.sum((y_train - np.mean(y_train)) ** 2)
r2_train = 1 - (ss_res_train / (ss_tot_train + np.finfo(float).eps))

ss_res_test = np.sum((y_test - y_test_pred) ** 2)
ss_tot_test = np.sum((y_test - np.mean(y_test)) ** 2)
r2_test = 1 - (ss_res_test / (ss_tot_test + np.finfo(float).eps))
#ss_res_train = np.sum((y_train - y_train_pred) ** 2)
# ss_tot_train = np.sum((y_train - np.mean(y_train)) ** 2)
#r2_train = 1 - (ss_res_train / (ss_tot_train + np.finfo(float).eps))
r2_train = calculate_r2(y_train, y_train_pred)
r2_test = calculate_r2(y_test, y_test_pred)
# ss_res_test = np.sum((y_test - y_test_pred) ** 2)
#ss_tot_test = np.sum((y_test - np.mean(y_test)) ** 2)
# r2_test = 1 - (ss_res_test / (ss_tot_test + np.finfo(float).eps))

# Save metrics to a separate file
metrics_file = f"{model_file_prefix}.metrics.csv"
Expand All @@ -519,6 +542,15 @@ def fit_and_evaluate_model(x_train: np.ndarray, x_test: np.ndarray, y_train: np.
logging.info(f"Metrics saved to {metrics_file}")


# Log metrics to W&B
wandb.log({
"Train RMSE": rmse_train,
"Test RMSE": rmse_test,
"Train R²": r2_train,
"Test R²": r2_test
})


# Plot predictions vs. actual for both train and test
y_train_pred = callback_model.predict(x_train).flatten()
y_test_pred = callback_model.predict(x_test).flatten()
Expand Down Expand Up @@ -593,6 +625,13 @@ def train_single_label_models(df: pd.DataFrame, opts: options.Options) -> None:
test_size=opts.testSize,
random_state=split_random_state)

#x_train, x_test, y_train, y_test = stratified_continuous_split(x, y,
# stratify=target,
# test_size=opts.testSize,
# train_size= 1-test_size,
# continuous=True,
# random_state=split_random_state)

performance = fit_and_evaluate_model(x_train=x_train, x_test=x_test,
y_train=y_train, y_test=y_test,
fold=0, target=target, opts=opts)
Expand Down
25 changes: 25 additions & 0 deletions example/dilshana-sweep-trial/trial_sweep.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"py/object": "dfpl.options.Options",
"inputFile": "/home/shanavas/PycharmProjects/generate_dfpl_regression_data/data/cytoxicity_zscore.csv",
"outputDir": "example/cytotox/wandb",
"ecModelDir": "/home/shanavas/PycharmProjects/deepFPlearn/example/models/generic_encoder/",
"type": "smiles",
"fpType": "topological",
"fpSize": 2048,
"encFPSize": 256,
"enableMultiLabel": false,
"verbose": 2,
"trainAC": false,
"trainFNN": true,
"compressFeatures": true,
"kFolds": 5,
"testSize": 0.2,
"optimizer": "Adam",
"lossFunction": "mse",
"epochs": 5000,
"activationFunction": "tanh",
"fnnType": "REG",
"wabTracking": true,
"normalize" : true
}

10 changes: 5 additions & 5 deletions example/train_cytotox.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"py/object": "dfpl.options.Options",
"inputFile": "/home/shanavas/PycharmProjects/generate_dfpl_regression_data/data/cytoxicity_zscore.csv",
"outputDir": "example/cytotox/new/train_comp3",
"outputDir": "example/cytotox/new/train_trail",
"ecModelDir": "example/models/generic_encoder/",
"type": "smiles",
"fpType": "topological",
Expand All @@ -17,11 +17,11 @@
"optimizer": "Adam",
"lossFunction": "Huber",
"epochs": 5000,
"batchSize": 16 ,
"batchSize": 128 ,
"activationFunction": "tanh",
"dropout": 0.001 ,
"learningRate" : 0.00001 ,
"l2reg" : 0.00000005,
"dropout": 0.06600139458057926 ,
"learningRate" : 0.0008753463402134107 ,
"l2reg" : 0.00007087829555603556,
"fnnType": "REG",
"normalize" : true
}
27 changes: 27 additions & 0 deletions example/train_cytotox_good.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"py/object": "dfpl.options.Options",
"inputFile": "/home/shanavas/PycharmProjects/generate_dfpl_regression_data/data/cytoxicity_zscore.csv",
"outputDir": "example/cytotox/new/train_comp5",
"ecModelDir": "example/models/generic_encoder/",
"type": "smiles",
"fpType": "topological",
"fpSize": 2048,
"encFPSize": 256,
"enableMultiLabel": false,
"verbose": 2,
"trainAC": false,
"trainFNN": true,
"compressFeatures": true,
"kFolds": 5,
"testSize": 0.2,
"optimizer": "Adam",
"lossFunction": "Huber",
"epochs": 5000,
"batchSize": 32 ,
"activationFunction": "tanh",
"dropout": 0.05 ,
"learningRate" : 0.00001 ,
"l2reg" : 0.001,
"fnnType": "REG",
"normalize" : true
}
9 changes: 6 additions & 3 deletions sweep_cytotox.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ command:
- ${program}
- "train"
- "-f"
- "/home/shanavas/PycharmProjects/deepFPlearn/example/dilshana-sweep-trial/cytotox.json"
- "/home/shanavas/PycharmProjects/deepFPlearn/example/dilshana-sweep-trial/trial_sweep.json"
- ${args}
method: random

metric:
name: val_loss
goal: minimize
Expand All @@ -26,7 +27,9 @@ parameters:
min: 0.0001
max: 0.001
batchSize: # For batch size, it’s better to choose from discrete values
values: [32, 64, 128, 256, 512]
values: [32, 64, 128]
wabTarget:
value: AR
value: actual



2 changes: 1 addition & 1 deletion wandb/debug-internal.log
2 changes: 1 addition & 1 deletion wandb/debug.log
2 changes: 1 addition & 1 deletion wandb/latest-run

0 comments on commit 3026dfb

Please sign in to comment.