Merge branch 'master' of github.com:jurjen93/lofar_helpers
jurjen93 committed Oct 29, 2024
2 parents 92b1092 + 15e3734 commit 5e7b459
Showing 6 changed files with 322 additions and 29 deletions.
37 changes: 24 additions & 13 deletions neural_networks/__init__.py
@@ -9,8 +9,12 @@
import __main__
from astropy.io import fits

-from train_nn import ImagenetTransferLearning, load_checkpoint  # noqa
-from pre_processing_for_ml import normalize_fits
+from .train_nn import (
+    ImagenetTransferLearning,
+    load_checkpoint,
+    normalize_inputs,
+)  # noqa
+from .pre_processing_for_ml import normalize_fits

setattr(__main__, "ImagenetTransferLearning", ImagenetTransferLearning)

@@ -28,7 +32,7 @@ def __init__(
model_name: str = None,
device: str = None,
variational_dropout: int = 0,
-        **kwargs
+        **kwargs,
):
super().__init__(model_name, device)

@@ -47,30 +51,37 @@ def load_checkpoint(self, path) -> torch.nn.Module:
(
model,
_,
-            args,
+            self.args,
        ) = load_checkpoint(path, self.device).values()
-        self.resize = args["resize"]
-        self.lift = args["lift"]
+        self.resize = self.args["resize"]
+        self.lift = self.args["lift"]
return model

@functools.lru_cache(maxsize=1)
def prepare_data(self, input_path: str) -> torch.Tensor:
input_data: torch.Tensor = torch.from_numpy(process_fits(input_path))
input_data = input_data.to(self.dtype)
input_data = input_data.swapdims(0, 2).unsqueeze(0)
+        return self.prepare_batch(input_data)
+
+    def prepare_batch(self, batch: torch.Tensor, mean=None, std=None) -> torch.Tensor:
+        batch = batch.to(self.dtype).to(self.device)
        if self.resize != 0:
-            input_data = interpolate(
-                input_data, size=self.resize, mode="bilinear", align_corners=False
+            batch = interpolate(
+                batch, size=self.resize, mode="bilinear", align_corners=False
            )
-        input_data = input_data.to(self.device)
-        return input_data
+        if mean is None:
+            mean = self.mean
+        if std is None:
+            std = self.std
+        batch = normalize_inputs(batch, mean, std, normalize=1)
+        return batch

@torch.no_grad()
def predict(self, data: torch.Tensor):
with torch.autocast(dtype=self.dtype, device_type=self.device):
if self.variational_dropout > 0:
self.model.train()
# self.model.classifier.train()

predictions = torch.concat(
[
@@ -80,8 +91,8 @@ def predict(self, data: torch.Tensor):
dim=1,
)

-        mean = predictions.mean()
-        std = predictions.std()
+        mean = predictions.mean(dim=1)
+        std = predictions.std(dim=1)

print(mean, std)
return mean, std
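For context, a minimal usage sketch of the updated predictor API, assuming the cortexchange names used by the new confusion-matrix script below (the FITS path and the variational_dropout value are placeholders, not part of this commit):

from cortexchange.architecture import get_architecture

# Architecture/model names as used in neural_networks/plots/confusion_matrix.py.
StopPredictor = get_architecture("surf/TransferLearning")
predictor = StopPredictor(
    device="cuda",
    model_name="surf/dinov2_09739_rotations",
    variational_dropout=5,  # >0 keeps dropout active during predict()
)

# prepare_data() now ends by calling prepare_batch(), which resizes
# (when configured) and normalizes with the checkpoint's dataset statistics.
batch = predictor.prepare_data("example.fits")  # hypothetical path

# mean/std are now reduced over the dropout samples per input (dim=1),
# rather than collapsed over the whole batch.
mean, std = predictor.predict(batch)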
15 changes: 15 additions & 0 deletions neural_networks/parameters.txt
@@ -58,6 +58,21 @@ efficientnet_v2_l 1e-05 1 0.1 32 0.2 1 0 16 16 0
efficientnet_v2_l 1e-05 1 0.1 32 0.1 1 0 16 16 0
efficientnet_v2_l 1e-05 1 0.1 32 0.1 0 0 16 16 0

dinov2_vitl14_reg 1e-04 1 0.25 32 0.1 0 0 16 16 560 conv 0
dinov2_vitl14_reg 1e-04 1 0.1 32 0.1 0 0 16 16 560 conv 0
dinov2_vitl14_reg 1e-04 1 0.25 32 0.1 0 0 16 16 560 conv 1
dinov2_vitl14_reg 1e-04 1 0.1 32 0.1 0 0 16 16 560 conv 1
dinov2_vitl14_reg 1e-04 1 0.25 32 0.1 0 0 16 16 560 stack 0
dinov2_vitl14_reg 1e-04 1 0.1 32 0.1 0 0 16 16 560 stack 0
efficientnet_v2_l 1e-04 1 0.1 32 0.2 0 0 16 16 0 stack 0
dinov2_vitl14_reg 1e-04 1 0.25 32 0.1 0 0 16 16 560 stack 1
dinov2_vitl14_reg 1e-04 1 0.1 32 0.1 0 0 16 16 560 stack 1
efficientnet_v2_l 1e-04 1 0.1 32 0.2 0 0 16 16 0 stack 1
dinov2_vitl14_reg 1e-04 1 0.25 32 0.1 0 1 16 16 560 conv 0
dinov2_vitl14_reg 1e-04 1 0.1 32 0.1 0 1 16 16 560 conv 0
dinov2_vitl14_reg 1e-04 1 0.25 32 0.1 0 1 16 16 560 conv 1
dinov2_vitl14_reg 1e-04 1 0.1 32 0.1 0 1 16 16 560 conv 1
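For reference, a small sketch of how one of the new rows maps to named hyperparameters; the field order is taken from the updated `read` line in train_nn.job below, and the two trailing columns are the new `lift` and `flip_augmentations` fields:

# Field order assumed from train_nn.job:
# read model lr normalize dropout_p batch_size label_smoothing
#      stochastic_smoothing use_lora rank alpha resize lift flip_augmentations
row = "dinov2_vitl14_reg 1e-04 1 0.25 32 0.1 0 0 16 16 560 conv 0"
fields = [
    "model", "lr", "normalize", "dropout_p", "batch_size",
    "label_smoothing", "stochastic_smoothing", "use_lora",
    "rank", "alpha", "resize", "lift", "flip_augmentations",
]
params = dict(zip(fields, row.split()))
print(params["lift"], params["flip_augmentations"])  # -> conv 0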




207 changes: 207 additions & 0 deletions neural_networks/plots/confusion_matrix.py
@@ -0,0 +1,207 @@
from cortexchange.architecture import get_architecture, Architecture
from pathlib import Path
import sys
import os

SCRIPT_DIR = Path(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.dirname(SCRIPT_DIR))
from pre_processing_for_ml import normalize_fits
import matplotlib.pyplot as plt
import numpy as np
import torch
from functools import lru_cache
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from astropy.io import fits


class RawFitsDataset(Dataset):
def __init__(self, root_dir, mode="train"):
"""
Args:
root_dir (string): Directory with good/bad folders in it.
"""

modes = ("train", "val")
assert mode in modes

classes = {"stop": 0, "continue": 1}

root_dir = Path(root_dir)
assert root_dir.exists(), f"'{root_dir}' doesn't exist!"

ext = ".fits"
glob_ext = "*" + ext

self.root_dir = root_dir

for folder in (
root_dir / (cls + ("" if mode == "train" else "_val")) for cls in classes
):
        assert (
            folder.exists()
        ), f"class folder doesn't exist, got: '{str(folder.resolve())}'"
assert (
len(list(folder.glob(glob_ext))) > 0
), f"no '{ext}' files were found in '{str(folder.resolve())}'"

# Yes this code is way overengineered. Yes I also derive pleasure from writing it :) - RJS
#
# Actual documentation:
# You want all 'self.x' variables to be non-python objects such as numpy arrays,
# otherwise you get memory leaks in the PyTorch dataloader
self.data_paths, self.labels = map(
np.asarray,
list(
zip(
*(
(str(file), val)
for cls, val in classes.items()
for file in (
root_dir / (cls + ("" if mode == "train" else "_val"))
).glob(glob_ext)
)
)
),
)

assert len(self.data_paths) > 0
        self.sources = ", ".join(
            # removesuffix drops the extension; str.strip would remove any of
            # the characters '.fits' from both ends of the name
            sorted(str(elem).split("/")[-1].removesuffix(ext) for elem in self.data_paths)
        )
self.mode = mode
_, counts = np.unique(self.labels, return_counts=True)
self.label_ratio = counts[0] / counts[1]
# print(f'{mode}: using the following sources: {sources}')

@staticmethod
def transform_data(image_data):
"""
Transform data for preprocessing
"""

# FIXME: this should really be a parameter
image_data = torch.from_numpy(image_data).to(torch.bfloat16)
image_data = torch.movedim(image_data, -1, 0)

return image_data

@lru_cache(maxsize=1)
def __len__(self):
return len(self.data_paths)

def __getitem__(self, idx):

fits_path = self.data_paths[idx]
label = self.labels[idx]

image_data = process_fits(fits_path)
# there is always only one array

# Pre-processing
image_data = self.transform_data(image_data)

return image_data, label


def load_model(architecture_name, model_name, device="cpu"):
    StopPredictor: type[Architecture] = get_architecture(architecture_name)
predictor = StopPredictor(device=device, model_name=model_name)
return predictor


@torch.no_grad()
def get_confusion_matrix(predictor, dataloader, mean, std, thresholds):
confusion_matrices = np.zeros((len(thresholds), 2, 2))
thresholds = torch.tensor(thresholds)
    for img, label in dataloader:
        data = predictor.prepare_batch(img, mean=mean, std=std)
        pred = torch.sigmoid(predictor.model(data)).to("cpu")
        # (batch, 1) >= (n_thresholds,) broadcasts to one column per threshold
        preds_thres = pred >= thresholds
for i, _ in enumerate(thresholds):
confusion_matrices[i] += confusion_matrix(
label, preds_thres[:, i], labels=[0, 1]
)

return confusion_matrices


def plot_conf_matrices(savedir, confusion_matrices, thresholds):
    # use the savedir argument; don't overwrite it from the global model_name
    os.makedirs(savedir, exist_ok=True)
for i, conf_matrix in enumerate(confusion_matrices):

disp = ConfusionMatrixDisplay(
# Normalization
conf_matrix / np.sum(conf_matrix, axis=1, keepdims=True),
display_labels=["stop", "continue"],
)
# print(conf_matrix)
disp.plot()

plt.savefig(f"{savedir}/confusion_thres_{thresholds[i]:.3f}.png")


def process_fits(fits_path):
with fits.open(fits_path) as hdul:
image_data = hdul[0].data

return normalize_fits(image_data)


def get_dataloader(data_root, mode="val", batch_size=32):
    dataset = RawFitsDataset(data_root, mode=mode)
num_workers = min(12, len(os.sched_getaffinity(0)))

prefetch_factor, persistent_workers = (
(2, True) if num_workers > 0 else (None, False)
)
dataloader = DataLoader(
dataset,
        batch_size=batch_size,
shuffle=True,
num_workers=num_workers,
persistent_workers=persistent_workers,
prefetch_factor=prefetch_factor,
drop_last=False,
)

return dataloader


if __name__ == "__main__":
# Latest model
model_name = "surf/dinov2_09739_rotations"
TESTING = True
architecture_name = "surf/TransferLearning"
# Set Device here
DEVICE = "cuda"
# Thresholds to consider for classification
thresholds = [0.2, 0.3, 0.4, 0.5]
# Change to directory of files. Should have subfolders 'continue_val' and 'stop_val'
data_root = "/scratch-shared/CORTEX/public.spider.surfsara.nl/lofarvwf/jdejong/CORTEX/calibrator_selection_robertjan/cnn_data"
# Uses cached confusion matrix for testing the plotting functionalities
if model_name == "surf/dinov2_09739_rotations" and TESTING:
confusion_matrices = np.asarray(
[
[[149, 56], [2, 116]],
[[178, 27], [4, 114]],
[[190, 15], [6, 112]],
[[191, 14], [7, 111]],
]
)
else:

dataloader = get_dataloader(data_root, mode="val")

predictor = load_model(architecture_name, model_name, device=DEVICE)

mean, std = predictor.args["dataset_mean"], predictor.args["dataset_std"]

confusion_matrices = get_confusion_matrix(
predictor, dataloader, mean, std, thresholds
)

print(confusion_matrices)

plot_conf_matrices(model_name.split("/")[-1], confusion_matrices, thresholds)
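As a sanity check on the cached matrices above, a short worked example (sklearn convention: rows are true labels, 0 = stop, 1 = continue; numbers are the threshold-0.5 entry):

import numpy as np

cm = np.array([[191, 14], [7, 111]])  # threshold 0.5, cached above
tn, fp, fn, tp = cm.ravel()

precision = tp / (tp + fp)  # 111 / 125 = 0.888
recall = tp / (tp + fn)     # 111 / 118 ≈ 0.941

# plot_conf_matrices() row-normalizes, so each displayed cell is a
# per-true-class rate, e.g. 191 / 205 ≈ 0.932 of true "stop" kept.
print(precision, recall)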
9 changes: 7 additions & 2 deletions neural_networks/requirements.txt
@@ -1,9 +1,14 @@
-matplotlib
-torch
+torch>=2.1.2
torchvision
torcheval
tqdm
+matplotlib
joblib
-astropy
+astropy>6.0.0
xformers
tensorboard
+dino-finetune @ git+https://github.com/sara-nl/dinov2-finetune.git
+scikit-learn


21 changes: 19 additions & 2 deletions neural_networks/train_nn.job
@@ -22,7 +22,7 @@ SLURM_ARRAY_TASK_ID=${SLURM_ARRAY_TASK_ID:=1}
PARAMS=$(sed -n "${SLURM_ARRAY_TASK_ID}p" $PARAM_FILE)

# Parse the parameters
-read model lr normalize dropout_p batch_size label_smoothing stochastic_smoothing use_lora rank alpha resize <<< $PARAMS
+read model lr normalize dropout_p batch_size label_smoothing stochastic_smoothing use_lora rank alpha resize lift flip_augmentations <<< $PARAMS

if [ "$use_lora" -eq 1 ]; then
LORA_ARG="--use_lora"
@@ -36,7 +36,24 @@ else
STOCHASTIC_SMOOTHING=""
fi

if [ "$flip_augmentations" -eq 1 ]; then
FLIP_AUGMENTATIONS="--flip_augmentations"
else
FLIP_AUGMENTATIONS=""
fi

# Scale up by 1e6 to convert to integers for comparison
scaled_lr=$(echo "$lr * 1000000" | awk '{printf("%d", $1)}')
scaled_threshold=$(echo "4e-05 * 1000000" | awk '{printf("%d", $1)}')

if [ "$scaled_lr" -le "$scaled_threshold" ]; then
EPOCHS="250"
else
EPOCHS="120"
fi

DATA_TRAINDATA_PATH="/scratch-shared/CORTEX/public.spider.surfsara.nl/lofarvwf/jdejong/CORTEX/calibrator_selection_robertjan/cnn_data/"

# Execute your Python script with the given parameters
-python train_nn.py $DATA_TRAINDATA_PATH --model $model --lr $lr --normalize $normalize --dropout_p $dropout_p --batch_size $batch_size --log_path grid_search_2 --label_smoothing $label_smoothing --rank $rank --resize $resize --alpha $alpha $LORA_ARG $STOCHASTIC_SMOOTHING -d
+echo $DATA_TRAINDATA_PATH --model $model --lr $lr --normalize $normalize --dropout_p $dropout_p --batch_size $batch_size --log_path grid_search_2 --label_smoothing $label_smoothing --rank $rank --resize $resize --alpha $alpha $LORA_ARG $STOCHASTIC_SMOOTHING -d --epochs $EPOCHS --lift $lift $FLIP_AUGMENTATIONS
+python train_nn.py $DATA_TRAINDATA_PATH --model $model --lr $lr --normalize $normalize --dropout_p $dropout_p --batch_size $batch_size --log_path grid_search_2 --label_smoothing $label_smoothing --rank $rank --resize $resize --alpha $alpha $LORA_ARG $STOCHASTIC_SMOOTHING -d --epochs $EPOCHS --lift $lift $FLIP_AUGMENTATIONS
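The awk block above encodes a simple schedule rule; a Python sketch of the same logic (bash lacks float comparison, hence the 1e6 integer scaling):

def epochs_for(lr: float, threshold: float = 4e-05) -> int:
    # Mirror the awk logic: scale both sides to integers before comparing.
    return 250 if int(lr * 1_000_000) <= int(threshold * 1_000_000) else 120

assert epochs_for(1e-05) == 250  # small learning rates get a longer schedule
assert epochs_for(1e-04) == 120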
