
Commit 2336f5f
move dataset cache dir to dataset directory
LVeefkind committed Oct 16, 2024
1 parent 8433c73 commit 2336f5f
Showing 3 changed files with 21 additions and 22 deletions.
.gitignore (2 changes: 1 addition & 1 deletion)
@@ -44,5 +44,5 @@ venv/
.cache/

_cache/
grid_search/
grid_search*/
public.spider.surfsara.nl/
neural_networks/pre_processing_for_ml.py (20 changes: 10 additions & 10 deletions)
@@ -11,9 +11,6 @@
import joblib
from matplotlib.colors import SymLogNorm
from torch.utils.data import Dataset
from concurrent.futures import ThreadPoolExecutor, as_completed

cache = joblib.Memory(location="_cache", verbose=0)


def get_rms(data: np.ndarray, maskSup=1e-7):
@@ -95,7 +92,7 @@ def transform_data(root_dir, classes=("continue", "stop"), modes=("", "_val")):
def process_fits(fits_path):
with fits.open(fits_path) as hdul:
image_data = hdul[0].data

# assert image_data.shape[2] == 2048, (image_data.shape, image_data.shape[2], fits_path)
transformed = normalize_fits(image_data)

np.savez_compressed(
@@ -111,7 +108,6 @@ def process_fits(fits_path)
for fits_path in (root_dir / (cls + mode)).glob("*.fits")
)


class FitsDataset(Dataset):
def __init__(self, root_dir, mode="train"):
"""
@@ -130,6 +126,8 @@ def __init__(self, root_dir, mode="train"):
ext = ".npz"
glob_ext = "*" + ext

self.root_dir = root_dir

for folder in (
root_dir / (cls + ("" if mode == "train" else "_val")) for cls in classes
):
@@ -170,16 +168,18 @@ def __init__(self, root_dir, mode="train"):
# print(f'{mode}: using the following sources: {sources}')

def compute_statistics(self, normalize):
self.mean, self.std = FitsDataset._compute_statistics(self, normalize)
cache = Memory(location=self.root_dir / '_cache')
cached_compute = cache.cache(FitsDataset._compute_statistics)
self.mean, self.std = cached_compute(self, normalize)
return self.mean, self.std

@staticmethod
@cache.cache()
def _compute_statistics(loader, normalize, verbose=True):
if not normalize:
return torch.asarray([0]), torch.asarray([1])
if verbose:
print("Computing dataset statistics")
if not normalize:
return torch.asarray([0]), torch.asarray([1])

means = []
sums_of_squares = []
f = (lambda x: torch.log(x + 1e-10)) if normalize == 2 else lambda x: x
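
The hunk above moves the statistics cache from a module-level `_cache` directory into the dataset's own `root_dir`, so memoized results live next to the data they describe and stale caches don't leak across datasets. A minimal sketch of the pattern, assuming `from joblib import Memory` is in scope (the new code calls `Memory` directly) and using a hypothetical `compute_stats` stand-in for `FitsDataset._compute_statistics`:

from pathlib import Path

import torch
from joblib import Memory  # assumed import; the diff references Memory directly


def compute_stats(root_dir, normalize):
    # Hypothetical stand-in for FitsDataset._compute_statistics.
    if not normalize:
        return torch.asarray([0]), torch.asarray([1])
    data = torch.rand(16, 2048)  # placeholder for the real FITS data
    return data.mean(), data.std()


root_dir = Path("cnn_data")
# The cache directory now lives inside the dataset itself.
cache = Memory(location=root_dir / "_cache", verbose=0)
cached_compute = cache.cache(compute_stats)

mean, std = cached_compute(root_dir, True)  # computed on first call
mean, std = cached_compute(root_dir, True)  # replayed from root_dir/_cache

Note that the decorator form `@cache.cache()` needs a cache object at import time, which is why the refactor drops it in favour of wrapping `_compute_statistics` at call time, once `root_dir` is known.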
@@ -281,7 +281,7 @@ def make_histogram(root_dir):


if __name__ == "__main__":
root = f"public.spider.surfsara.nl/project/lofarvwf/jdejong/CORTEX/calibrator_selection_robertjan/cnn_data"
root = f"/scratch-shared/CORTEX/public.spider.surfsara.nl/lofarvwf/jdejong/CORTEX/calibrator_selection_robertjan/cnn_data"
transform_data(root)

# make_histogram(root)
neural_networks/train_nn.py (21 changes: 10 additions & 11 deletions)
@@ -24,8 +24,6 @@
PROFILE = False
SEED = None

cache = joblib.Memory(location="_cache", verbose=0)


def init_vit(model_name):
assert model_name == "vit_l_16"
@@ -458,8 +456,7 @@ def main(
train_dataloader=train_dataloader,
optimizer=optimizer,
logging_interval=logging_interval,
label_smoothing=label_smoothing,
stochastic_smoothing=stochastic_smoothing,
smoothing_fn=partial(label_smoother, stochastic=stochastic_smoothing, smoothing_factor=label_smoothing),
)
val_step_f = partial(val_step_f, val_dataloader=val_dataloader)

@@ -558,6 +555,13 @@ def val_step(model, val_dataloader, global_step, metrics_logger, prepare_data_f)
return mean_loss, logits, targets


def label_smoother(labels: torch.tensor, smoothing_factor: float = 0.1, stochastic: bool = True):
smoothing_factor = smoothing_factor - (
torch.rand_like(labels) * smoothing_factor * stochastic
)
smoothed_label = (1 - smoothing_factor) * labels + 0.5 * smoothing_factor
return smoothed_label

def train_step(
model,
optimizer,
@@ -566,8 +570,7 @@ def train_step(
global_step,
logging_interval,
metrics_logger,
label_smoothing=0,
stochastic_smoothing=False,
smoothing_fn,
):
# print("training")
model.train()
@@ -578,11 +581,7 @@
global_step += 1

data, labels = prepare_data_f(data, labels)
# Stochastic smoothing factor
smoothing_factor = label_smoothing - (
torch.rand_like(labels) * label_smoothing * stochastic_smoothing
)
smoothed_label = (1 - smoothing_factor) * labels + 0.5 * smoothing_factor
smoothed_label = smoothing_fn(labels)
data = augmentation(data)

optimizer.zero_grad(set_to_none=True)
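
Together with the `partial(label_smoother, ...)` binding in `main`, the hunks above pull the inline smoothing arithmetic out of `train_step`, which now just calls `smoothing_fn(labels)`. A quick standalone check of the extracted function, assuming binary labels in {0, 1}:

from functools import partial

import torch


def label_smoother(labels, smoothing_factor=0.1, stochastic=True):
    # stochastic=True draws an effective factor per label from
    # U(0, smoothing_factor]; stochastic=False keeps it fixed.
    smoothing_factor = smoothing_factor - (
        torch.rand_like(labels) * smoothing_factor * stochastic
    )
    return (1 - smoothing_factor) * labels + 0.5 * smoothing_factor


labels = torch.tensor([0.0, 1.0])

smoothing_fn = partial(label_smoother, stochastic=False, smoothing_factor=0.1)
print(smoothing_fn(labels))  # tensor([0.0500, 0.9500]) on every call

smoothing_fn = partial(label_smoother, stochastic=True, smoothing_factor=0.1)
print(smoothing_fn(labels))  # e.g. tensor([0.0132, 0.9966]); varies per call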
