Commit

clean up
mieskolainen committed Oct 19, 2024
1 parent deb5f33 commit 69765bf
Showing 2 changed files with 51 additions and 49 deletions.
22 changes: 11 additions & 11 deletions .github/workflows/icenet-install-test.yml
@@ -104,37 +104,37 @@ jobs:
#
- name: Deep Learning system integration test (brem 1)
run: |
- source setenv-github-actions.sh && maxevents=10000; source tests/runme_brem.sh && echo "yes"
- source superclean.sh
+ source setenv-github-actions.sh && maxevents=10000; source tests/runme_brem.sh
+ echo "yes" | source superclean.sh
#
- name: Deep Learning system integration test (brem 2)
run: |
- source setenv-github-actions.sh && maxevents=10000; source tests/runme_brem_reweight.sh && echo "yes"
- source superclean.sh
+ source setenv-github-actions.sh && maxevents=10000; source tests/runme_brem_reweight.sh
+ echo "yes" | source superclean.sh
#
- name: Deep Learning system integration test (zee 1)
run: |
- source setenv-github-actions.sh && maxevents=1000; source tests/runme_zee.sh && echo "yes"
+ source setenv-github-actions.sh && maxevents=1000; source tests/runme_zee.sh
#
- name: Deep Learning system integration test (zee 2)
run: |
- source setenv-github-actions.sh && maxevents=1000; source tests/runme_zee_mixed.sh && echo "yes"
- source superclean.sh
+ source setenv-github-actions.sh && maxevents=1000; source tests/runme_zee_mixed.sh
+ echo "yes" | source superclean.sh
#
- name: Deep Learning system integration test (hnl 1)
run: |
- source setenv-github-actions.sh && maxevents=1000; source tests/runme_hnl.sh && echo "yes"
- source superclean.sh
+ source setenv-github-actions.sh && maxevents=1000; source tests/runme_hnl.sh
+ echo "yes" | source superclean.sh
#
- name: Deep Learning system integration test (trg 1)
run: |
- source setenv-github-actions.sh && maxevents=10000; source tests/runme_trg.sh && echo "yes"
- source superclean.sh
+ source setenv-github-actions.sh && maxevents=10000; source tests/runme_trg.sh
+ echo "yes" | source superclean.sh
#
- name: Deep Learning system integration test (eid 1)
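The pattern across all of these test steps is the same: the trailing `&& echo "yes"` is dropped from the test invocation, and the confirmation is instead piped into `superclean.sh`, presumably so the cleanup script's confirmation prompt is answered automatically. A minimal Python sketch of the difference, assuming `superclean.sh` reads a `yes`/`no` answer from stdin (the script's actual prompt is not shown in this diff):

```python
import subprocess

# Old form: `run_tests && echo "yes"` only printed "yes" to stdout after the tests passed;
# a separate `source superclean.sh` never received that text on its stdin.
# New form: the confirmation is fed directly to the cleanup script's stdin.
result = subprocess.run(
    ["bash", "superclean.sh"],  # hypothetical invocation of the repo's cleanup script
    input="yes\n",              # auto-answer the assumed confirmation prompt
    text=True,
    capture_output=True,
)
print(result.returncode, result.stdout[:200])
```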
78 changes: 40 additions & 38 deletions icenet/tools/process.py
@@ -105,7 +105,7 @@ def read_config(config_path='configs/xyz/', runmode='all'):

args = {}
config_yaml_file = cli.config
- with open(f'{cwd}/{config_path}/{config_yaml_file}', 'r') as f:
+ with open(os.path.join(cwd, config_path, config_yaml_file), 'r') as f:
try:
args = yaml.load(f, Loader=yaml.FullLoader)
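The change in this file is systematic: f-string path concatenation is replaced by os.path.join throughout. A minimal sketch with hypothetical values showing the practical difference, given that the default config_path ends in a slash:

```python
import os

cwd = '/home/user/icenet'        # hypothetical working directory
config_path = 'configs/xyz/'     # note the trailing slash in the default argument
config_yaml_file = 'tune0.yml'   # hypothetical value of cli.config

# Old style: manual concatenation duplicates the separator
print(f'{cwd}/{config_path}/{config_yaml_file}')
# -> /home/user/icenet/configs/xyz//tune0.yml

# New style: os.path.join inserts separators only where needed
print(os.path.join(cwd, config_path, config_yaml_file))
# -> /home/user/icenet/configs/xyz/tune0.yml
```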

@@ -130,7 +130,7 @@ def read_config(config_path='configs/xyz/', runmode='all'):
yaml = YAML()
yaml.allow_duplicate_keys = True

- with open(f'{cwd}/{config_path}/{file}', 'r') as f:
+ with open(os.path.join(cwd, config_path, file), 'r') as f:
try:
inputmap = yaml.load(f)

@@ -223,9 +223,9 @@ def read_config(config_path='configs/xyz/', runmode='all'):
hash_args = {}

# Critical Python files content
- files = {'cuts': f'{cwd}/{config_path}/cuts.py',
- 'filter': f'{cwd}/{config_path}/filter.py',
- 'inputvars': f'{cwd}/{config_path}/{args["inputvars"]}.py'}
+ files = {'cuts': os.path.join(cwd, config_path, 'cuts.py'),
+ 'filter': os.path.join(cwd, config_path, 'filter.py'),
+ 'inputvars': os.path.join(cwd, config_path, f'{args["inputvars"]}.py')}

for key in files.keys():
if os.path.exists(files[key]):
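The files dict above appears to feed hash_args, which elsewhere in this file stamps cache directories and filenames (data__{__hash_genesis__}). A rough, self-contained sketch of that idea, not the repository's exact implementation: hash the content of the critical config files so that editing cuts.py, filter.py, or the inputvars module invalidates the cache.

```python
import hashlib
import os

def content_hash(files: dict) -> str:
    """Fold the byte content of the existing files into one short hex digest (sketch)."""
    h = hashlib.sha256()
    for key in sorted(files):
        if os.path.exists(files[key]):
            with open(files[key], 'rb') as f:
                h.update(f.read())
    return h.hexdigest()[:12]

files = {'cuts':   os.path.join('configs/xyz', 'cuts.py'),
         'filter': os.path.join('configs/xyz', 'filter.py')}
print(content_hash(files))   # e.g. the kind of token used in data__<hash> below
```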
@@ -524,10 +524,10 @@ def get_chunk_ind(N):
chunks = int(np.ceil(N / args['pickle_size']))
return aux.split_start_end(range(N), chunks)

- cache_directory = aux.makedir(f'{args["datadir"]}/data__{args["__hash_genesis__"]}')
+ cache_directory = aux.makedir(os.path.join(args["datadir"], f'data__{args["__hash_genesis__"]}'))

# Check do we have already computed pickles ready
- if (os.path.exists(f'{cache_directory}/output_0.pkl') and args['__use_cache__']):
+ if (os.path.exists(os.path.join(cache_directory, 'output_0.pkl')) and args['__use_cache__']):
print(f'Found existing pickle data under: {cache_directory} and --use_cache 1. [done] ', 'green')
return

@@ -548,7 +548,7 @@ def get_chunk_ind(N):
## New code

def save_pickle(i):
- with open(f'{cache_directory}/output_{i}.pkl', 'wb') as handle:
+ with open(os.path.join(cache_directory, f'output_{i}.pkl'), 'wb') as handle:
pickle.dump([X[C[i][0]:C[i][-1]], Y[C[i][0]:C[i][-1]], W[C[i][0]:C[i][-1]], ids, info, args],
handle, protocol=pickle.HIGHEST_PROTOCOL)

@@ -566,15 +566,15 @@ def save_pickle(i):
print(f'Saving took {toc:0.2f} sec')

# Save args
- aux.yaml_dump(data=args, filename=f'{args["datadir"]}/data__{args["__hash_genesis__"]}.yml')
+ aux.yaml_dump(data=args, filename=os.path.join(args["datadir"], f'data__{args["__hash_genesis__"]}.yml'))

"""
# OLD code
tic = time.time()
for i in tqdm(range(len(C))):
- with open(f'{cache_directory}/output_{i}.pkl', 'wb') as handle:
+ with open(os.path.join(cache_directory, f'output_{i}.pkl'), 'wb') as handle:
pickle.dump([X[C[i][0]:C[i][-1]], Y[C[i][0]:C[i][-1]], W[C[i][0]:C[i][-1]], ids, info, args], \
handle, protocol=pickle.HIGHEST_PROTOCOL)
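Both the new writer and the old (commented-out) one follow the same chunked-cache pattern: split the N events into pickle_size-sized chunks, write one output_i.pkl per chunk into a hash-stamped directory, and skip everything if output_0.pkl already exists. A self-contained sketch of that pattern with toy data; split_start_end and the directory name are hypothetical stand-ins for aux.split_start_end and aux.makedir:

```python
import os
import pickle
import numpy as np

def split_start_end(indices, chunks):
    """Hypothetical stand-in: split an index range into per-chunk (start, end) pairs."""
    edges = np.linspace(0, len(indices), chunks + 1, dtype=int)
    return [(int(edges[i]), int(edges[i + 1])) for i in range(chunks)]

X = np.random.rand(1000, 4)                    # toy event array
pickle_size = 300                              # max events per cache file
cache_directory = 'cache/data__deadbeef'       # hypothetical hash-stamped directory
os.makedirs(cache_directory, exist_ok=True)

if os.path.exists(os.path.join(cache_directory, 'output_0.pkl')):
    print(f'Found existing pickle data under: {cache_directory} [done]')
else:
    C = split_start_end(range(len(X)), int(np.ceil(len(X) / pickle_size)))
    for i, (a, b) in enumerate(C):
        with open(os.path.join(cache_directory, f'output_{i}.pkl'), 'wb') as handle:
            pickle.dump(X[a:b], handle, protocol=pickle.HIGHEST_PROTOCOL)
```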
@@ -600,9 +600,9 @@ def combine_pickle_data(args):

num_cpus = args['num_cpus']

- cache_directory = aux.makedir(f'{args["datadir"]}/data__{args["__hash_genesis__"]}')
- 
- if (not os.path.exists(f'{cache_directory}/output_0.pkl')):
+ cache_directory = aux.makedir(os.path.join(args["datadir"], f'data__{args["__hash_genesis__"]}'))
+ if not os.path.exists(os.path.join(cache_directory, 'output_0.pkl')):
raise Exception(__name__ + f'.process_pickle_data: No genesis stage pickle data under "{cache_directory}" [execute --runmode genesis and set --maxevents N]')

## New version
@@ -658,9 +658,9 @@ def combine_pickle_data(args):
tic = time.time()
for i in tqdm(range(num_files)):
- with open(f'{cache_directory}/output_{i}.pkl', 'rb') as handle:
+ with open(os.path.join(cache_directory, f'output_{i}.pkl'), 'rb') as handle:
X_, Y_, W_, ids, info, genesis_args = pickle.load(handle)
if i > 0:
X = np.concatenate((X, X_), axis=0) # awkward will cast numpy automatically
Y = np.concatenate((Y, Y_), axis=0)
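The reading side mirrors the writer: load output_0.pkl first, then concatenate each subsequent chunk onto it. A short sketch continuing the toy example above; the file-counting line is a hypothetical stand-in for however num_files is actually obtained:

```python
import os
import pickle
import numpy as np

cache_directory = 'cache/data__deadbeef'   # same hypothetical directory as in the sketch above
num_files = len([f for f in os.listdir(cache_directory) if f.startswith('output_')])

X = None
for i in range(num_files):
    with open(os.path.join(cache_directory, f'output_{i}.pkl'), 'rb') as handle:
        X_ = pickle.load(handle)
    X = X_ if X is None else np.concatenate((X, X_), axis=0)

print(X.shape)
```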
@@ -688,7 +688,7 @@ def train_eval_data_processor(args, func_factor, mvavars, runmode):
# --------------------------------------------------------------------
# 1. Pickle data combiner step

cache_filename = f'{args["datadir"]}/data__{args["__hash_genesis__"]}.pkl'
cache_filename = os.path.join(args["datadir"], f'data__{args["__hash_genesis__"]}.pkl')

if args['__use_cache__'] == False or (not os.path.exists(cache_filename)):

@@ -729,7 +729,7 @@ def train_eval_data_processor(args, func_factor, mvavars, runmode):
# --------------------------------------------------------------------
# 2. High level data step

cache_filename = f'{args["datadir"]}/processed_data__{runmode}__{args["__hash_post_genesis__"]}.pkl'
cache_filename = os.path.join(args["datadir"], f'processed_data__{runmode}__{args["__hash_post_genesis__"]}.pkl')

if args['__use_cache__'] == False or (not os.path.exists(cache_filename)):

@@ -1020,21 +1020,20 @@ def train_models(data_trn, data_val, args=None):
# -----------------------------
# Prepare output folders

targetdir = f'{args["plotdir"]}/train'
targetdir = os.path.join(f'{args["plotdir"]}', 'train')

subdirs = ['']
for sd in subdirs:
- os.makedirs(targetdir + '/' + sd, exist_ok = True)
+ os.makedirs(os.path.join(targetdir, sd), exist_ok = True)
# ----------------------------------

# Print training stats
output_file = f'{args["plotdir"]}/train/stats_train_weights.log'
output_file = os.path.join(args["plotdir"], 'train', 'stats_train_weights.log')
prints.print_weights(weights=data_trn['data'].w, y=data_trn['data'].y, output_file=output_file)

output_file = f'{args["plotdir"]}/train/stats_validate_weights.log'
output_file = os.path.join(args["plotdir"], 'train', 'stats_validate_weights.log')
prints.print_weights(weights=data_val['data'].w, y=data_val['data'].y, output_file=output_file)


# @@ Tensor normalization @@
if data_trn['data_tensor'] is not None and (args['varnorm_tensor'] == 'zscore'):

@@ -1046,7 +1045,8 @@ def train_models(data_trn, data_val, args=None):
data_val['data_tensor'] = io.apply_zscore_tensor(data_val['data_tensor'], X_mu_tensor, X_std_tensor)

# Save it for the evaluation
- pickle.dump({'X_mu_tensor': X_mu_tensor, 'X_std_tensor': X_std_tensor}, open(args["modeldir"] + '/zscore_tensor.pkl', 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
+ pickle.dump({'X_mu_tensor': X_mu_tensor, 'X_std_tensor': X_std_tensor},
+             open(os.path.join(args["modeldir"], 'zscore_tensor.pkl'), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)

# --------------------------------------------------------------------

@@ -1128,10 +1128,11 @@ def train_models(data_trn, data_val, args=None):
data_val['data'].x = io.apply_zscore(data_val['data'].x, X_mu, X_std)

# Save it for the evaluation
- pickle.dump({'X_mu': X_mu, 'X_std': X_std, 'ids': data_trn['data'].ids}, open(args['modeldir'] + '/zscore.pkl', 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
+ pickle.dump({'X_mu': X_mu, 'X_std': X_std, 'ids': data_trn['data'].ids},
+             open(os.path.join(args["modeldir"], 'zscore.pkl'), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)

# Print train
output_file = f'{args["plotdir"]}/train/stats_train_{args["varnorm"]}.log'
output_file = os.path.join(f'{args["plotdir"]}', 'train', f'stats_train_{args["varnorm"]}.log')
prints.print_variables(data_trn['data'].x, data_trn['data'].ids, W=data_trn['data'].w, output_file=output_file)

elif args['varnorm'] == 'madscore' :
@@ -1144,10 +1145,11 @@ def train_models(data_trn, data_val, args=None):
data_val['data'].x = io.apply_zscore(data_val['data'].x, X_m, X_mad)

# Save it for the evaluation
- pickle.dump({'X_m': X_m, 'X_mad': X_mad, 'ids': data_trn['data'].ids}, open(args['modeldir'] + '/madscore.pkl', 'wb'), protocol=pickle.HIGHEST_PROTOCOL)
+ pickle.dump({'X_m': X_m, 'X_mad': X_mad, 'ids': data_trn['data'].ids},
+             open(os.path.join(args["modeldir"], 'madscore.pkl'), 'wb'), protocol=pickle.HIGHEST_PROTOCOL)

# Print train
output_file = f'{args["plotdir"]}/train/stats_train_{args["varnorm"]}.log'
output_file = os.path.join(f'{args["plotdir"]}', 'train', f'stats_train_{args["varnorm"]}.log')
prints.print_variables(data_trn['data'].x, data_trn['data'].ids, W=data_trn['data'].w, output_file=output_file)

# -------------------------------------------------------------
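Both the zscore and madscore branches follow the same round-trip: compute the statistics on the training data, dump them under modeldir, and later reload them in evaluate_models so the identical transform is applied to evaluation data. A minimal sketch of that round-trip, with inline NumPy standing in for the io.calc_zscore / io.apply_zscore helpers and a hypothetical model directory:

```python
import os
import pickle
import numpy as np

modeldir = 'checkpoint/xyz'                # hypothetical model directory
os.makedirs(modeldir, exist_ok=True)

# Training side: compute and persist the statistics
X_trn = np.random.rand(500, 8)
X_mu, X_std = X_trn.mean(axis=0), X_trn.std(axis=0)
with open(os.path.join(modeldir, 'zscore.pkl'), 'wb') as f:
    pickle.dump({'X_mu': X_mu, 'X_std': X_std}, f, protocol=pickle.HIGHEST_PROTOCOL)

# Evaluation side: reload and apply the identical transform
with open(os.path.join(modeldir, 'zscore.pkl'), 'rb') as f:
    Z = pickle.load(f)
X_eval = (np.random.rand(100, 8) - Z['X_mu']) / Z['X_std']
print(X_eval.mean(axis=0))
```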
@@ -1314,7 +1316,7 @@ def set_distillation_drain(ID, param, inputs, dtype='torch'):
else:
raise Exception(__name__ + f".train_models: Unsupported distillation source <{param['train']}>")
# --------------------------------------------------------

except KeyboardInterrupt:
print(f'CTRL+C catched -- continue with the next model', 'red')

@@ -1373,16 +1375,16 @@ def evaluate_models(data=None, info=None, args=None):
# -----------------------------
# Prepare output folders

targetdir = f'{args["plotdir"]}/eval'
targetdir = os.path.join(f'{args["plotdir"]}', 'eval')

subdirs = ['', 'ROC', 'MVA', 'COR']
for sd in subdirs:
- os.makedirs(targetdir + '/' + sd, exist_ok = True)
+ os.makedirs(os.path.join(targetdir, sd), exist_ok = True)
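One detail of the folder preparation above: the subdirs list starts with an empty string, and os.path.join(targetdir, '') is simply targetdir with a trailing separator, so the loop also creates the base directory itself. A small sketch with a hypothetical plot directory:

```python
import os

targetdir = os.path.join('figs', 'eval')   # hypothetical plot directory
for sd in ['', 'ROC', 'MVA', 'COR']:
    # sd == '' creates the base directory; exist_ok avoids errors on reruns
    os.makedirs(os.path.join(targetdir, sd), exist_ok=True)

print(sorted(os.listdir(targetdir)))       # -> ['COR', 'MVA', 'ROC']
```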

# --------------------------------------------------------------------

# Print evaluation stats
output_file = f'{args["plotdir"]}/eval/stats_eval_weights.log'
output_file = os.path.join(args["plotdir"], 'eval', 'stats_eval_weights.log')
prints.print_weights(weights=data['data'].w, y=data['data'].y, output_file=output_file)


@@ -1443,7 +1445,7 @@ def evaluate_models(data=None, info=None, args=None):
if data['data_tensor'] is not None and (args['varnorm_tensor'] == 'zscore'):

print('\nZ-score normalizing tensor variables ...', 'magenta')
Z_data = pickle.load(open(args["modeldir"] + '/zscore_tensor.pkl', 'rb'))
Z_data = pickle.load(open(os.path.join(args["modeldir"], 'zscore_tensor.pkl'), 'rb'))
X_mu_tensor = Z_data['X_mu_tensor']
X_std_tensor = Z_data['X_std_tensor']

@@ -1453,25 +1455,25 @@ def evaluate_models(data=None, info=None, args=None):
if args['varnorm'] == 'zscore' or args['varnorm'] == 'zscore-weighted':

print('\nZ-score normalizing variables ...', 'magenta')
Z_data = pickle.load(open(args["modeldir"] + '/zscore.pkl', 'rb'))
Z_data = pickle.load(open(os.path.join(args["modeldir"], 'zscore.pkl'), 'rb'))
X_mu = Z_data['X_mu']
X_std = Z_data['X_std']

X = io.apply_zscore(X, X_mu, X_std)

output_file = f'{args["plotdir"]}/eval/stats_variables_{args["varnorm"]}.log'
output_file = os.path.join(args["plotdir"], 'eval', f'stats_variables_{args["varnorm"]}.log')
prints.print_variables(X, ids, weights, output_file=output_file)

elif args['varnorm'] == 'madscore':

print('\nMAD-score normalizing variables ...', 'magenta')
Z_data = pickle.load(open(args["modeldir"] + '/madscore.pkl', 'rb'))
Z_data = pickle.load(open(os.path.join(args["modeldir"], 'madscore.pkl'), 'rb'))
X_m = Z_data['X_m']
X_mad = Z_data['X_mad']

X = io.apply_madscore(X, X_m, X_mad)

output_file = f'{args["plotdir"]}/eval/stats_variables_{args["varnorm"]}.log'
output_file = os.path.join(args["plotdir"], 'eval', f'stats_variables_{args["varnorm"]}.log')
prints.print_variables(X, ids, weights, output_file=output_file)

except Exception as e:
@@ -1606,7 +1608,7 @@ def evaluate_models(data=None, info=None, args=None):

if args['plot_param']['contours']['active']:
plots.plot_contour_grid(pred_func=func_predict, X=X_RAW, y=y, ids=ids_RAW, transform='numpy',
- targetdir=aux.makedir(f'{args["plotdir"]}/eval/2D-contours/{param["label"]}/'))
+ targetdir=aux.makedir(os.path.join(f'{args["plotdir"]}', 'eval/2D-contours', f'{param["label"]}')))
else:
raise Exception(__name__ + f'.Unknown param["predict"] = {param["predict"]} for ID = {ID}')

@@ -1632,7 +1634,7 @@ def evaluate_models(data=None, info=None, args=None):
'ROC_binned_mlabel': ROC_binned_mlabel,
'info': info}

- targetfile = targetdir + '/eval_results.pkl'
+ targetfile = os.path.join(targetdir, 'eval_results.pkl')
print(f'Saving pickle output to:')
print(f'{targetfile}')

