diff --git a/src/earthkit/regrid/utils/matrix.py b/src/earthkit/regrid/utils/matrix.py
index 41b1cc9..55a52c1 100644
--- a/src/earthkit/regrid/utils/matrix.py
+++ b/src/earthkit/regrid/utils/matrix.py
@@ -11,6 +11,8 @@
 import json
 import os
 
+from scipy.sparse import load_npz
+
 from earthkit.regrid.db import VERSION, MatrixIndex
 
 from .mir import mir_cached_matrix_to_file
@@ -64,6 +66,23 @@ def get_method_name(entry):
     return method
 
 
+def matrix_memory_size(m):
+    """Return the number of bytes used by the arrays of sparse matrix ``m``.
+
+    The size is the sum of the ``data``, ``indptr`` and ``indices`` buffers,
+    so only the bsr, csc and csr formats are measured. An object lacking
+    one of these attributes is reported on stdout and yields 0.
+    """
+    # see: https://stackoverflow.com/questions/11173019/determining-the-byte-size-of-a-scipy-sparse-matrix
+    try:
+        # TODO: This works for bsr, csc and csr matrices but not for other types.
+        return m.data.nbytes + m.indptr.nbytes + m.indices.nbytes
+    except AttributeError as e:
+        # Unsupported format: keep the index build going with a size of 0.
+        print(e)
+        return 0
+
+
 def make_matrix(
     input_path, output_path, index_file=None, global_input=None, global_output=None
 ):
@@ -93,6 +112,7 @@ def make_matrix(
     m["input"] = entry["input"]
     m["output"] = entry["output"]
     m["interpolation"] = inter_ori
+
     key = make_sha(m)
     name = key
 
@@ -123,10 +143,15 @@ def convert(x):
     if global_output is not None and "global" not in entry["output"]:
         entry["output"]["global"] = 1 if global_output else 0
 
+    # Measure the stored matrix without keeping a reference to the loaded copy.
+    mem_size = matrix_memory_size(load_npz(npz_file))
+
     index["matrix"][key] = dict(
         input=convert(entry["input"]),
         output=convert(entry["output"]),
         interpolation=entry["interpolation"],
+        nnz=entry["matrix"]["nnz"],
+        memory=mem_size,
     )
 
     with open(index_file, "w") as f:
diff --git a/tools/manage/build_db.py b/tools/manage/build_db.py
index 382ab37..3af6fdd 100644
--- a/tools/manage/build_db.py
+++ b/tools/manage/build_db.py
@@ -102,7 +102,7 @@
     "O_rgg": [400],
 }
 
-build_root_dir = "_build_20241021"
+build_root_dir = "_build_20241123"
 build_dir = os.path.join(build_root_dir, "db")
 
 # extra = [["0.25x0.25", "N320"], ["O1280", "N320"], ["5x5", "10x10"]]
@@ -115,7 +115,7 @@
 index_file = os.path.join(build_dir, "index.json")
 
 
-for method in ["grid-box-average"]:  # ["linear", "nn", "grid-box-average"]:
+for method in ["linear", "nn", "grid-box-average"]:
     matrix_dir = os.path.join(build_dir, f"matrices_{method}")
 
     for g_in in in_grids: