Skip to content

Commit

Permalink
Working on ...
Browse files Browse the repository at this point in the history
  • Loading branch information
benchimols committed Dec 1, 2023
1 parent 9eac9bd commit d549e4c
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 25 deletions.
28 changes: 5 additions & 23 deletions cfgrib/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import json
import os.path
import typing as T
from pathlib import Path

import click

Expand Down Expand Up @@ -180,31 +179,14 @@ def dump(inpaths, variable, cdm, engine):
@cfgrib_cli.command("build_index")
@click.argument("inpaths", nargs=-1, required=True)
@click.option("--index-basedir", default=None)
@click.option("--force", default=None)
def build_index(inpaths, index_basedir, force):
@click.option("--force-index-creation", default=None)
def build_index(inpaths, index_basedir, force_index_creation):
# type: (T.List[str], str, bool) -> None
from .messages import FileStream, FileIndex
from .dataset import compute_index_keys

index_keys = compute_index_keys(("time", "step", "shortName"), {})
indexpath = "{path}.idx"
if index_basedir:
indexpath = os.path.join(index_basedir, '{path}.idx')
from .dataset import get_or_create_index

for fp in inpaths:
fp_idx = Path(indexpath.format(path=fp))
if force:
fp_idx.unlink(missing_ok=True)

print(f"{fp}: Creating index to {fp_idx}")
stream = FileStream(str(fp))
index = FileIndex.from_indexpath_or_filestream(
filestream=stream,
index_keys=index_keys,
indexpath=indexpath
)


print(f"{fp}: Creating index")
get_or_create_index(str(fp), index_basedir, force_index_creation)


if __name__ == "__main__": # pragma: no cover
Expand Down
14 changes: 14 additions & 0 deletions cfgrib/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import logging
import os
import typing as T
from pathlib import Path

import attr
import numpy as np
Expand Down Expand Up @@ -797,3 +798,16 @@ def open_file(
index = open_fileindex(stream, indexpath, index_keys, filter_by_keys=filter_by_keys)

return open_from_index(index, read_keys, time_dims, extra_coords, errors=errors, **kwargs)


def get_or_create_index(
    fp: T.Union[str, Path],
    index_basedir: T.Optional[T.Union[str, Path]],
    force_index_creation: bool = False,
) -> messages.FileIndex:
    """Return the ``FileIndex`` of a GRIB file, backed by a ``.idx`` index file.

    The work is delegated to ``messages.FileIndex.from_indexpath_or_filestream``
    using the default index keys from ``compute_index_keys()``.

    Args:
        fp: Path of the GRIB file to index.
        index_basedir: Directory prefix for the index file path.  When it is
            ``None`` or empty, the index path template is ``"{path}.idx"``,
            i.e. the index sits next to the GRIB file.
        force_index_creation: Forwarded to ``from_indexpath_or_filestream``;
            when true, an index file already present at the resolved path is
            removed before the index is created.

    Returns:
        The ``FileIndex`` produced by ``from_indexpath_or_filestream``.
    """
    # ``{path}`` is deliberately left as a placeholder: the template is
    # formatted by ``from_indexpath_or_filestream`` with the stream's path.
    indexpath = "{path}.idx"
    if index_basedir:
        # Guarded because the CLI passes ``None`` when ``--index-basedir`` is
        # not given, and ``os.path.join(None, ...)`` raises ``TypeError``.
        indexpath = os.path.join(str(index_basedir), indexpath)

    index_keys = compute_index_keys()
    stream = messages.FileStream(str(fp))
    return messages.FileIndex.from_indexpath_or_filestream(
        filestream=stream,
        index_keys=index_keys,
        indexpath=indexpath,
        force_index_creation=force_index_creation,
    )
9 changes: 7 additions & 2 deletions cfgrib/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,16 +520,21 @@ class FileIndex(FieldsetIndex):

@classmethod
def from_indexpath_or_filestream(
cls, filestream, index_keys, indexpath=DEFAULT_INDEXPATH, computed_keys={}, log=LOG
cls, filestream, index_keys, indexpath=DEFAULT_INDEXPATH, computed_keys={}, log=LOG,
force_index_creation=False
):
# type: (FileStream, T.Sequence[str], str, ComputedKeysType, logging.Logger) -> FileIndex
# type: (FileStream, T.Sequence[str], str, ComputedKeysType, logging.Logger, bool) -> FileIndex

# Reading and writing the index can be explicitly suppressed by passing indexpath==''.
if not indexpath:
return cls.from_fieldset(filestream, index_keys, computed_keys)

hash = hashlib.md5(repr(index_keys).encode("utf-8")).hexdigest()
indexpath = indexpath.format(path=filestream.path, hash=hash, short_hash=hash[:5])

if force_index_creation and os.path.exists(indexpath):
os.unlink(indexpath)

try:
with compat_create_exclusive(indexpath) as new_index_file:
self = cls.from_fieldset(filestream, index_keys, computed_keys)
Expand Down
8 changes: 8 additions & 0 deletions tests/test_30_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,3 +324,11 @@ def test_missing_field_values() -> None:
t2 = res.variables["t2m"]
assert np.isclose(np.nanmean(t2.data[0, :, :]), 268.375)
assert np.isclose(np.nanmean(t2.data[1, :, :]), 270.716)


def test_get_or_create_index(tmpdir) -> None:
    """Check that get_or_create_index yields a FileIndex, with and without forcing."""
    # Both calls target the same index directory under the pytest tmpdir.
    index_basedir = os.path.join(tmpdir, "indexes")

    first = dataset.get_or_create_index(TEST_DATA, index_basedir)
    assert isinstance(first, messages.FileIndex)

    # Second call requests that the index file be re-created.
    forced = dataset.get_or_create_index(
        TEST_DATA, index_basedir, force_index_creation=True
    )
    assert isinstance(forced, messages.FileIndex)

0 comments on commit d549e4c

Please sign in to comment.