Skip to content

Commit

Permalink
Another tweak to intake metadata problems (#3595)
Browse files Browse the repository at this point in the history
* Another tweak to intake metadata problems

Make sure we can't end up with undefined `metadata`.

Record details of `metadata.log` access to `run.controller` without adding
a ton of separate messages.
  • Loading branch information
dbutenhof authored Jan 9, 2024
1 parent 5d9ce4d commit 3db0cf9
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 27 deletions.
46 changes: 27 additions & 19 deletions lib/pbench/server/cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -834,24 +834,22 @@ def get_inventory(self, path: str) -> dict[str, Any]:
return {"name": path, "type": type, "stream": stream}

@staticmethod
def _get_metadata(tarball_path: Path) -> Optional[JSONOBJECT]:
def _get_metadata(tarball_path: Path) -> JSONOBJECT:
"""Fetch the values in metadata.log from the tarball.
Args:
tarball_path: a file path to a tarball
Returns:
A JSON representation of the dataset `metadata.log` or None if the
tarball has no metadata.log.
A JSON representation of the dataset `metadata.log`
"""
name = Dataset.stem(tarball_path)
try:
data = Tarball.extract(tarball_path, f"{name}/metadata.log")
except CacheExtractBadPath:
return None
else:
metadata_log = MetadataLog()
metadata_log.read_file(e.decode() for e in data)
data.close()
metadata = {s: dict(metadata_log.items(s)) for s in metadata_log.sections()}
return metadata
data = Tarball.extract(tarball_path, f"{name}/metadata.log")
metadata_log = MetadataLog()
metadata_log.read_file(e.decode() for e in data)
data.close()
metadata = {s: dict(metadata_log.items(s)) for s in metadata_log.sections()}
return metadata

@staticmethod
def subprocess_run(
Expand Down Expand Up @@ -1442,18 +1440,28 @@ def create(self, tarfile_path: Path) -> Tarball:
raise BadFilename(tarfile_path)
name = Dataset.stem(tarfile_path)
controller_name = None
errwhy = "unknown"
try:
metadata = Tarball._get_metadata(tarfile_path)
controller_name = metadata["run"]["controller"]
except Exception as exc:
self.logger.warning(
"{} metadata.log is missing run.controller: {!r}", name, exc
)
except Exception as e:
metadata = None
errwhy = str(e)
else:
run = metadata.get("run")
if run:
controller_name = run.get("controller")
if not controller_name:
errwhy = "missing 'controller' in 'run' section"
else:
errwhy = "missing 'run' section"

if not controller_name:
controller_name = "unknown"
self.logger.warning(
"{} has no controller name, assuming {!r}", name, controller_name
"{} has no controller name ({}), assuming {!r}",
name,
errwhy,
controller_name,
)

if name in self.tarballs:
Expand Down
38 changes: 30 additions & 8 deletions lib/pbench/test/unit/server/test_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,8 @@ def test_create_bad(
# The create call will remove the source files, so trying again should
# result in an error.
with pytest.raises(BadFilename) as exc:
cm.create(source_md5)
assert exc.value.path == str(source_md5)
cm.create(source_tarball)
assert exc.value.path == str(source_tarball)

# Attempting to create the same dataset again (from the archive copy)
# should fail with a duplicate dataset error.
Expand All @@ -262,6 +262,29 @@ def test_create_bad(
assert tarball.metadata == fake_get_metadata(tarball.tarball_path)
assert exc.value.tarball == tarball.name

def test_create_null_metalog(
self,
monkeypatch,
db_session,
selinux_disabled,
server_config,
make_logger,
tarball,
):
"""Test create when we can't get metadata.log"""

def get_throw(_p: Path):
raise Exception("Catch!")

source_tarball, source_md5, md5 = tarball
cm = CacheManager(server_config, make_logger)
monkeypatch.setattr(Tarball, "_get_metadata", get_throw)

# Attempting to create a dataset with a metadata error should succeed,
# with "unknown" controller
tarobj = cm.create(source_tarball)
assert tarobj.controller_name == "unknown"

def test_duplicate(
self,
monkeypatch,
Expand Down Expand Up @@ -1221,12 +1244,11 @@ def fake_extract(t: Path, f: Path):

with monkeypatch.context() as m:
m.setattr(Tarball, "extract", staticmethod(fake_extract))
metadata = Tarball._get_metadata(Path(tarball))

if stream:
assert metadata == {"test": {"foo": "bar"}}
else:
assert metadata is None
try:
metadata = Tarball._get_metadata(Path(tarball))
assert stream and metadata == {"test": {"foo": "bar"}}
except Exception as e:
assert isinstance(e, CacheExtractBadPath) and not stream

def test_inventory_without_subprocess(self):
"""Test the Inventory class when used without a subprocess
Expand Down

0 comments on commit 3db0cf9

Please sign in to comment.