Skip to content

Commit

Permalink
Cleanup filter script CLI error handling (#102)
Browse files Browse the repository at this point in the history
Fix exit code values to 1, print error messages to stderr, and update unit tests accordingly.
  • Loading branch information
laurejt authored Oct 7, 2024
1 parent e86be21 commit b10ed27
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 20 deletions.
25 changes: 13 additions & 12 deletions src/corppa/utils/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,19 +296,19 @@ def main():

if args.idfile:
if not args.idfile.is_file():
print(f"Error: idfile {args.idfile} does not exist")
sys.exit(-1)
print(f"Error: idfile {args.idfile} does not exist", file=sys.stderr)
sys.exit(1)
elif args.idfile.stat().st_size == 0:
print(f"Error: idfile {args.idfile} is zero size")
sys.exit(-1)
print(f"Error: idfile {args.idfile} is zero size", file=sys.stderr)
sys.exit(1)

if args.pgfile:
if not args.pgfile.is_file():
print(f"Error: pgfile {args.pgfile} does not exist")
print(f"Error: pgfile {args.pgfile} does not exist", file=sys.stderr)
sys.exit(1)
elif args.pgfile.stat().st_size == 0:
print(f"Error: pgfile {args.pgfile} is zero size")
sys.exit(-1)
print(f"Error: pgfile {args.pgfile} is zero size", file=sys.stderr)
sys.exit(1)

# if requested output filename has no extension, add jsonl
output_filepath = args.output
Expand All @@ -317,9 +317,10 @@ def main():

if output_filepath.is_file():
print(
f"Error: requested output file {args.output} already exists; not overwriting"
f"Error: requested output file {args.output} already exists; not overwriting",
file=sys.stderr,
)
sys.exit(-1)
sys.exit(1)

try:
save_filtered_corpus(
Expand All @@ -334,12 +335,12 @@ def main():
except (FileNotFoundError, JSONDecodeError) as err:
# catch known possible errors and display briefly
# with the type of error and the brief message
print(f"{err.__class__.__name__}: {err}")
sys.exit(-1)
print(f"{err.__class__.__name__}: {err}", file=sys.stderr)
sys.exit(1)

# check if output file exists but is zero size (i.e., no pages selected)
if output_filepath.is_file() and output_filepath.stat().st_size == 0:
# if claenup is disabled, remove and report
# if cleanup is disabled, remove and report
if args.cleanup:
output_filepath.unlink()
print(
Expand Down
20 changes: 12 additions & 8 deletions test/test_utils/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,32 +360,35 @@ def test_main_idfile_nonexistent(mock_save_filtered_corpus, capsys):
with patch(
"sys.argv", ["f.py", "foo.jsonl", "out.jsonl", "--idfile", "/not/a/real/id.txt"]
):
with pytest.raises(SystemExit):
with pytest.raises(SystemExit) as execinfo:
main()
assert execinfo.value.code == 1
captured = capsys.readouterr()
assert "does not exist" in captured.out
assert "does not exist" in captured.err


@patch("corppa.utils.filter.save_filtered_corpus")
def test_main_idfile_empty(mock_save_filtered_corpus, capsys, tmp_path):
idfile = tmp_path / "id.txt"
idfile.touch()
with patch("sys.argv", ["f.py", "foo.jsonl", "out.jsonl", "--idfile", str(idfile)]):
with pytest.raises(SystemExit):
with pytest.raises(SystemExit) as execinfo:
main()
assert execinfo.value.code == 1
captured = capsys.readouterr()
assert "is zero size" in captured.out
assert "is zero size" in captured.err


@patch("corppa.utils.filter.save_filtered_corpus")
def test_main_pgfile_empty(mock_save_filtered_corpus, capsys, tmp_path):
pgfile = tmp_path / "pages.csv"
pgfile.touch()
with patch("sys.argv", ["f.py", "foo.jsonl", "out.jsonl", "--pgfile", str(pgfile)]):
with pytest.raises(SystemExit):
with pytest.raises(SystemExit) as execinfo:
main()
assert execinfo.value.code == 1
captured = capsys.readouterr()
assert "is zero size" in captured.out
assert "is zero size" in captured.err


@patch("corppa.utils.filter.save_filtered_corpus")
Expand All @@ -397,7 +400,8 @@ def test_main_outfile_exists(mock_save_filtered_corpus, capsys, tmp_path):
with patch(
"sys.argv", ["f.py", "foo.jsonl", str(outfile), "--idfile", str(idfile)]
):
with pytest.raises(SystemExit):
with pytest.raises(SystemExit) as execinfo:
main()
assert execinfo.value.code == 1
captured = capsys.readouterr()
assert "already exists" in captured.out
assert "already exists" in captured.err

0 comments on commit b10ed27

Please sign in to comment.