Skip to content

Commit

Permalink
updated version; added smoother handling of data with all records failing data checks
Browse files Browse the repository at this point in the history
  • Loading branch information
jarathomas committed Apr 3, 2024
1 parent c2452a9 commit 82ab851
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 11 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ rpy2 = "*"
[packages]
pandas = "*"
numpy = "*"
vacheck = "*"
vacheck = ">=0.0.3"
xlrd = "*"
2 changes: 1 addition & 1 deletion interva/__version__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
__title__ = "interva"
__description__ = "Python implementation of the InterVA Algorithm."
__url__ = "https://github.com/verbal-autopsy-software/interva"
__version__ = "0.0.6"
__version__ = "0.0.7"
__author__ = "Sherry Zhao & Jason Thomas"
__author_email__ = "[email protected]"
__license__ = "GPLv3"
35 changes: 26 additions & 9 deletions interva/interva5.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import datetime
from pkgutil import get_data
from io import BytesIO
import warnings

from interva.data.causetext import CAUSETEXTV5
from interva.utils import _get_dem_groups
Expand Down Expand Up @@ -607,10 +608,16 @@ def run(self) -> None:
"WHOLEPROB"]
VA_result.drop(nan_indices, axis=0, inplace=True)
else:
# TODO: add get_errors() function (similar to pyinsilicova)
warnings.warn(
"NO VALID VA RECORDS (datacheck procedure invalidated all "
"deaths)! Check error log for more details."
)
VA_result = None

dem_group = DataFrame(list_dem_group)
self.dem_group = dem_group.set_index("ID")
if len(list_dem_group) > 0:
dem_group = DataFrame(list_dem_group)
self.dem_group = dem_group.set_index("ID")

self.results = {"ID": ID_list,
"VA5": VA_result,
Expand All @@ -636,21 +643,21 @@ def set_hiv(self, hiv_level: str) -> str:
hiv_lvl = hiv_level.lower()
if hiv_lvl in ["h", "l", "v"]:
self.hiv = hiv_lvl
print(f"HIV parameter is {self.hiv}")
else:
print(f"The provided HIV level '{hiv_level}' is invalid.")
return self.hiv
print(f"HIV parameter is {self.hiv}")

def set_malaria(self, malaria_level: str) -> str:
"""Set malaria parameter."""

malaria_lvl = malaria_level.lower()
if malaria_lvl in ["h", "l", "v"]:
self.malaria = malaria_lvl
print(f"Malaria parameter is {self.malaria}")
else:
print(f"The provided malaria level '{malaria_level}' is invalid.")
return self.malaria
print(f"Malaria parameter is {self.malaria}")

def get_ids(self) -> Series:
"""Return pandas series of ID column in data."""
Expand All @@ -677,6 +684,13 @@ def get_csmf(self, top: int = 10,
:rtype: pandas.series
"""

if len(self.results) == 0:
print("No results. Use run() method to assign causes.")
return None
if self.results["VA5"] is None:
print("No results found. Check error log. It is likely that "
"all records failed the data consistency checks.")
return None
va = self.results["VA5"]
set_option("display.max_rows", None)
set_option("display.max_columns", None)
Expand Down Expand Up @@ -738,8 +752,10 @@ def get_csmf(self, top: int = 10,
dist = None
for i in range(len(va)):
if va.iloc[i, 14] is not None:
dist = [[0 for _ in range(len(va.iloc[i, 14]))]]
# dist = [[0 for _ in range(len(va.iloc[i, 14]))]]
dist = array([0] * len(va.iloc[i, 14]))
break
# what if dist is still None at this point? ex dataset 2?
undeter = 0

# Pick not simply the top # causes,
Expand Down Expand Up @@ -786,10 +802,11 @@ def get_csmf(self, top: int = 10,
this_dist[k] = 0

if va.iloc[i, 14] is not None:
if i == 0:
dist = this_dist
else:
dist = dist + this_dist
# if i == 0:
# dist = this_dist
# else:
# dist = dist + this_dist
dist = dist + this_dist

dist = Series(dist)
dist_cod = None
Expand Down
1 change: 1 addition & 0 deletions interva/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ def _csmf_with_interva_rule(va5: DataFrame) -> Series:
for i in va.index:
if va.loc[i, "WHOLEPROB"] is not None:
dist = zeros(len(va.loc[i, "WHOLEPROB"]))
# TODO: this has been changed (to fix bug) in interva5.py
break
undetermined = 0

Expand Down

0 comments on commit 82ab851

Please sign in to comment.