diff --git a/lib/model_taxonomy_dataframe.py b/lib/model_taxonomy_dataframe.py index df32730..8070ae5 100644 --- a/lib/model_taxonomy_dataframe.py +++ b/lib/model_taxonomy_dataframe.py @@ -1,4 +1,5 @@ import pandas as pd +import math class ModelTaxonomyDataframe: @@ -38,6 +39,10 @@ def load_mapping(self, path, thresholds_path): if thresholds_path is not None: thresholds = pd.read_csv(thresholds_path)[["taxon_id", "thres"]]. \ rename(columns={"thres": "geo_threshold"}).set_index("taxon_id").sort_index() + # round thresholds down to 5 decimal places + thresholds["geo_threshold"] = thresholds["geo_threshold"].apply( + lambda x: math.floor(x * 100000) / 100000 + ) self.df = self.df.join(thresholds) # create a data frame with just the leaf taxa using leaf_class_id as the index