From 96cdf3deabaa8e961dc8fa2ff7fcabeb6b9126a0 Mon Sep 17 00:00:00 2001 From: Patrick Leary Date: Tue, 6 Feb 2024 23:44:44 -0500 Subject: [PATCH 1/4] consolidating class usage; fix formatting issues --- .flake8 | 2 +- Pipfile | 3 + forms.py | 7 +- generate_thresholds.py | 338 +++++--------------- lib/inat_inferrer.py | 65 ++-- lib/inat_vision_api.py | 44 ++- lib/model_taxonomy.py | 73 ----- lib/model_taxonomy_dataframe.py | 38 ++- lib/model_test_data_export_manager.py | 6 +- lib/model_test_data_exporter.py | 7 +- lib/pt_geo_prior_model.py | 2 +- lib/taxon.py | 30 -- lib/tf_gp_elev_model.py | 6 +- lib/vision_inferrer.py | 4 +- lib/vision_testing.py | 43 ++- requirements.txt | 2 + taxon_range_evaluation.py | 431 ++++++-------------------- tests/test_inat_inferrer.py | 2 +- tests/test_model_taxonomy.py | 54 ---- tests/test_taxon.py | 17 - tests/test_tf_gp_elev_model.py | 2 +- 21 files changed, 328 insertions(+), 848 deletions(-) create mode 100644 Pipfile delete mode 100644 lib/model_taxonomy.py delete mode 100644 lib/taxon.py delete mode 100644 tests/test_model_taxonomy.py delete mode 100644 tests/test_taxon.py diff --git a/.flake8 b/.flake8 index 998a753..8cc6d52 100644 --- a/.flake8 +++ b/.flake8 @@ -1,6 +1,7 @@ [flake8] ignore = D203 max-line-length = 100 +inline-quotes = " exclude = .git, __pycache__, @@ -11,4 +12,3 @@ exclude = test-obs*, venv max-complexity = 10 - diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..6c49b0b --- /dev/null +++ b/Pipfile @@ -0,0 +1,3 @@ +[scripts] +tests = "pytest -s" +coverage = "bash -c 'coverage run -m pytest -s && coverage report --show-missing'" diff --git a/forms.py b/forms.py index 420dd8c..26f0371 100644 --- a/forms.py +++ b/forms.py @@ -1,8 +1,11 @@ from flask_wtf import FlaskForm from flask_wtf.file import FileField, FileRequired + class ImageForm(FlaskForm): - image = FileField('image', + image = FileField( + "image", validators=[ FileRequired(message="Please include 'image' field.") - ]) + ] + ) diff --git a/generate_thresholds.py b/generate_thresholds.py index 3f0bd14..47ff87a 100644 --- a/generate_thresholds.py +++ b/generate_thresholds.py @@ -4,218 +4,29 @@ import argparse import tifffile -import os import pandas as pd import numpy as np import h3 -import h3pandas +import h3pandas # noqa: F401 import tensorflow as tf -import csv -import math -import json from tqdm.auto import tqdm -import tensorflow as tf from sklearn.metrics import precision_recall_curve -import matplotlib.pyplot as plt import warnings +from lib.model_taxonomy_dataframe import ModelTaxonomyDataframe +from lib.tf_gp_elev_model import TFGeoPriorModelElev -class ResLayer(tf.keras.layers.Layer): - def __init__(self): - super(ResLayer, self).__init__() - self.w1 = tf.keras.layers.Dense( - 256, activation="relu", kernel_initializer="he_normal" - ) - self.w2 = tf.keras.layers.Dense( - 256, activation="relu", kernel_initializer="he_normal" - ) - self.dropout = tf.keras.layers.Dropout(rate=0.5) - self.add = tf.keras.layers.Add() - - def call(self, inputs): - x = self.w1(inputs) - x = self.dropout(x) - x = self.w2(x) - x = self.add([x, inputs]) - return x - - def get_config(self): - return {} - -class Taxon: - - def __init__(self, row): - for key in row: - setattr(self, key, row[key]) - - def set(self, attr, val): - setattr(self, attr, val) - - def is_or_descendant_of(self, taxon): - if self.id == taxon.id: - return True - return self.descendant_of(taxon) - - # using the nested set left and right values, a taxon is a descendant of another - # as long as its left is 
higher and its right is lower - def descendant_of(self, taxon): - return self.left > taxon.left and self.right < taxon.right - -class ModelTaxonomy: - - def __init__(self, path): - self.load_mapping(path) - self.assign_nested_values() - - def load_mapping(self, path): - self.node_key_to_leaf_class_id = {} - self.leaf_class_to_taxon = {} - # there is no taxon with ID 0, but roots of the taxonomy with have a parent ID of 0, - # so create a fake taxon of Life to represent the root of the entire tree - self.taxa = {0: Taxon({"name": "Life", "depth": 0})} - self.taxon_children = {} - try: - with open(path) as csv_file: - csv_reader = csv.DictReader(csv_file, delimiter=",") - for row in csv_reader: - taxon_id = int(row["taxon_id"]) - rank_level = float(row["rank_level"]) - leaf_class_id = int(row["leaf_class_id"]) if row["leaf_class_id"] else None - parent_id = int(row["parent_taxon_id"]) if row["parent_taxon_id"] else 0 - # some taxa are not leaves and aren't represented in the leaf layer - if leaf_class_id is not None: - self.node_key_to_leaf_class_id[taxon_id] = leaf_class_id - self.leaf_class_to_taxon[leaf_class_id] = taxon_id - self.taxa[taxon_id] = Taxon({ - "id": taxon_id, - "name": row["name"], - "parent_id": parent_id, - "leaf_class_id": leaf_class_id, - "rank_level": rank_level - }) - if parent_id not in self.taxon_children: - self.taxon_children[parent_id] = [] - self.taxon_children[parent_id].append(taxon_id) - except IOError as e: - print(e) - print(f"\n\nCannot open mapping file `{path}`\n\n") - raise e - - # prints to the console a representation of this tree - def print(self, taxon_id=0, ancestor_prefix=""): - children = self.taxon_children[taxon_id] - index = 0 - for child_id in children: - last_in_branch = (index == len(children) - 1) - index += 1 - icon = "└──" if last_in_branch else "├──" - prefixIcon = " " if last_in_branch else "│ " - taxon = self.taxa[child_id] - print(f'{ancestor_prefix}{icon}{taxon.name} :: {taxon.left}:{taxon.right}') - if child_id in self.taxon_children: - self.print(child_id, f"{ancestor_prefix}{prefixIcon}") - - # calculated nested set left and right values and depth representing how many nodes - # down the taxon is from Life. 
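
# A minimal, self-contained sketch of the nested-set check described in the
# comment above, with made-up left/right values (illustrative only, not part
# of this patch):
#
#     def is_descendant(a, b):
#         # a taxon is a descendant when its interval lies inside the other's
#         return a["left"] > b["left"] and a["right"] < b["right"]
#
#     life = {"left": 0, "right": 11}
#     aves = {"left": 1, "right": 6}
#     is_descendant(aves, life)  # True
#     is_descendant(life, aves)  # False
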
These can be later used for an efficient way to calculate - # if a taxon is a descendant of another - def assign_nested_values(self, taxon_id=0, index=0, depth=1, ancestors=[]): - for child_id in self.taxon_children[taxon_id]: - self.taxa[child_id].set("left", index) - self.taxa[child_id].set("depth", depth) - self.taxa[child_id].set("ancestors", ancestors) - index += 1 - if child_id in self.taxon_children: - child_ancestors = ancestors + [child_id] - index = self.assign_nested_values(child_id, index, depth + 1, child_ancestors) - self.taxa[child_id].set("right", index) - index += 1 - return index - - -class TFGeoPriorModelEnv: - - def __init__(self, model, taxonomy): - self.taxonomy = taxonomy - # initialize the geo model for inference - self.gpmodel = tf.keras.models.load_model( - model, - custom_objects={'ResLayer': ResLayer}, - compile=False - ) - - - def features_for_one_class_elevation(self, latitude, longitude, elevation): - """Evalutes the model for a single class and multiple locations - - Args: - latitude (list): A list of latitudes - longitude (list): A list of longitudes (same length as latitude) - elevation (list): A list of elevations (same length as latitude) - class_of_interest (int): The single class to eval - - Returns: - numpy array: scores for class of interest at each location - """ - def encode_loc(latitude, longitude, elevation): - latitude = np.array(latitude) - longitude = np.array(longitude) - elevation = np.array(elevation) - elevation = elevation.astype("float32") - grid_lon = longitude.astype('float32') / 180.0 - grid_lat = latitude.astype('float32') / 90.0 - - elevation[elevation>0] = elevation[elevation>0]/6574.0 - elevation[elevation<0] = elevation[elevation<0]/32768.0 - norm_elev = elevation - - if np.isscalar(grid_lon): - grid_lon = np.array([grid_lon]) - if np.isscalar(grid_lat): - grid_lat = np.array([grid_lat]) - if np.isscalar(norm_elev): - norm_elev = np.array([norm_elev]) - - norm_loc = tf.stack([grid_lon, grid_lat], axis=1) - - encoded_loc = tf.concat([ - tf.sin(norm_loc * math.pi), - tf.cos(norm_loc * math.pi), - tf.expand_dims(norm_elev, axis=1), - - ], axis=1) - - return encoded_loc - - encoded_loc = encode_loc(latitude, longitude, elevation) - loc_emb = self.gpmodel.layers[0](encoded_loc) - - # res layers - feature extraction - x = self.gpmodel.layers[1](loc_emb) - x = self.gpmodel.layers[2](x) - x = self.gpmodel.layers[3](x) - x = self.gpmodel.layers[4](x) - - # process just the one class - return x - - def eval_one_class_elevation_from_features(self, x, class_of_interest): - return tf.keras.activations.sigmoid( - tf.matmul( - x, - tf.expand_dims(self.gpmodel.layers[5].weights[0][:,class_of_interest], axis=0), - transpose_b=True - ) - ).numpy() def ignore_shapely_deprecation_warning(message, category, filename, lineno, file=None, line=None): if "array interface is deprecated" in str(message): return None return warnings.defaultaction(message, category, filename, lineno, file, line) + def main(args): print("loading in the model...") - mt = ModelTaxonomy(args.taxonomy) - tfgpm = TFGeoPriorModelEnv(args.model, mt) - + mtd = ModelTaxonomyDataframe(args.taxonomy, None) + tfgpm = TFGeoPriorModelElev(args.model) + print("setting up the map...") warnings.showwarning = ignore_shapely_deprecation_warning im = tifffile.imread(args.elevation) @@ -229,13 +40,13 @@ def main(args): im_df.columns = ["lat", "lng", "elevation"] elev_dfh3 = im_df.h3.geo_to_h3(args.h3_resolution) elev_dfh3 = elev_dfh3.drop( - columns=['lng', 'lat'] - 
).groupby("h3_0"+str(args.h3_resolution)).mean() + columns=["lng", "lat"] + ).groupby("h3_0" + str(args.h3_resolution)).mean() gdfk = elev_dfh3.h3.h3_to_geo() gdfk["lng"] = gdfk["geometry"].x gdfk["lat"] = gdfk["geometry"].y _ = gdfk.pop("geometry") - gdfk = gdfk.rename_axis('h3index') + gdfk = gdfk.rename_axis("h3index") print("making features...") feats = tfgpm.features_for_one_class_elevation( @@ -245,13 +56,20 @@ def main(args): ) print("loading in the training data...") - train_df = pd.read_csv(args.train_spatial_data, - usecols=["taxon_id","latitude","longitude","captive"]).rename({ + train_df = pd.read_csv( + args.train_spatial_data, + usecols=[ + "taxon_id", + "latitude", + "longitude", + "captive" + ] + ).rename({ "latitude": "lat", "longitude": "lng" }, axis=1) - train_df = train_df[train_df.captive==0] #no-CID ok, wild only - train_df.drop(["captive"],axis=1) + train_df = train_df[train_df.captive == 0] # no-CID ok, wild only + train_df.drop(["captive"], axis=1) train_df_h3 = train_df.h3.geo_to_h3(args.h3_resolution) all_spatial_grid_counts = train_df_h3.index.value_counts() presence_absence = pd.DataFrame({ @@ -261,89 +79,103 @@ def main(args): print("...looping through taxa") output = [] - taxa = pd.read_csv(args.taxonomy, usecols=["taxon_id","leaf_class_id","iconic_class_id"]).dropna(subset=['leaf_class_id']) + taxa = pd.read_csv( + args.taxonomy, + usecols=[ + "taxon_id", + "leaf_class_id", + "iconic_class_id" + ] + ).dropna(subset=["leaf_class_id"]) taxon_ids = taxa.taxon_id if args.stop_after is not None: - taxon_ids = taxon_ids[0:args.stop_after] - desired_recall = 0.95 + taxon_ids = taxon_ids[0:args.stop_after] resolution = args.h3_resolution area = h3.hex_area(resolution) for taxon_id in tqdm(taxon_ids): try: - class_of_interest = mt.node_key_to_leaf_class_id[taxon_id] - except: - print('not in the model for some reason') + class_of_interest = mtd.df.loc[taxon_id]["leaf_class_id"] + except Exception: + print("not in the model for some reason") continue - #get predictions + # get predictions preds = tfgpm.eval_one_class_elevation_from_features(feats, class_of_interest) gdfk["pred"] = tf.squeeze(preds).numpy() - - #make presence absence dataset - target_spatial_grid_counts = train_df_h3[train_df_h3.taxon_id==taxon_id].index.value_counts() + + # make presence absence dataset + target_spatial_grid_counts = \ + train_df_h3[train_df_h3.taxon_id == taxon_id].index.value_counts() presences = gdfk.loc[target_spatial_grid_counts.index]["pred"] if len(presences) == 0: print("not present") continue - - #calculate threhold + + # calculate threhold presence_absence["forground"] = target_spatial_grid_counts presence_absence["predictions"] = gdfk["pred"] presence_absence.forground = presence_absence.forground.fillna(0) - yield_cutoff = np.percentile((presence_absence["background"]/presence_absence["forground"])[presence_absence["forground"]>0], 95) - absences = presence_absence[(presence_absence["forground"]==0) & (presence_absence["background"] > yield_cutoff)]["predictions"] - presences = presence_absence[(presence_absence["forground"]>0)]["predictions"] - df_x = pd.DataFrame({'predictions': presences, 'test': 1}) - df_y = pd.DataFrame({'predictions': absences, 'test': 0}) + yield_cutoff = np.percentile(( + presence_absence["background"] / presence_absence["forground"] + )[presence_absence["forground"] > 0], 95) + absences = presence_absence[ + (presence_absence["forground"] == 0) & (presence_absence["background"] > yield_cutoff) + ]["predictions"] + presences = 
presence_absence[(presence_absence["forground"] > 0)]["predictions"] + df_x = pd.DataFrame({"predictions": presences, "test": 1}) + df_y = pd.DataFrame({"predictions": absences, "test": 0}) for_thres = pd.concat([df_x, df_y], ignore_index=False) - precision, recall, thresholds = precision_recall_curve(for_thres.test, for_thres.predictions) + precision, recall, thresholds = precision_recall_curve( + for_thres.test, + for_thres.predictions + ) p1 = (2 * precision * recall) p2 = (precision + recall) - out = np.zeros( (len(p1)) ) - fscore = np.divide(p1,p2, out=out, where=p2!=0) + out = np.zeros((len(p1))) + fscore = np.divide(p1, p2, out=out, where=p2 != 0) index = np.argmax(fscore) thres = thresholds[index] - - #store daa + + # store daa row = { "taxon_id": taxon_id, "thres": thres, - "area": len(gdfk[gdfk.pred >= thres])*area + "area": len(gdfk[gdfk.pred >= thres]) * area } row_dict = dict(row) output.append(row_dict) - + print("writing output...") output_pd = pd.DataFrame(output) - output_pd.to_csv(args.output_dir+"/thresholds.csv") + output_pd.to_csv(args.output_dir + "/thresholds.csv") + if __name__ == "__main__": - - info_str = '\nrun as follows\n' + \ - ' python generate_thresholds.py --elevation wc2.1_5m_elev.tif \n' + \ - ' --model v2_6/tf_geoprior_2_5_r6_elevation.h5 \n' + \ - ' --taxonomy taxonomy_1_4.csv\n' + \ - ' --train_spatial_data v2_6/taxonomy.csv\n' + \ - ' --output_dir v2_6\n' + \ - ' --h3_resolution 4\n' + \ - ' --stop_after 10\n' - + info_str = "\nrun as follows\n" + \ + " python generate_thresholds.py --elevation wc2.1_5m_elev.tif \n" + \ + " --model v2_6/tf_geoprior_2_5_r6_elevation.h5 \n" + \ + " --taxonomy taxonomy_1_4.csv\n" + \ + " --train_spatial_data v2_6/taxonomy.csv\n" + \ + " --output_dir v2_6\n" + \ + " --h3_resolution 4\n" + \ + " --stop_after 10\n" + parser = argparse.ArgumentParser(usage=info_str) - parser.add_argument('--elevation', type=str, - help='Path to elev tif.', required=True) - parser.add_argument('--model', type=str, - help='Path to tf model.', required=True) - parser.add_argument('--taxonomy', type=str, - help='Path to taxonomy csv.', required=True) - parser.add_argument('--train_spatial_data', type=str, - help='Path to train csv for occupancy.', required=True) - parser.add_argument('--output_dir', type=str, - help='directory to write thesholds.', required=True) - parser.add_argument('--h3_resolution', type=int, default=4, - help='grid resolution from 0 - 15, lower numbers are coarser/faster. Currently using 4') - parser.add_argument('--stop_after', type=int, - help='just run the first x taxa') + parser.add_argument("--elevation", type=str, + help="Path to elev tif.", required=True) + parser.add_argument("--model", type=str, + help="Path to tf model.", required=True) + parser.add_argument("--taxonomy", type=str, + help="Path to taxonomy csv.", required=True) + parser.add_argument("--train_spatial_data", type=str, + help="Path to train csv for occupancy.", required=True) + parser.add_argument("--output_dir", type=str, + help="directory to write thesholds.", required=True) + parser.add_argument("--h3_resolution", type=int, default=4, + help="grid resolution from 0 - 15, lower numbers are coarser/faster. 
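
# A toy, self-contained illustration of the per-taxon thresholding step above:
# pick the threshold that maximizes F1 along the precision-recall curve
# (labels and scores here are made up; not part of this patch):
import numpy as np
from sklearn.metrics import precision_recall_curve

labels = np.array([1, 1, 1, 0, 1, 0, 0, 0])
scores = np.array([0.9, 0.8, 0.7, 0.6, 0.55, 0.4, 0.3, 0.2])
precision, recall, thresholds = precision_recall_curve(labels, scores)
p1 = 2 * precision * recall
p2 = precision + recall
fscore = np.divide(p1, p2, out=np.zeros(len(p1)), where=p2 != 0)
# the last precision/recall pair has no threshold, so exclude it from argmax
best_threshold = thresholds[np.argmax(fscore[:-1])]  # 0.55 for this toy data
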
" + "Currently using 4") + parser.add_argument("--stop_after", type=int, + help="just run the first x taxa") args = parser.parse_args() main(args) - \ No newline at end of file diff --git a/lib/inat_inferrer.py b/lib/inat_inferrer.py index 2ae5e92..26e8d57 100644 --- a/lib/inat_inferrer.py +++ b/lib/inat_inferrer.py @@ -52,7 +52,9 @@ def setup_elevation_dataframe(self, config): if "elevation_h3_r4" in config: self.geo_elevation_cells = pd.read_csv(config["elevation_h3_r4"]). \ sort_values("h3_04").set_index("h3_04").sort_index() - self.geo_elevation_cells = InatInferrer.add_lat_lng_to_h3_geo_dataframe(self.geo_elevation_cells) + self.geo_elevation_cells = InatInferrer.add_lat_lng_to_h3_geo_dataframe( + self.geo_elevation_cells + ) def setup_elevation_dataframe_from_worldclim(self, config, resolution): # preventing from processing at too high a resolution @@ -67,7 +69,7 @@ def setup_elevation_dataframe_from_worldclim(self, config, resolution): im_df = im_df.melt(id_vars=["index"]) im_df.columns = ["lat", "lng", "elevation"] elev_dfh3 = im_df.h3.geo_to_h3(resolution) - elev_dfh3 = elev_dfh3.drop(columns=["lng", "lat"]).groupby(f'h3_0{resolution}').mean() + elev_dfh3 = elev_dfh3.drop(columns=["lng", "lat"]).groupby(f"h3_0{resolution}").mean() def setup_geo_model(self, config): self.geo_elevation_model = None @@ -112,7 +114,7 @@ def lookup_taxon(self, taxon_id): try: return self.taxonomy.df.loc[taxon_id] except Exception as e: - print(f'taxon `{taxon_id}` does not exist in the taxonomy') + print(f"taxon `{taxon_id}` does not exist in the taxonomy") raise e def predictions_for_image(self, file_path, lat, lng, filter_taxon, score_without_geo=False, @@ -154,7 +156,8 @@ def combine_results(self, raw_vision_scores, raw_geo_scores, filter_taxon, ) # normalize the vision scores so they add up to 1 after filtering sum_of_vision_scores = leaf_scores["vision_score"].sum() - leaf_scores["normalized_vision_score"] = leaf_scores["vision_score"] / sum_of_vision_scores + leaf_scores["normalized_vision_score"] = \ + leaf_scores["vision_score"] / sum_of_vision_scores else: # when not filtering by a taxon, the normalized vision score is the same as the original leaf_scores["normalized_vision_score"] = leaf_scores["vision_score"] @@ -182,8 +185,8 @@ def aggregate_results(self, leaf_scores, filter_taxon, score_without_geo=False, # using nested set left and right values, select the filter taxon, # its descendants, and its ancestors all_node_scores = self.taxonomy.df.query( - f'(left >= {filter_taxon["left"]} and right <= {filter_taxon["right"]}) or' + - f'(left < {filter_taxon["left"]} and right > {filter_taxon["right"]})' + f"(left >= {filter_taxon['left']} and right <= {filter_taxon['right']}) or" + f"(left < {filter_taxon['left']} and right > {filter_taxon['right']})" ).copy().reset_index(drop=True) else: all_node_scores = self.taxonomy.df.copy().reset_index(drop=True) @@ -204,7 +207,7 @@ def aggregate_results(self, leaf_scores, filter_taxon, score_without_geo=False, aggregated_scores = {} # restrict score aggregation to results where the combined score is above the cutoff - scores_to_aggregate = leaf_scores.query(f'combined_score > {cutoff}') + scores_to_aggregate = leaf_scores.query(f"combined_score > {cutoff}") # loop through all results where the combined score is above the cutoff for taxon_id, vision_score, geo_score, geo_threshold in zip( scores_to_aggregate["taxon_id"], @@ -223,13 +226,14 @@ def aggregate_results(self, leaf_scores, filter_taxon, score_without_geo=False, 
aggregated_scores[ancestor_taxon_id]["aggregated_geo_threshold"] = 100 # aggregated vision score is a sum of descendant scores aggregated_scores[ancestor_taxon_id]["aggregated_vision_score"] += vision_score - if not no_geo_scores and geo_score > aggregated_scores[ancestor_taxon_id]["aggregated_geo_score"]: + if not no_geo_scores and \ + geo_score > aggregated_scores[ancestor_taxon_id]["aggregated_geo_score"]: # aggregated geo score is the max of descendant geo scores aggregated_scores[ancestor_taxon_id]["aggregated_geo_score"] = geo_score if not no_geo_scores and \ aggregated_scores[ancestor_taxon_id]["aggregated_geo_threshold"] != 0 and \ geo_score > geo_threshold: - # aggregated geo threshold is set to 0 if any descendants are above their threshold + # aggregated threshold is set to 0 if any descendants are above their threshold aggregated_scores[ancestor_taxon_id]["aggregated_geo_threshold"] = 0 # turn the aggregated_scores dict into a data frame @@ -252,12 +256,13 @@ def aggregate_results(self, leaf_scores, filter_taxon, score_without_geo=False, if (no_geo_scores or score_without_geo): # if there are no geo scores, or it was requested to not use geo scores to affect # the final combined score, set the combined scores to be the same as the vision scores - all_node_scores["aggregated_combined_score"] = all_node_scores["aggregated_vision_score"] + all_node_scores["aggregated_combined_score"] = \ + all_node_scores["aggregated_vision_score"] else: # the combined score is simply the normalized vision score # multipliedby the normalized geo score - all_node_scores["aggregated_combined_score"] = all_node_scores["aggregated_vision_score"] * \ - all_node_scores["aggregated_geo_score"] + all_node_scores["aggregated_combined_score"] = \ + all_node_scores["aggregated_vision_score"] * all_node_scores["aggregated_geo_score"] # calculate a normalized combined score so all values add to 1, to be used for thresholding sum_of_root_node_aggregated_combined_scores = all_node_scores.query( @@ -267,26 +272,30 @@ def aggregate_results(self, leaf_scores, filter_taxon, score_without_geo=False, if debug: print("Aggregation Time: %0.2fms" % ((time.time() - start_time) * 1000.)) - thresholded_results = all_node_scores.query("normalized_aggregated_combined_score > 0.05") + thresholded_results = all_node_scores.query( + "normalized_aggregated_combined_score > 0.05" + ) print("\nTree of aggregated results:") ModelTaxonomyDataframe.print(thresholded_results, display_taxon_lambda=( - lambda row: f'{row.name} [' + - f'V:{round(row.aggregated_vision_score, 4)}, ' + - f'G:{round(row.aggregated_geo_score, 4)}, ' + - f'C:{round(row.aggregated_combined_score, 4)}, ' + - f'NC:{round(row.normalized_aggregated_combined_score, 4)}]')) + lambda row: f"{row.name} [" + f"V:{round(row.aggregated_vision_score, 4)}, " + f"G:{round(row.aggregated_geo_score, 4)}, " + f"C:{round(row.aggregated_combined_score, 4)}, " + f"NC:{round(row.normalized_aggregated_combined_score, 4)}]" + )) print("") return all_node_scores - def h3_04_geo_results_for_taxon(self, taxon_id, bounds=[], thresholded=False, raw_results=False): + def h3_04_geo_results_for_taxon(self, taxon_id, bounds=[], + thresholded=False, raw_results=False): if (self.geo_elevation_cells is None) or (self.geo_elevation_model is None): return try: taxon = self.taxonomy.df.loc[taxon_id] except Exception as e: - print(f'taxon `{taxon_id}` does not exist in the taxonomy') + print(f"taxon `{taxon_id}` does not exist in the taxonomy") raise e - if math.isnan(taxon["leaf_class_id"]): + if 
pd.isna(taxon["leaf_class_id"]): return geo_scores = self.geo_elevation_model.eval_one_class_elevation_from_features( @@ -300,7 +309,7 @@ def h3_04_geo_results_for_taxon(self, taxon_id, bounds=[], thresholded=False, ra # is smaller. This reduces data needed to be redendered client-side for the Data Layer # mapping approach, and maybe can be removed once switching to map tiles lower_bound_score = np.array([0.0001, taxon["geo_threshold"] / 10]).min() - geo_score_cells = geo_score_cells.query(f'geo_score > {lower_bound_score}') + geo_score_cells = geo_score_cells.query(f"geo_score > {lower_bound_score}") if bounds: min = geo_score_cells["geo_score"].min() @@ -316,7 +325,7 @@ def h3_04_geo_results_for_taxon(self, taxon_id, bounds=[], thresholded=False, ra return dict(zip(geo_score_cells.index.astype(str), geo_score_cells["geo_score"])) def h3_04_taxon_range(self, taxon_id, bounds=[]): - taxon_range_path = os.path.join(self.config["taxon_ranges_path"], f'{taxon_id}.csv') + taxon_range_path = os.path.join(self.config["taxon_ranges_path"], f"{taxon_id}.csv") if not os.path.exists(taxon_range_path): return None taxon_range_df = pd.read_csv(taxon_range_path, names=["h3_04"], header=None). \ @@ -328,7 +337,9 @@ def h3_04_taxon_range(self, taxon_id, bounds=[]): return dict(zip(taxon_range_df.index.astype(str), taxon_range_df["value"])) def h3_04_taxon_range_comparison(self, taxon_id, bounds=[]): - geomodel_results = self.h3_04_geo_results_for_taxon(taxon_id, bounds, thresholded=True) or {} + geomodel_results = self.h3_04_geo_results_for_taxon( + taxon_id, bounds, thresholded=True + ) or {} taxon_range_results = self.h3_04_taxon_range(taxon_id, bounds) or {} combined_results = {} for cell_key in geomodel_results: @@ -394,7 +405,7 @@ def filter_geo_dataframe_by_bounds(geo_df, bounds): # query for cells wtihin the buffered bounds, and potentially # on the other side of the antimeridian - query = f'lat >= {bounds[0] - buffer} and lat <= {bounds[2] + buffer} and ' + \ - f' ((lng >= {bounds[1] - buffer} and lng <= {bounds[3] + buffer})' + \ - f' {antimedirian_condition})' + query = f"lat >= {bounds[0] - buffer} and lat <= {bounds[2] + buffer} and " + \ + f" ((lng >= {bounds[1] - buffer} and lng <= {bounds[3] + buffer})" + \ + f" {antimedirian_condition})" return geo_df.query(query) diff --git a/lib/inat_vision_api.py b/lib/inat_vision_api.py index 8703c43..42fd8b0 100644 --- a/lib/inat_vision_api.py +++ b/lib/inat_vision_api.py @@ -57,7 +57,7 @@ def h3_04_default_route(self, h3_04_method): else: results_dict = h3_04_method(taxon_id, bounds) if results_dict is None: - return f'Unknown taxon_id {taxon_id}', 422 + return f"Unknown taxon_id {taxon_id}", 422 return InatVisionAPI.round_floats(results_dict, 8) def h3_04_bounds_route(self): @@ -67,7 +67,7 @@ def h3_04_bounds_route(self): results_dict = self.inferrer.h3_04_bounds(taxon_id) if results_dict is None: - return f'Unknown taxon_id {taxon_id}', 422 + return f"Unknown taxon_id {taxon_id}", 422 return results_dict def index_route(self): @@ -133,11 +133,11 @@ def score_image(self, form, file_path, lat, lng, iconic_taxon_id, geomodel): "aggregated_geo_threshold": "geo_threshold" } - no_geo_scores = (leaf_scores["geo_score"].max() == 0) - # set a cutoff where branches whose combined scores are below the threshold are ignored # TODO: this threshold is completely arbitrary and needs testing - aggregated_results = aggregated_results.query("normalized_aggregated_combined_score > 0.05") + aggregated_results = aggregated_results.query( + 
"normalized_aggregated_combined_score > 0.05" + ) # after setting a cutoff, get the parent IDs of the remaining taxa parent_taxon_ids = aggregated_results["parent_taxon_id"].values # noqa: F841 @@ -145,20 +145,30 @@ def score_image(self, form, file_path, lat, lng, iconic_taxon_id, geomodel): # taxa who are not parents of any remaining taxa leaf_results = aggregated_results.query("taxon_id not in @parent_taxon_ids") - leaf_results = leaf_results.sort_values("aggregated_combined_score", ascending=False).head(100) + leaf_results = leaf_results.sort_values( + "aggregated_combined_score", + ascending=False + ).head(100) score_columns = ["aggregated_combined_score", "aggregated_geo_score", "aggregated_vision_score", "aggregated_geo_threshold"] leaf_results[score_columns] = leaf_results[score_columns].multiply(100, axis="index") final_results = leaf_results[columns_to_return].rename(columns=column_mapping) else: - no_geo_scores = (leaf_scores["geo_score"].max() == 0) - top_combined_score = leaf_scores.sort_values("combined_score", ascending=False).head(1)["combined_score"].values[0] + top_combined_score = leaf_scores.sort_values( + "combined_score", + ascending=False + ).head(1)["combined_score"].values[0] # set a cutoff so results whose combined scores are # much lower than the best score are not returned - leaf_scores = leaf_scores.query(f'combined_score > {top_combined_score * 0.001}') + leaf_scores = leaf_scores.query(f"combined_score > {top_combined_score * 0.001}") top100 = leaf_scores.sort_values("combined_score", ascending=False).head(100) - score_columns = ["combined_score", "geo_score", "normalized_vision_score", "geo_threshold"] + score_columns = [ + "combined_score", + "geo_score", + "normalized_vision_score", + "geo_threshold" + ] top100[score_columns] = top100[score_columns].multiply(100, axis="index") # legacy dict response @@ -222,7 +232,7 @@ def valid_leaf_taxon_id_for_request(self, request): taxon_id = int(taxon_id) if float(taxon_id) not in self.inferrer.taxonomy.leaf_df["taxon_id"].values: - return None, f'Unknown taxon_id {taxon_id}', 422 + return None, f"Unknown taxon_id {taxon_id}", 422 return taxon_id, None, None def valid_bounds_for_request(self, request): @@ -242,12 +252,12 @@ def valid_bounds_for_request(self, request): def write_logstash(image_uuid, file_path, request_start_datetime, request_start_time): request_end_time = time.time() request_time = round((request_end_time - request_start_time) * 1000, 6) - logstash_log = open('log/logstash.log', 'a') - log_data = {'@timestamp': request_start_datetime.isoformat(), - 'uuid': image_uuid, - 'duration': request_time, - 'client_ip': request.access_route[0], - 'image_size': os.path.getsize(file_path)} + logstash_log = open("log/logstash.log", "a") + log_data = {"@timestamp": request_start_datetime.isoformat(), + "uuid": image_uuid, + "duration": request_time, + "client_ip": request.access_route[0], + "image_size": os.path.getsize(file_path)} json.dump(log_data, logstash_log) logstash_log.write("\n") logstash_log.close() diff --git a/lib/model_taxonomy.py b/lib/model_taxonomy.py deleted file mode 100644 index d390516..0000000 --- a/lib/model_taxonomy.py +++ /dev/null @@ -1,73 +0,0 @@ -import csv -from lib.taxon import Taxon - - -class ModelTaxonomy: - - def __init__(self, path): - self.load_mapping(path) - self.assign_nested_values() - - def load_mapping(self, path): - self.node_key_to_leaf_class_id = {} - self.leaf_class_to_taxon = {} - # there is no taxon with ID 0, but roots of the taxonomy have a parent ID of 0, - # so 
create a fake taxon of Life to represent the root of the entire tree - self.taxa = {0: Taxon({"name": "Life", "depth": 0})} - self.taxon_children = {} - try: - with open(path) as csv_file: - csv_reader = csv.DictReader(csv_file, delimiter=",") - for row in csv_reader: - taxon_id = int(row["taxon_id"]) - rank_level = float(row["rank_level"]) - leaf_class_id = int(row["leaf_class_id"]) if row["leaf_class_id"] else None - parent_id = int(row["parent_taxon_id"]) if row["parent_taxon_id"] else 0 - # some taxa are not leaves and aren't represented in the leaf layer - if leaf_class_id is not None: - self.node_key_to_leaf_class_id[taxon_id] = leaf_class_id - self.leaf_class_to_taxon[leaf_class_id] = taxon_id - self.taxa[taxon_id] = Taxon({ - "id": taxon_id, - "name": row["name"], - "parent_id": parent_id, - "leaf_class_id": leaf_class_id, - "rank_level": rank_level - }) - if parent_id not in self.taxon_children: - self.taxon_children[parent_id] = [] - self.taxon_children[parent_id].append(taxon_id) - except IOError as e: - print(e) - print(f"\n\nCannot open mapping file `{path}`\n\n") - raise e - - # prints to the console a representation of this tree - def print(self, taxon_id=0, ancestor_prefix=""): - children = self.taxon_children[taxon_id] - index = 0 - for child_id in children: - last_in_branch = (index == len(children) - 1) - index += 1 - icon = "└──" if last_in_branch else "├──" - prefixIcon = " " if last_in_branch else "│ " - taxon = self.taxa[child_id] - print(f'{ancestor_prefix}{icon}{taxon.name} :: {taxon.left}:{taxon.right}') - if child_id in self.taxon_children: - self.print(child_id, f"{ancestor_prefix}{prefixIcon}") - - # calculated nested set left and right values and depth representing how many nodes - # down the taxon is from Life. These can be later used for an efficient way to calculate - # if a taxon is a descendant of another - def assign_nested_values(self, taxon_id=0, index=0, depth=1, ancestors=[]): - for child_id in self.taxon_children[taxon_id]: - self.taxa[child_id].set("left", index) - self.taxa[child_id].set("depth", depth) - self.taxa[child_id].set("ancestors", ancestors) - index += 1 - if child_id in self.taxon_children: - child_ancestors = ancestors + [child_id] - index = self.assign_nested_values(child_id, index, depth + 1, child_ancestors) - self.taxa[child_id].set("right", index) - index += 1 - return index diff --git a/lib/model_taxonomy_dataframe.py b/lib/model_taxonomy_dataframe.py index a707c7d..4264d1f 100644 --- a/lib/model_taxonomy_dataframe.py +++ b/lib/model_taxonomy_dataframe.py @@ -1,4 +1,3 @@ -import math import pandas as pd @@ -8,7 +7,27 @@ def __init__(self, path, thresholds_path): self.load_mapping(path, thresholds_path) def load_mapping(self, path, thresholds_path): - self.df = pd.read_csv(path) + self.df = pd.read_csv( + path, + usecols=[ + "parent_taxon_id", + "taxon_id", + "rank_level", + "leaf_class_id", + "iconic_class_id", + "spatial_class_id", + "name" + ], + dtype={ + "parent_taxon_id": "Int64", + "taxon_id": int, + "rank_level": float, + "leaf_class_id": "Int64", + "iconic_class_id": "Int64", + "spatial_class_id": "Int64", + "name": pd.StringDtype() + } + ) # left and right will be used to store nested set indices self.df["left"] = pd.Series([], dtype=object) self.df["right"] = pd.Series([], dtype=object) @@ -17,7 +36,7 @@ def load_mapping(self, path, thresholds_path): self.taxon_ancestors = {} for index, taxon in self.df.iterrows(): self.taxon_row_mapping[taxon["taxon_id"]] = index - parent_id = 0 if math.isnan(taxon["parent_taxon_id"]) 
else int(taxon["parent_taxon_id"]) + parent_id = 0 if pd.isna(taxon["parent_taxon_id"]) else int(taxon["parent_taxon_id"]) if parent_id not in self.taxon_children: self.taxon_children[parent_id] = [] self.taxon_children[parent_id].append(taxon["taxon_id"]) @@ -50,7 +69,7 @@ def assign_nested_values(self, taxon_id=0, index=0, ancestor_taxon_ids=[]): def children(df, taxon_id): if taxon_id == 0: return df.query("parent_taxon_id.isnull()") - return df.query(f'parent_taxon_id == {taxon_id}') + return df.query(f"parent_taxon_id == {taxon_id}") @staticmethod def print(df, taxon_id=0, ancestor_prefix="", display_taxon_lambda=None): @@ -65,10 +84,15 @@ def print(df, taxon_id=0, ancestor_prefix="", display_taxon_lambda=None): index += 1 icon = "└──" if last_in_branch else "├──" prefixIcon = " " if last_in_branch else "│ " - print(f'{ancestor_prefix}{icon}', end="") + print(f"{ancestor_prefix}{icon}", end="") if display_taxon_lambda is None: - print(f'{row.name} :: {row.left}:{row.right}') + print(f"{row.name} :: {row.left}:{row.right}") else: print(display_taxon_lambda(row)) if row.right != row.left + 1: - ModelTaxonomyDataframe.print(df, row.taxon_id, f"{ancestor_prefix}{prefixIcon}", display_taxon_lambda) + ModelTaxonomyDataframe.print( + df, + row.taxon_id, + f"{ancestor_prefix}{prefixIcon}", + display_taxon_lambda + ) diff --git a/lib/model_test_data_export_manager.py b/lib/model_test_data_export_manager.py index 4c3ee2e..3aa35f9 100644 --- a/lib/model_test_data_export_manager.py +++ b/lib/model_test_data_export_manager.py @@ -21,9 +21,9 @@ def load_train_data_photo_ids(self): def export_path(self, filename_addition): currentDatetime = datetime.now() timestamp = currentDatetime.strftime("%Y%m%d") - export_path = f'test-obs-{timestamp}' + export_path = f"test-obs-{timestamp}" if filename_addition: - export_path += f'-{filename_addition}' + export_path += f"-{filename_addition}" if "filename_suffix" in self.cmd_args and self.cmd_args["filename_suffix"]: export_path += "-" + self.cmd_args["filename_suffix"] export_path += ".csv" @@ -38,7 +38,7 @@ async def generate_from_cmd_args(self): parameters_string = None if api_parameters: - parameters_string = "-".join(map(lambda key: f'{key}-{api_parameters[key]}', + parameters_string = "-".join(map(lambda key: f"{key}-{api_parameters[key]}", api_parameters)) export_path = self.export_path(parameters_string) exporter = ModelTestDataExporter( diff --git a/lib/model_test_data_exporter.py b/lib/model_test_data_exporter.py index 5013bd7..f0b32c0 100644 --- a/lib/model_test_data_exporter.py +++ b/lib/model_test_data_exporter.py @@ -95,7 +95,7 @@ async def fetch_more_data(self): min_pages_remaining = math.ceil( (self.max_results / ModelTestDataExporter.API_REQUEST_PER_PAGE) ) - print(f'Queueing {min_pages_remaining} workers') + print(f"Queueing {min_pages_remaining} workers") for i in range(min_pages_remaining): await self.queue.put(i) await self.queue.join() @@ -110,7 +110,7 @@ async def process_api_response(self): if self.finished(): return - print(f'Fetching more results... {self.rows_written} so far') + print(f"Fetching more results... 
{self.rows_written} so far") starting_rows_written = self.rows_written async with self.session.get(ModelTestDataExporter.API_BASE_URL, params=self.api_parameters) as response: @@ -158,7 +158,8 @@ def process_api_response_row(self, row): self.used_observations[row["uuid"]] = True return - if row["quality_grade"] == "casual" and not (row["community_taxon_id"] and row["community_taxon_id"] == row["taxon"]["id"]): + if row["quality_grade"] == "casual" \ + and not (row["community_taxon_id"] and row["community_taxon_id"] == row["taxon"]["id"]): self.used_observations[row["uuid"]] = True return diff --git a/lib/pt_geo_prior_model.py b/lib/pt_geo_prior_model.py index b81b4e9..77ea3b8 100644 --- a/lib/pt_geo_prior_model.py +++ b/lib/pt_geo_prior_model.py @@ -31,7 +31,7 @@ def predict(self, latitude, longitude, filter_taxon_id=None): try: filter_taxon = self.taxonomy.df.iloc[filter_taxon_id] except Exception as e: - print(f'filter_taxon `{filter_taxon_id}` does not exist in the taxonomy') + print(f"filter_taxon `{filter_taxon_id}` does not exist in the taxonomy") raise e location = np.array([longitude, latitude])[np.newaxis, ...] # we're not currently using date inference, so set default values for date diff --git a/lib/taxon.py b/lib/taxon.py deleted file mode 100644 index 63e169a..0000000 --- a/lib/taxon.py +++ /dev/null @@ -1,30 +0,0 @@ -# Taxon: -# parent_taxon_id -# taxon_id -# rank_level -# leaf_class_id -# iconic_class_id -# name -# left -# right -# depth - - -class Taxon: - - def __init__(self, row): - for key in row: - self.set(key, row[key]) - - def set(self, attr, val): - setattr(self, attr, val) - - def is_or_descendant_of(self, taxon): - if self.id == taxon.id: - return True - return self.descendant_of(taxon) - - # using the nested set left and right values, a taxon is a descendant of another - # as long as its left is higher and its right is lower - def descendant_of(self, taxon): - return self.left > taxon.left and self.right < taxon.right diff --git a/lib/tf_gp_elev_model.py b/lib/tf_gp_elev_model.py index ebaad95..caf3fad 100644 --- a/lib/tf_gp_elev_model.py +++ b/lib/tf_gp_elev_model.py @@ -11,13 +11,13 @@ class TFGeoPriorModelElev: def __init__(self, model_path): # initialize the geo model for inference - tf.config.set_visible_devices([], 'GPU') + tf.config.set_visible_devices([], "GPU") visible_devices = tf.config.get_visible_devices() for device in visible_devices: - assert device.device_type != 'GPU' + assert device.device_type != "GPU" self.gpmodel = tf.keras.models.load_model( model_path, - custom_objects={'ResLayer': ResLayer}, + custom_objects={"ResLayer": ResLayer}, compile=False ) diff --git a/lib/vision_inferrer.py b/lib/vision_inferrer.py index f1c7482..a81e8ee 100644 --- a/lib/vision_inferrer.py +++ b/lib/vision_inferrer.py @@ -10,10 +10,10 @@ def __init__(self, model_path): # initialize the TF model given the configured path def prepare_tf_model(self): # disable GPU processing - tf.config.set_visible_devices([], 'GPU') + tf.config.set_visible_devices([], "GPU") visible_devices = tf.config.get_visible_devices() for device in visible_devices: - assert device.device_type != 'GPU' + assert device.device_type != "GPU" self.vision_model = tf.keras.models.load_model(self.model_path, compile=False) diff --git a/lib/vision_testing.py b/lib/vision_testing.py index 73f21ce..2da6329 100644 --- a/lib/vision_testing.py +++ b/lib/vision_testing.py @@ -1,6 +1,4 @@ -import csv import os -import urllib import hashlib import magic import time @@ -34,7 +32,7 @@ def __init__(self, config, 
**args): print("Models:") for index, model_config in enumerate(config["models"]): print(json.dumps(model_config, indent=4)) - model_name = model_config["name"] if "name" in model_config else f'Model {index}' + model_name = model_config["name"] if "name" in model_config else f"Model {index}" model_config["name"] = model_name for score_type in score_types: self.scores[score_type]["vision"][index] = [] @@ -134,10 +132,16 @@ def print_scores(self): (sum(top5_distance_scores) / metrics["count"]) * 100, 2) metrics["top10∆"] = round( (sum(top10_distance_scores) / metrics["count"]) * 100, 2) - metrics["avg∆"] = round( - (mean(self.scores["average_ancestor_distance_scores"][method][index]) / metrics["count"]) * 100, 2) - metrics["sum∆"] = round( - (mean(self.scores["sum_ancestor_distance_scores"][method][index]) / metrics["count"]) * 100, 2) + metrics["avg∆"] = round(( + mean( + self.scores["average_ancestor_distance_scores"][method][index] + ) / metrics["count"] + ) * 100, 2) + metrics["sum∆"] = round(( + mean( + self.scores["sum_ancestor_distance_scores"][method][index] + ) / metrics["count"] + ) * 100, 2) all_metrics[method] = metrics print("method " + "\t" + "\t".join(all_metrics["vision"].keys())) @@ -147,16 +151,6 @@ def print_scores(self): str(value) for value in all_metrics[method].values())) print("\n") - # NOTE: this is assuming no conversion is needed. - # Ideally we'd reuse the inat_inferrer prepare_image_for_inference - def prepare_image_for_inference(self, cache_path): - image = tf.io.read_file(cache_path) - image = tf.image.decode_jpeg(image, channels=3) - image = tf.image.convert_image_dtype(image, tf.float32) - image = tf.image.central_crop(image, 0.875) - image = tf.image.resize(image, [299, 299], tf.image.ResizeMethod.NEAREST_NEIGHBOR) - return tf.expand_dims(image, 0) - def assess_top_results(self, observation, top_results): match_index = None distance_scores = [] @@ -194,7 +188,7 @@ async def test_observation_async(self, observation): cache_path = await self.download_photo_async(observation.photo_url) if cache_path is None or not os.path.exists(cache_path): return False - if observation.lat == '' or observation.lng == '': + if observation.lat == "" or observation.lng == "": return False iconic_taxon_id = None @@ -215,7 +209,7 @@ async def test_observation_async(self, observation): ) except Exception as e: print(e) - print(f'\nError scoring observation {observation.observation_id}') + print(f"\nError scoring observation {observation.observation_id}") return False return inferrer_scores @@ -237,7 +231,9 @@ def ancestor_distance_scores(self, observation, inferrer, results): if result_ancestor_match_index is None: result_ancestor_match_index = len(reversed_target_ancestors) # calculate a score of how far from species the result matched the target - ancestor_distance_scores.append((1 - (result_ancestor_match_index / len(reversed_target_ancestors)))**2) + ancestor_distance_scores.append((1 - ( + result_ancestor_match_index / len(reversed_target_ancestors) + ))**2) return ancestor_distance_scores def append_to_aggregate_results(self, observation, inferrer_scores): @@ -318,5 +314,8 @@ def debug(self, message): def report_progress(self): if self.processed_counter % 10 == 0: total_time = round(time.time() - self.start_time, 3) - remaining_time = round((self.limit - self.processed_counter) / (self.processed_counter / total_time), 3) - print(f'Processed {self.processed_counter} in {total_time} sec\testimated {remaining_time} sec remaining') + remaining_time = round(( + self.limit - 
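
# A worked example of the time-remaining estimate computed here (numbers made
# up, not from a real run): after processing 50 of 200 observations in 100
# seconds, the rate is 50 / 100 = 0.5 obs/sec, so the estimate is
# (200 - 50) / 0.5 = 300 seconds remaining.
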
self.processed_counter + ) / (self.processed_counter / total_time), 3) + print(f"Processed {self.processed_counter} in {total_time} sec\t" + f"estimated {remaining_time} sec remaining") diff --git a/requirements.txt b/requirements.txt index a4d3300..8fc83f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ coverage flake8 +flake8-quotes Flask Flask-WTF h3 @@ -11,6 +12,7 @@ matplotlib numpy pandas Pillow +pipenv prison pytest pytest-cov diff --git a/taxon_range_evaluation.py b/taxon_range_evaluation.py index 25e87ec..1d00336 100644 --- a/taxon_range_evaluation.py +++ b/taxon_range_evaluation.py @@ -2,8 +2,8 @@ Script to evaluate model and thresholds against taxon ranges """ + import argparse -import csv import tensorflow as tf import pandas as pd import gc @@ -12,284 +12,44 @@ import tifffile import numpy as np import h3 -import h3pandas -import math +import h3pandas # noqa: F401 import geopandas as gpd +import matplotlib.pyplot as plt from sklearn.metrics import auc from sklearn.metrics import precision_recall_curve +from lib.model_taxonomy_dataframe import ModelTaxonomyDataframe +from lib.tf_gp_elev_model import TFGeoPriorModelElev -class ResLayer(tf.keras.layers.Layer): - def __init__(self): - super(ResLayer, self).__init__() - self.w1 = tf.keras.layers.Dense( - 256, activation="relu", kernel_initializer="he_normal" - ) - self.w2 = tf.keras.layers.Dense( - 256, activation="relu", kernel_initializer="he_normal" - ) - self.dropout = tf.keras.layers.Dropout(rate=0.5) - self.add = tf.keras.layers.Add() - - def call(self, inputs): - x = self.w1(inputs) - x = self.dropout(x) - x = self.w2(x) - x = self.add([x, inputs]) - return x - - def get_config(self): - return {} - -class Taxon: - - def __init__(self, row): - for key in row: - setattr(self, key, row[key]) - - def set(self, attr, val): - setattr(self, attr, val) - - def is_or_descendant_of(self, taxon): - if self.id == taxon.id: - return True - return self.descendant_of(taxon) - - # using the nested set left and right values, a taxon is a descendant of another - # as long as its left is higher and its right is lower - def descendant_of(self, taxon): - return self.left > taxon.left and self.right < taxon.right - -class ModelTaxonomy: - - def __init__(self, path): - self.load_mapping(path) - self.assign_nested_values() - - def load_mapping(self, path): - self.node_key_to_leaf_class_id = {} - self.leaf_class_to_taxon = {} - # there is no taxon with ID 0, but roots of the taxonomy with have a parent ID of 0, - # so create a fake taxon of Life to represent the root of the entire tree - self.taxa = {0: Taxon({"name": "Life", "depth": 0})} - self.taxon_children = {} - try: - with open(path) as csv_file: - csv_reader = csv.DictReader(csv_file, delimiter=",") - for row in csv_reader: - taxon_id = int(row["taxon_id"]) - rank_level = float(row["rank_level"]) - leaf_class_id = int(row["leaf_class_id"]) if row["leaf_class_id"] else None - parent_id = int(row["parent_taxon_id"]) if row["parent_taxon_id"] else 0 - # some taxa are not leaves and aren't represented in the leaf layer - if leaf_class_id is not None: - self.node_key_to_leaf_class_id[taxon_id] = leaf_class_id - self.leaf_class_to_taxon[leaf_class_id] = taxon_id - self.taxa[taxon_id] = Taxon({ - "id": taxon_id, - "name": row["name"], - "parent_id": parent_id, - "leaf_class_id": leaf_class_id, - "rank_level": rank_level - }) - if parent_id not in self.taxon_children: - self.taxon_children[parent_id] = [] - self.taxon_children[parent_id].append(taxon_id) - except IOError as e: - 
print(e) - print(f"\n\nCannot open mapping file `{path}`\n\n") - raise e - - # prints to the console a representation of this tree - def print(self, taxon_id=0, ancestor_prefix=""): - children = self.taxon_children[taxon_id] - index = 0 - for child_id in children: - last_in_branch = (index == len(children) - 1) - index += 1 - icon = "└──" if last_in_branch else "├──" - prefixIcon = " " if last_in_branch else "│ " - taxon = self.taxa[child_id] - print(f'{ancestor_prefix}{icon}{taxon.name} :: {taxon.left}:{taxon.right}') - if child_id in self.taxon_children: - self.print(child_id, f"{ancestor_prefix}{prefixIcon}") - - # calculated nested set left and right values and depth representing how many nodes - # down the taxon is from Life. These can be later used for an efficient way to calculate - # if a taxon is a descendant of another - def assign_nested_values(self, taxon_id=0, index=0, depth=1, ancestors=[]): - for child_id in self.taxon_children[taxon_id]: - self.taxa[child_id].set("left", index) - self.taxa[child_id].set("depth", depth) - self.taxa[child_id].set("ancestors", ancestors) - index += 1 - if child_id in self.taxon_children: - child_ancestors = ancestors + [child_id] - index = self.assign_nested_values(child_id, index, depth + 1, child_ancestors) - self.taxa[child_id].set("right", index) - index += 1 - return index - -class TFGeoPriorModelEnv: - - def __init__(self, model_path, taxonomy): - self.taxonomy = taxonomy - # initialize the geo model for inference - self.gpmodel = tf.keras.models.load_model( - model_path, - custom_objects={'ResLayer': ResLayer}, - compile=False - ) - - def eval_one_class_elevation(self, latitude, longitude, elevation, class_of_interest): - """Evalutes the model for a single class and multiple locations - - Args: - latitude (list): A list of latitudes - longitude (list): A list of longitudes (same length as latitude) - elevation (list): A list of elevations (same length as latitude) - class_of_interest (int): The single class to eval - - Returns: - numpy array: scores for class of interest at each location - """ - def encode_loc(latitude, longitude, elevation): - latitude = np.array(latitude) - longitude = np.array(longitude) - elevation = np.array(elevation) - elevation = elevation.astype("float32") - grid_lon = longitude.astype('float32') / 180.0 - grid_lat = latitude.astype('float32') / 90.0 - - elevation[elevation>0] = elevation[elevation>0]/6574.0 - elevation[elevation<0] = elevation[elevation<0]/32768.0 - norm_elev = elevation - - if np.isscalar(grid_lon): - grid_lon = np.array([grid_lon]) - if np.isscalar(grid_lat): - grid_lat = np.array([grid_lat]) - if np.isscalar(norm_elev): - norm_elev = np.array([norm_elev]) - - norm_loc = tf.stack([grid_lon, grid_lat], axis=1) - - encoded_loc = tf.concat([ - tf.sin(norm_loc * math.pi), - tf.cos(norm_loc * math.pi), - tf.expand_dims(norm_elev, axis=1), - - ], axis=1) - - return encoded_loc - - encoded_loc = encode_loc(latitude, longitude, elevation) - loc_emb = self.gpmodel.layers[0](encoded_loc) - - # res layers - feature extraction - x = self.gpmodel.layers[1](loc_emb) - x = self.gpmodel.layers[2](x) - x = self.gpmodel.layers[3](x) - x = self.gpmodel.layers[4](x) - - # process just the one class - return tf.keras.activations.sigmoid( - tf.matmul( - x, - tf.expand_dims(self.gpmodel.layers[5].weights[0][:,class_of_interest], axis=0), - transpose_b=True - ) - ).numpy() - - def features_for_one_class_elevation(self, latitude, longitude, elevation): - """Evalutes the model for a single class and multiple 
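
# A minimal, self-contained sketch of the location encoding these geo models
# use, with the same normalization constants as the (since-removed) helper
# below; illustrative only, not part of this patch:
import numpy as np

def encode_loc(lat, lng, elevation_m):
    lat_n = lat / 90.0
    lng_n = lng / 180.0
    elev_n = elevation_m / 6574.0 if elevation_m > 0 else elevation_m / 32768.0
    # sin/cos wrapping keeps longitudes continuous across the antimeridian
    return np.array([np.sin(lng_n * np.pi), np.sin(lat_n * np.pi),
                     np.cos(lng_n * np.pi), np.cos(lat_n * np.pi), elev_n])

encode_loc(42.36, -71.06, 100.0)  # one 5-feature input row
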
locations - - Args: - latitude (list): A list of latitudes - longitude (list): A list of longitudes (same length as latitude) - elevation (list): A list of elevations (same length as latitude) - class_of_interest (int): The single class to eval - - Returns: - numpy array: scores for class of interest at each location - """ - def encode_loc(latitude, longitude, elevation): - latitude = np.array(latitude) - longitude = np.array(longitude) - elevation = np.array(elevation) - elevation = elevation.astype("float32") - grid_lon = longitude.astype('float32') / 180.0 - grid_lat = latitude.astype('float32') / 90.0 - - elevation[elevation>0] = elevation[elevation>0]/6574.0 - elevation[elevation<0] = elevation[elevation<0]/32768.0 - norm_elev = elevation - - if np.isscalar(grid_lon): - grid_lon = np.array([grid_lon]) - if np.isscalar(grid_lat): - grid_lat = np.array([grid_lat]) - if np.isscalar(norm_elev): - norm_elev = np.array([norm_elev]) - - norm_loc = tf.stack([grid_lon, grid_lat], axis=1) - - encoded_loc = tf.concat([ - tf.sin(norm_loc * math.pi), - tf.cos(norm_loc * math.pi), - tf.expand_dims(norm_elev, axis=1), - - ], axis=1) - - return encoded_loc - - encoded_loc = encode_loc(latitude, longitude, elevation) - loc_emb = self.gpmodel.layers[0](encoded_loc) - - # res layers - feature extraction - x = self.gpmodel.layers[1](loc_emb) - x = self.gpmodel.layers[2](x) - x = self.gpmodel.layers[3](x) - x = self.gpmodel.layers[4](x) - - # process just the one class - return x - - def eval_one_class_elevation_from_features(self, x, class_of_interest): - return tf.keras.activations.sigmoid( - tf.matmul( - x, - tf.expand_dims(self.gpmodel.layers[5].weights[0][:,class_of_interest], axis=0), - transpose_b=True - ) - ).numpy() def evaluate_p_r(thres, gdfb, tr_h3, world, plot): - bp_h3 = gdfb[gdfb["pred"]>=thres].copy() + bp_h3 = gdfb[gdfb["pred"] >= thres].copy() area = bp_h3.shape[0] if area == 0: return None, None, None - tt = tr_h3.h3.h3_to_geo_boundary()[['geometry']].copy() - fp_map = bp_h3[~bp_h3.index.isin(tt.index)].h3.h3_to_geo_boundary()[['geometry']].copy() + tt = tr_h3.h3.h3_to_geo_boundary()[["geometry"]].copy() + fp_map = bp_h3[~bp_h3.index.isin(tt.index)].h3.h3_to_geo_boundary()[["geometry"]].copy() fp_map = fp_map.set_geometry(fp_map.geometry.apply(push_right)) fp_map["score"] = 1 - tp_map = tt[tt.index.isin(bp_h3.index)][['geometry']].copy() + tp_map = tt[tt.index.isin(bp_h3.index)][["geometry"]].copy() tp_map["score"] = 2 - fn_map = tt[~tt.index.isin(bp_h3.index)][['geometry']].copy() + fn_map = tt[~tt.index.isin(bp_h3.index)][["geometry"]].copy() fn_map["score"] = 3 kappa_map = pd.concat([fp_map, tp_map, fn_map], axis=0) - - fp=kappa_map[kappa_map["score"]==1].shape[0] #fp - tp=kappa_map[kappa_map["score"]==2].shape[0] #tp - fn=kappa_map[kappa_map["score"]==3].shape[0] #fn - p = tp/(tp+fp) - r = tp/(fn+tp) - - if plot==True: + + fp = kappa_map[kappa_map["score"] == 1].shape[0] # fp + tp = kappa_map[kappa_map["score"] == 2].shape[0] # tp + fn = kappa_map[kappa_map["score"] == 3].shape[0] # fn + if tp + fp == 0 or fn + tp == 0: + return None, None, None + p = tp / (tp + fp) + r = tp / (fn + tp) + + if plot is True: print("Precision: " + str(p)) print("Recall: " + str(r)) kappa_map_geometry_total_bounds = kappa_map.geometry.total_bounds if np.isnan(kappa_map_geometry_total_bounds).any(): - minx, miny, maxx, maxy = [-180, -90, 180, 90] + minx, miny, maxx, maxy = [-180, -90, 180, 90] else: minx, miny, maxx, maxy = kappa_map_geometry_total_bounds fig, ax = plt.subplots(figsize=(10, 10)) @@ 
-302,9 +62,10 @@ def evaluate_p_r(thres, gdfb, tr_h3, world, plot): ax.set_xlim(minx - .1, maxx + .1) ax.set_ylim(miny - .1, maxy + .1) plt.show() - + return p, r, area + def push_right(geom): def shift_pts(pts): for x, y in pts: @@ -319,6 +80,7 @@ def shift_pts(pts): holes = list() return type(geom)(shell, holes) + def get_prauc(gdfb, tr_h3, plot): bp_h3 = gdfb.copy() if bp_h3.shape[0] == 0: @@ -330,35 +92,39 @@ def get_prauc(gdfb, tr_h3, plot): precision, recall, thresholds = precision_recall_curve(test, predictions) p1 = (2 * precision * recall) p2 = (precision + recall) - out = np.zeros( (len(p1)) ) - fscore = np.divide(p1,p2, out=out, where=p2!=0) + out = np.zeros((len(p1))) + fscore = np.divide(p1, p2, out=out, where=p2 != 0) index = np.argmax(fscore) prthres = thresholds[index] prf1 = fscore[index] prprecision = precision[index] prrecall = recall[index] prauc = auc(recall, precision) - if plot==True: + if plot is True: print("PR AUC: " + str(prauc)) fig, ax = plt.subplots() - ax.plot(recall, precision, color='purple') - ax.plot([recall[index]], [precision[index]], color='green', marker='o') - ax.set_title('Precision-Recall Curve') - ax.set_ylabel('Precision') - ax.set_xlabel('Recall') + ax.plot(recall, precision, color="purple") + ax.plot([recall[index]], [precision[index]], color="green", marker="o") + ax.set_title("Precision-Recall Curve") + ax.set_ylabel("Precision") + ax.set_xlabel("Recall") plt.show() return prauc, prthres, prf1, prprecision, prrecall + def main(args): print("read in the taxonomy...") - taxa = pd.read_csv(args.taxonomy, usecols=["taxon_id","leaf_class_id","iconic_class_id"]).dropna(subset=['leaf_class_id']) + taxa = pd.read_csv( + args.taxonomy, + usecols=["taxon_id", "leaf_class_id", "iconic_class_id"] + ).dropna(subset=["leaf_class_id"]) taxon_ids = taxa.taxon_id if args.stop_after is not None: - taxon_ids = taxon_ids[0:args.stop_after] - mt = ModelTaxonomy(args.taxonomy) + taxon_ids = taxon_ids[0:args.stop_after] + mtd = ModelTaxonomyDataframe(args.taxonomy, None) print("read in the model...") - tfgpm = TFGeoPriorModelEnv(args.model, mt) + tfgpm = TFGeoPriorModelElev(args.model) print("read in the taxon range recalls and thresholds...") taxon_range_recalls = pd.read_csv(args.taxon_range_recalls) @@ -366,10 +132,10 @@ def main(args): print("reading in the elevation and world map...") im = tifffile.imread(args.elevation) - world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres')) + world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) print("processing elevation and making features...") - h3_resolution=4 + h3_resolution = 4 im_df = pd.DataFrame(im) im_df.index = np.linspace(90, -90, 2160) im_df.columns = np.linspace(-180, 180, 4320) @@ -380,110 +146,113 @@ def main(args): im_df.columns = ["lat", "lng", "elevation"] elev_dfh3 = im_df.h3.geo_to_h3(h3_resolution) elev_dfh3 = elev_dfh3.drop( - columns=['lng', 'lat'] - ).groupby("h3_0"+str(h3_resolution)).mean() + columns=["lng", "lat"] + ).groupby("h3_0" + str(h3_resolution)).mean() gdfk = elev_dfh3.h3.h3_to_geo() gdfk["lng"] = gdfk["geometry"].x gdfk["lat"] = gdfk["geometry"].y _ = gdfk.pop("geometry") - gdfk = gdfk.rename_axis('h3index') + gdfk = gdfk.rename_axis("h3index") feats = tfgpm.features_for_one_class_elevation( latitude=list(gdfk.lat), longitude=list(gdfk.lng), elevation=list(gdfk.elevation) ) - + print("looping through the taxa...") eval_output = [] for taxon_id in tqdm(taxon_ids): - #check whether taxon represented in taxon range eval set + # check whether taxon represented in 
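
# A worked example of the precision/recall bookkeeping in evaluate_p_r above
# (cell counts made up): with tp=80 cells both predicted and in range, fp=20
# predicted but outside the range, and fn=40 in range but missed,
# precision = 80 / (80 + 20) = 0.80, recall = 80 / (40 + 80) ~= 0.67, and
# f1 = 2 * 0.80 * 0.67 / (0.80 + 0.67) ~= 0.73.
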
taxon range eval set if taxon_range_recalls[taxon_range_recalls.taxon_id.eq(taxon_id)].shape[0] == 0: continue - if taxon_range_recalls[(taxon_range_recalls['taxon_id'] == taxon_id) & (taxon_range_recalls['r'] > 0.9)].empty: + if taxon_range_recalls[ + (taxon_range_recalls["taxon_id"] == taxon_id) & (taxon_range_recalls["recall"] > 0.9) + ].empty: continue - taxon_range_indicies = args.taxon_range_indicies+"/"+ str(taxon_id) +".csv" - if exists(taxon_range_indicies) == False: + taxon_range_indicies = args.taxon_range_indicies + "/" + str(taxon_id) + ".csv" + if not exists(taxon_range_indicies): continue - - #process taxon range + + # process taxon range try: taxon_range_index = pd.read_csv(taxon_range_indicies, header=None) - taxon_range_index.rename(columns={0: 'h3index_new'}, inplace=True) + taxon_range_index.rename(columns={0: "h3index_new"}, inplace=True) tr_h3 = gdfk.loc[gdfk.index.isin(taxon_range_index.h3index_new)] - except: + except Exception: gc.collect() continue - #get model predictions and threshold + # get model predictions and threshold try: - class_of_interest = mt.node_key_to_leaf_class_id[taxon_id] - except: + class_of_interest = mtd.df.loc[taxon_id]["leaf_class_id"] + except Exception: + print("not in the model for some reason") continue preds = tfgpm.eval_one_class_elevation_from_features(feats, class_of_interest) gdfk["pred"] = tf.squeeze(preds).numpy() - thres = thresholds[thresholds.taxon_id==taxon_id].thres.values[0] - - #get precision, recall, prauc, and f1 + thres = thresholds[thresholds.taxon_id == taxon_id].thres.values[0] + + # get precision, recall, prauc, and f1 p, r, area = evaluate_p_r(thres, gdfk, tr_h3, world, False) - if p == None or r == None or ((p+r)==0): + if p is None or r is None or ((p + r) == 0): f1 = None else: f1 = (2 * p * r) / (p + r) prauc, prthres, prf1, prprecision, prrecall = get_prauc(gdfk, tr_h3, False) area = h3.hex_area(h3_resolution) - - #store results + + # store results row = { "taxon_id": taxon_id, "prauc": prauc, "p": p, "r": r, "f1": f1, - "taxon_range_area": len(tr_h3)*area, + "taxon_range_area": len(tr_h3) * area, } row_dict = dict(row) eval_output.append(row_dict) - + eval_output_pd = pd.DataFrame(eval_output) print("evaluation statistics:") - print("\tPR-AUC: "+str(round(eval_output_pd.prauc.mean(),3))) - print("\tPrecision: "+str(round(eval_output_pd.p.mean(),3))) - print("\tRecall: "+str(round(eval_output_pd.r.mean(),3))) - print("\tF1: "+str(round(eval_output_pd.f1.mean(),3))) + print("\tPR-AUC: " + str(round(eval_output_pd.prauc.mean(), 3))) + print("\tPrecision: " + str(round(eval_output_pd.p.mean(), 3))) + print("\tRecall: " + str(round(eval_output_pd.r.mean(), 3))) + print("\tF1: " + str(round(eval_output_pd.f1.mean(), 3))) print("writing output...") eval_output_pd.to_csv(args.output_path) + if __name__ == "__main__": - - info_str = '\nrun as follows\n' + \ - ' python taxon_range_evaluation.py --elevation wc2.1_5m_elev.tif \n' + \ - ' --model v2_8/no_full_shuffle_50k_buffer.h5 \n' + \ - ' --taxonomy v2_8/taxonomy.csv\n' + \ - ' --thresholds v2_8/tf_env_thresh.csv\n' + \ - ' --taxon_range_recalls v2_8/taxon_range_recalls.csv\n' + \ - ' --taxon_range_indicies v2_8/taxon_range_indicies\n' + \ - ' --output_path v2_8/tf_env_eval_test.csv\n' + \ - ' --stop_after 10\n' - + + info_str = "\nrun as follows\n" + \ + " python taxon_range_evaluation.py --elevation wc2.1_5m_elev.tif \n" + \ + " --model v2_8/no_full_shuffle_50k_buffer.h5 \n" + \ + " --taxonomy v2_8/taxonomy.csv\n" + \ + " --thresholds v2_8/tf_env_thresh.csv\n" + \ + 
" --taxon_range_recalls v2_8/taxon_range_recalls.csv\n" + \ + " --taxon_range_indicies v2_8/taxon_range_indicies\n" + \ + " --output_path v2_8/tf_env_eval_test.csv\n" + \ + " --stop_after 10\n" + parser = argparse.ArgumentParser(usage=info_str) - parser.add_argument('--elevation', type=str, - help='Path to elev tif.', required=True) - parser.add_argument('--model', type=str, - help='Path to tf model.', required=True) - parser.add_argument('--taxonomy', type=str, - help='Path to taxonomy csv.', required=True) - parser.add_argument('--thresholds', type=str, - help='Path to thresholds csv.', required=True) - parser.add_argument('--taxon_range_recalls', type=str, - help='Path to taxon_range_recalls csv.', required=True) - parser.add_argument('--taxon_range_indicies', type=str, - help='Path to indices dir.', required=True) - parser.add_argument('--output_path', type=str, - help='file to write thesholds.', required=True) - parser.add_argument('--stop_after', type=int, - help='just run the first x taxa') + parser.add_argument("--elevation", type=str, + help="Path to elev tif.", required=True) + parser.add_argument("--model", type=str, + help="Path to tf model.", required=True) + parser.add_argument("--taxonomy", type=str, + help="Path to taxonomy csv.", required=True) + parser.add_argument("--thresholds", type=str, + help="Path to thresholds csv.", required=True) + parser.add_argument("--taxon_range_recalls", type=str, + help="Path to taxon_range_recalls csv.", required=True) + parser.add_argument("--taxon_range_indicies", type=str, + help="Path to indices dir.", required=True) + parser.add_argument("--output_path", type=str, + help="file to write thesholds.", required=True) + parser.add_argument("--stop_after", type=int, + help="just run the first x taxa") args = parser.parse_args() main(args) - diff --git a/tests/test_inat_inferrer.py b/tests/test_inat_inferrer.py index 9ba1a1c..26967f3 100644 --- a/tests/test_inat_inferrer.py +++ b/tests/test_inat_inferrer.py @@ -18,7 +18,7 @@ def test_initialization(self, inatInferrer): ) tf.keras.models.load_model.assert_any_call( inatInferrer.config["tf_geo_elevation_model_path"], - custom_objects={'ResLayer': ResLayer}, + custom_objects={"ResLayer": ResLayer}, compile=False ) diff --git a/tests/test_model_taxonomy.py b/tests/test_model_taxonomy.py deleted file mode 100644 index 167fff7..0000000 --- a/tests/test_model_taxonomy.py +++ /dev/null @@ -1,54 +0,0 @@ -import pytest -import os -from lib.model_taxonomy import ModelTaxonomy - - -@pytest.fixture() -def taxonomy(): - yield ModelTaxonomy( - os.path.realpath(os.path.dirname(__file__) + "/fixtures/taxonomy.csv") - ) - - -@pytest.fixture() -def taxon(request, taxonomy): - yield next(v for k, v in taxonomy.taxa.items() if v.name == request.param) - - -class TestModelTaxonomyDataframe: - def test_raise_error_on_missing_path(self): - with pytest.raises(FileNotFoundError): - ModelTaxonomy( - os.path.realpath("nonsense") - ) - - @pytest.mark.parametrize("taxon", ["Aramus guarauna"], indirect=True) - def test_loading_mapping(self, taxon): - assert taxon.id == 7 - assert taxon.parent_id == 6 - assert taxon.rank_level == 10 - assert taxon.leaf_class_id == 1 - assert taxon.name == "Aramus guarauna" - - @pytest.mark.parametrize("taxon", ["Aramus guarauna"], indirect=True) - def test_nested_set_assigning(self, taxon): - assert taxon.left == 7 - assert taxon.right == 8 - - def test_children_of_root(self, taxonomy): - children = taxonomy.taxon_children[0] - assert len(children) == 2 - assert 
taxonomy.taxa[children[0]].name == "Animalia" - assert taxonomy.taxa[children[1]].name == "Plantae" - - @pytest.mark.parametrize("taxon", ["Animalia"], indirect=True) - def test_children_of_taxon(self, taxonomy, taxon): - children = taxonomy.taxon_children[taxon.id] - assert len(children) == 1 - assert taxonomy.taxa[children[0]].name == "Chordata" - - def test_print(self, capsys, taxonomy): - taxonomy.print() - captured = capsys.readouterr() - assert "├──Animalia :: 0:23" in captured.out - assert "│ └──Chordata :: 1:22" in captured.out diff --git a/tests/test_taxon.py b/tests/test_taxon.py deleted file mode 100644 index 171cd48..0000000 --- a/tests/test_taxon.py +++ /dev/null @@ -1,17 +0,0 @@ -from lib.taxon import Taxon - - -class TestTaxon: - def test_initialization(self): - taxon = Taxon({"id": 0, "name": "Life"}) - assert taxon.name == "Life" - - def test_is_or_descendant_of_self(self): - taxon = Taxon({"id": 1}) - assert taxon.is_or_descendant_of(taxon) - - def test_is_or_descendant_of_taxon(self): - parent_taxon = Taxon({"id": 1, "left": 0, "right": 3}) - child_taxon = Taxon({"id": 2, "left": 1, "right": 2}) - assert child_taxon.is_or_descendant_of(parent_taxon) - assert not parent_taxon.is_or_descendant_of(child_taxon) diff --git a/tests/test_tf_gp_elev_model.py b/tests/test_tf_gp_elev_model.py index 65f0dae..8c01a3b 100644 --- a/tests/test_tf_gp_elev_model.py +++ b/tests/test_tf_gp_elev_model.py @@ -16,7 +16,7 @@ def test_initialization(self, mocker): TFGeoPriorModelElev(model_path) tf.keras.models.load_model.assert_called_once_with( model_path, - custom_objects={'ResLayer': ResLayer}, + custom_objects={"ResLayer": ResLayer}, compile=False ) From c4a946f88aa62eacb194b50862a7e14256a418b8 Mon Sep 17 00:00:00 2001 From: Patrick Leary Date: Fri, 9 Feb 2024 10:51:40 -0500 Subject: [PATCH 2/4] refactor vision_testing to record obs-level stats, test directories, output to CSV; alternate aggregated scoring --- lib/inat_inferrer.py | 1 + lib/inat_vision_api.py | 112 +++++-- lib/model_taxonomy_dataframe.py | 21 +- lib/model_test_data_exporter.py | 4 +- lib/templates/home.html | 4 +- lib/test_observation.py | 3 + lib/vision_testing.py | 521 ++++++++++++++++++-------------- test_model.py | 14 +- 8 files changed, 423 insertions(+), 257 deletions(-) diff --git a/lib/inat_inferrer.py b/lib/inat_inferrer.py index 26e8d57..355b4e8 100644 --- a/lib/inat_inferrer.py +++ b/lib/inat_inferrer.py @@ -278,6 +278,7 @@ def aggregate_results(self, leaf_scores, filter_taxon, score_without_geo=False, print("\nTree of aggregated results:") ModelTaxonomyDataframe.print(thresholded_results, display_taxon_lambda=( lambda row: f"{row.name} [" + f"ID:{row.taxon_id}, " f"V:{round(row.aggregated_vision_score, 4)}, " f"G:{round(row.aggregated_geo_score, 4)}, " f"C:{round(row.aggregated_combined_score, 4)}, " diff --git a/lib/inat_vision_api.py b/lib/inat_vision_api.py index 42fd8b0..e6adfd0 100644 --- a/lib/inat_vision_api.py +++ b/lib/inat_vision_api.py @@ -4,10 +4,12 @@ import urllib import uuid import json +import pandas as pd from flask import Flask, request, render_template from web_forms import ImageForm from inat_inferrer import InatInferrer +from lib.model_taxonomy_dataframe import ModelTaxonomyDataframe class InatVisionAPI: @@ -107,6 +109,52 @@ def index_route(self): else: return render_template("home.html") + def best_leaves_from_aggregated_results(self, aggregated_results, iteration=0): + # use a lower threshold on the first pass to have higher representation from + # original model leaf taxa + 
selection_score_threshold = 0.05 if iteration == 0 else 0.1 + remaining_results = aggregated_results.query( + f"selection_score > {selection_score_threshold}" + ) + # set a rank level cutoff on higher taxa to include in results + if iteration > 0: + remaining_results = remaining_results.query( + "rank_level <= 30" + ) + # after setting a cutoff, get the parent IDs of the remaining taxa + parent_taxon_ids = remaining_results["parent_taxon_id"].values # noqa: F841 + # the leaves of the pruned taxonomy (not leaves of the original taxonomy), are the + # taxa who are not parents of any remaining taxa + leaf_results = remaining_results.query("taxon_id not in @parent_taxon_ids") + + # lower the scores of ancestors by the scores of the taxa being moved into the result set + for selection_score, aggregated_combined_score, left, right in zip( + leaf_results["selection_score"], + leaf_results["aggregated_combined_score"], + leaf_results["left"], + leaf_results["right"] + ): + self_and_ancestors = remaining_results.query( + f"left <= {left} and right >= {right}" + ) + remaining_results.loc[ + self_and_ancestors.index, + "selection_score" + ] -= selection_score + remaining_results.loc[ + self_and_ancestors.index, + "aggregated_combined_score" + ] -= aggregated_combined_score + + # stop picking taxa if one represents more than 80% of aggregated scores + if leaf_results["normalized_aggregated_combined_score"].max() >= 0.8: + remaining_results = pd.DataFrame() + else: + remaining_results = remaining_results.query( + "selection_score > 0.1" + ) + return [leaf_results, remaining_results] + def score_image(self, form, file_path, lat, lng, iconic_taxon_id, geomodel): score_without_geo = (form.score_without_geo.data == "true") filter_taxon = self.inferrer.lookup_taxon(iconic_taxon_id) @@ -117,6 +165,50 @@ def score_image(self, form, file_path, lat, lng, iconic_taxon_id, geomodel): if form.aggregated.data == "true": aggregated_results = self.inferrer.aggregate_results(leaf_scores, filter_taxon, score_without_geo) + if form.format.data == "tree": + aggregated_results = aggregated_results.query( + "normalized_aggregated_combined_score > 0.001" + ) + printable_tree = ModelTaxonomyDataframe.printable_tree( + aggregated_results, + display_taxon_lambda=( + lambda row: f"{row.name}\t\t[" + f"ID:{row.taxon_id}, " + f"V:{round(row.aggregated_vision_score, 4)}, " + f"G:{round(row.aggregated_geo_score, 4)}, " + f"C:{round(row.aggregated_combined_score, 4)}, " + f"NC:{round(row.normalized_aggregated_combined_score, 4)}]" + ) + ) + return "
" + "
".join(printable_tree) + "
" + + aggregated_results = aggregated_results.query( + "normalized_aggregated_combined_score > 0.05" + ) + + aggregated_results["selection_score"] = aggregated_results[ + "normalized_aggregated_combined_score" + ] + iteration = 0 + leaf_results, remaining_results = self.best_leaves_from_aggregated_results( + aggregated_results, iteration + ) + while len(remaining_results.index) > 0: + iteration += 1 + next_leaf_results, remaining_results = self.best_leaves_from_aggregated_results( + remaining_results, iteration + ) + leaf_results = pd.concat([leaf_results, next_leaf_results]) + + leaf_results = leaf_results.sort_values( + "aggregated_combined_score", + ascending=False + ).head(100) + + score_columns = ["aggregated_combined_score", "aggregated_geo_score", + "aggregated_vision_score", "aggregated_geo_threshold"] + leaf_results[score_columns] = leaf_results[score_columns].multiply(100, axis="index") + columns_to_return = [ "aggregated_combined_score", "aggregated_geo_score", @@ -132,26 +224,6 @@ def score_image(self, form, file_path, lat, lng, iconic_taxon_id, geomodel): "aggregated_vision_score": "vision_score", "aggregated_geo_threshold": "geo_threshold" } - - # set a cutoff where branches whose combined scores are below the threshold are ignored - # TODO: this threshold is completely arbitrary and needs testing - aggregated_results = aggregated_results.query( - "normalized_aggregated_combined_score > 0.05" - ) - - # after setting a cutoff, get the parent IDs of the remaining taxa - parent_taxon_ids = aggregated_results["parent_taxon_id"].values # noqa: F841 - # the leaves of the pruned taxonomy (not leaves of the original taxonomy), are the - # taxa who are not parents of any remaining taxa - leaf_results = aggregated_results.query("taxon_id not in @parent_taxon_ids") - - leaf_results = leaf_results.sort_values( - "aggregated_combined_score", - ascending=False - ).head(100) - score_columns = ["aggregated_combined_score", "aggregated_geo_score", - "aggregated_vision_score", "aggregated_geo_threshold"] - leaf_results[score_columns] = leaf_results[score_columns].multiply(100, axis="index") final_results = leaf_results[columns_to_return].rename(columns=column_mapping) else: top_combined_score = leaf_scores.sort_values( diff --git a/lib/model_taxonomy_dataframe.py b/lib/model_taxonomy_dataframe.py index 4264d1f..d792e41 100644 --- a/lib/model_taxonomy_dataframe.py +++ b/lib/model_taxonomy_dataframe.py @@ -56,10 +56,10 @@ def load_mapping(self, path, thresholds_path): def assign_nested_values(self, taxon_id=0, index=0, ancestor_taxon_ids=[]): for child_id in self.taxon_children[taxon_id]: self.df.at[self.taxon_row_mapping[child_id], "left"] = index - self.taxon_ancestors[child_id] = ancestor_taxon_ids + child_ancestor_taxon_ids = ancestor_taxon_ids + [child_id] + self.taxon_ancestors[child_id] = child_ancestor_taxon_ids index += 1 if child_id in self.taxon_children: - child_ancestor_taxon_ids = ancestor_taxon_ids + [child_id] index = self.assign_nested_values(child_id, index, child_ancestor_taxon_ids) self.df.at[self.taxon_row_mapping[child_id], "right"] = index index += 1 @@ -73,26 +73,35 @@ def children(df, taxon_id): @staticmethod def print(df, taxon_id=0, ancestor_prefix="", display_taxon_lambda=None): + print("\n".join(ModelTaxonomyDataframe.printable_tree( + df, taxon_id, ancestor_prefix, display_taxon_lambda + ))) + + @staticmethod + def printable_tree(df, taxon_id=0, ancestor_prefix="", display_taxon_lambda=None): children = ModelTaxonomyDataframe.children(df, taxon_id) index = 0 if 
"aggregated_combined_score" in children: children = children.sort_values("aggregated_combined_score", ascending=False) else: children = children.sort_values("name") + linesToPrint = [] for row in children.itertuples(): last_in_branch = (index == len(children) - 1) index += 1 icon = "└──" if last_in_branch else "├──" prefixIcon = " " if last_in_branch else "│ " - print(f"{ancestor_prefix}{icon}", end="") + lineToPrint = f"{ancestor_prefix}{icon}" if display_taxon_lambda is None: - print(f"{row.name} :: {row.left}:{row.right}") + lineToPrint += f"{row.name} :: {row.left}:{row.right}" else: - print(display_taxon_lambda(row)) + lineToPrint += display_taxon_lambda(row) + linesToPrint.append(lineToPrint) if row.right != row.left + 1: - ModelTaxonomyDataframe.print( + linesToPrint += ModelTaxonomyDataframe.printable_tree( df, row.taxon_id, f"{ancestor_prefix}{prefixIcon}", display_taxon_lambda ) + return linesToPrint diff --git a/lib/model_test_data_exporter.py b/lib/model_test_data_exporter.py index f0b32c0..1fa554f 100644 --- a/lib/model_test_data_exporter.py +++ b/lib/model_test_data_exporter.py @@ -89,7 +89,7 @@ async def generate_test_data(self): await self.fetch_more_data() async def fetch_more_data(self): - self.queue = asyncio.Queue(ModelTestDataExporter.N_WORKERS) + self.queue = asyncio.Queue() self.workers = [asyncio.create_task(self.worker_task()) for _ in range(ModelTestDataExporter.N_WORKERS)] min_pages_remaining = math.ceil( @@ -97,7 +97,7 @@ async def fetch_more_data(self): ) print(f"Queueing {min_pages_remaining} workers") for i in range(min_pages_remaining): - await self.queue.put(i) + self.queue.put_nowait(i) await self.queue.join() for worker in self.workers: worker.cancel() diff --git a/lib/templates/home.html b/lib/templates/home.html index 37244e9..649d3ee 100644 --- a/lib/templates/home.html +++ b/lib/templates/home.html @@ -23,18 +23,20 @@

[home.html hunk body unrecoverable: the HTML tags were stripped during extraction; the surviving text nodes are the "Slim vs Legacy Model" heading and a "Lng:" field label, plus three added lines]
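For reference, the H3 feature step in taxon_range_evaluation.py's main() melts the elevation raster into lat/lng/elevation rows, bins them into resolution-4 H3 cells, and averages elevation per cell. A minimal runnable sketch of that binning, using a few toy points in place of the raster (the "h3_04" index name is what h3pandas produces at resolution 4):

import pandas as pd
import h3pandas  # noqa: F401  registers the .h3 accessor used below

# toy stand-in for the melted elevation raster (lat/lng/elevation rows)
im_df = pd.DataFrame({
    "lat": [10.00, 10.01, 55.00],
    "lng": [20.00, 20.01, -3.00],
    "elevation": [100.0, 120.0, 300.0],
})

h3_resolution = 4
elev_dfh3 = im_df.h3.geo_to_h3(h3_resolution)
# drop the coordinate columns and average elevation within each H3 cell
elev_dfh3 = elev_dfh3.drop(columns=["lat", "lng"]).groupby("h3_04").mean()
print(elev_dfh3)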
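Similarly, the threshold selection in get_prauc() picks the point on the precision-recall curve that maximizes F1, guarding the division where precision + recall is zero. A self-contained sketch on synthetic labels and scores (both invented for illustration):

import numpy as np
from sklearn.metrics import auc, precision_recall_curve

rng = np.random.default_rng(0)
test = rng.integers(0, 2, 1000)          # 1 = H3 cell falls inside the taxon range
predictions = np.clip(0.5 * test + 0.6 * rng.random(1000), 0.0, 1.0)

precision, recall, thresholds = precision_recall_curve(test, predictions)
p1 = 2 * precision * recall
p2 = precision + recall
# F1 for every candidate threshold, zero wherever precision + recall == 0
fscore = np.divide(p1, p2, out=np.zeros_like(p1), where=p2 != 0)
index = np.argmax(fscore)
print(f"best F1 {fscore[index]:.3f} at threshold {thresholds[index]:.3f}")
print(f"PR-AUC {auc(recall, precision):.3f}")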
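Finally, the iterative leaf selection in best_leaves_from_aggregated_results() leans on the taxonomy's nested-set bounds: the leaves of the pruned tree are the taxa no remaining taxon lists as parent, and each selected leaf's score is subtracted from itself and its ancestors, so a higher taxon only surfaces on a later iteration if its children left score unexplained. A toy sketch with invented scores (taxon 1 is the parent of taxa 2 and 3):

import pandas as pd

df = pd.DataFrame([
    {"taxon_id": 1, "parent_taxon_id": 0, "left": 0, "right": 5, "selection_score": 0.90},
    {"taxon_id": 2, "parent_taxon_id": 1, "left": 1, "right": 2, "selection_score": 0.55},
    {"taxon_id": 3, "parent_taxon_id": 1, "left": 3, "right": 4, "selection_score": 0.30},
])

# pruned-tree leaves: taxa that no remaining taxon claims as its parent
parent_taxon_ids = df["parent_taxon_id"].values  # noqa: F841
leaf_results = df.query("taxon_id not in @parent_taxon_ids")

# discount each selected leaf's score from itself and its ancestors
for left, right, score in zip(leaf_results["left"], leaf_results["right"],
                              leaf_results["selection_score"]):
    lineage = df.query(f"left <= {left} and right >= {right}")
    df.loc[lineage.index, "selection_score"] -= score

print(leaf_results["taxon_id"].tolist())  # [2, 3]
print(round(df.loc[df.taxon_id == 1, "selection_score"].item(), 2))  # 0.90 - 0.55 - 0.30 = 0.05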