diff --git a/export_data.py b/export_data.py
new file mode 100644
index 0000000..96559b1
--- /dev/null
+++ b/export_data.py
@@ -0,0 +1,30 @@
+import click
+import json
+
+
+@click.command()
+@click.option("--exclude-train-photos-path", "-x", multiple=True,
+              help="Exclude photos that were included in these training sets.")
+@click.option("--limit", type=int, show_default=True, default=5000,
+              help="Number of observations to include.")
+@click.option("--standard_set", help="Export the standard set of 18 filtered datasets.")
+@click.option("--filename_suffix", type=str, help="String to add to end of filename.")
+@click.option("--place_id", type=int, help="Export observations in this place.")
+@click.option("--taxon_id", type=int, help="Export observations in this taxon.")
+def test(**args):
+    # some libraries are slow to import, so wait until the command is validated and properly invoked
+    from lib.model_test_data_exporter import ModelTestDataExporter
+    print("\nArguments:")
+    print(json.dumps(args, indent=4))
+    print("\nInitializing ModelTestDataExporter...\n")
+    model_test_data_exporter = ModelTestDataExporter(**args)
+    print("Exporting data...\n")
+    if "standard_set" in args and args["standard_set"]:
+        model_test_data_exporter.generate_standard_set()
+    else:
+        model_test_data_exporter.generate_from_cmd_args()
+    print("\nDone\n")
+
+
+if __name__ == "__main__":
+    test()
diff --git a/generate_thresholds.py b/generate_thresholds.py
index 2d3bad4..3f0bd14 100644
--- a/generate_thresholds.py
+++ b/generate_thresholds.py
@@ -3,229 +3,347 @@
 """
 
 import argparse
-from tqdm.auto import tqdm
+import tifffile
+import os
 import pandas as pd
-import h3pandas
 import numpy as np
+import h3
+import h3pandas
+import tensorflow as tf
+import csv
 import math
+import json
+from tqdm.auto import tqdm
 from sklearn.metrics import precision_recall_curve
-from sklearn.metrics import auc
-import sys
+import matplotlib.pyplot as plt
+import warnings
 
-def ratios_for_taxon_id(taxon_id, all_spatial_grid_counts, train_df_h3):
-
-    #counts in each presence cell for target taxon
-    target_spatial_grid_counts = train_df_h3[train_df_h3.taxon_id==taxon_id].index.value_counts()
+class ResLayer(tf.keras.layers.Layer):
+    def __init__(self):
+        super(ResLayer, self).__init__()
+        self.w1 = tf.keras.layers.Dense(
+            256, activation="relu", kernel_initializer="he_normal"
+        )
+        self.w2 = tf.keras.layers.Dense(
+            256, activation="relu", kernel_initializer="he_normal"
+        )
+        self.dropout = tf.keras.layers.Dropout(rate=0.5)
+        self.add = tf.keras.layers.Add()
+
+    def call(self, inputs):
+        x = self.w1(inputs)
+        x = self.dropout(x)
+        x = self.w2(x)
+        x = self.add([x, inputs])
+        return x
+
+    def get_config(self):
+        return {}
+
+class Taxon:
+
+    def __init__(self, row):
+        for key in row:
+            setattr(self, key, row[key])
+
+    def set(self, attr, val):
+        setattr(self, attr, val)
+
+    def is_or_descendant_of(self, taxon):
+        if self.id == taxon.id:
+            return True
+        return self.descendant_of(taxon)
+
+    # using the nested set left and right values, a taxon is a descendant of another
+    # as long as its left is higher and its right is lower
+    def descendant_of(self, taxon):
+        return self.left > taxon.left and self.right < taxon.right
+
+class ModelTaxonomy:
+
+    def __init__(self, path):
+        self.load_mapping(path)
+        self.assign_nested_values()
+
+    def load_mapping(self, path):
+        self.node_key_to_leaf_class_id = {}
+        self.leaf_class_to_taxon = {}
+        # there is no taxon with ID 0, but roots of the taxonomy have a parent ID of 0,
+        # so create a fake taxon of Life to represent the root of the entire tree
+        self.taxa = {0: Taxon({"name": "Life", "depth": 0})}
+        self.taxon_children = {}
+        try:
+            with open(path) as csv_file:
+                csv_reader = csv.DictReader(csv_file, delimiter=",")
+                for row in csv_reader:
+                    taxon_id = int(row["taxon_id"])
+                    rank_level = float(row["rank_level"])
+                    leaf_class_id = int(row["leaf_class_id"]) if row["leaf_class_id"] else None
+                    parent_id = int(row["parent_taxon_id"]) if row["parent_taxon_id"] else 0
+                    # some taxa are not leaves and aren't represented in the leaf layer
+                    if leaf_class_id is not None:
+                        self.node_key_to_leaf_class_id[taxon_id] = leaf_class_id
+                        self.leaf_class_to_taxon[leaf_class_id] = taxon_id
+                    self.taxa[taxon_id] = Taxon({
+                        "id": taxon_id,
+                        "name": row["name"],
+                        "parent_id": parent_id,
+                        "leaf_class_id": leaf_class_id,
+                        "rank_level": rank_level
+                    })
+                    if parent_id not in self.taxon_children:
+                        self.taxon_children[parent_id] = []
+                    self.taxon_children[parent_id].append(taxon_id)
+        except IOError as e:
+            print(e)
+            print(f"\n\nCannot open mapping file `{path}`\n\n")
+            raise e
+
+    # prints to the console a representation of this tree
+    def print(self, taxon_id=0, ancestor_prefix=""):
+        children = self.taxon_children[taxon_id]
+        index = 0
+        for child_id in children:
+            last_in_branch = (index == len(children) - 1)
+            index += 1
+            icon = "└──" if last_in_branch else "├──"
+            prefixIcon = "   " if last_in_branch else "│   "
+            taxon = self.taxa[child_id]
+            print(f'{ancestor_prefix}{icon}{taxon.name} :: {taxon.left}:{taxon.right}')
+            if child_id in self.taxon_children:
+                self.print(child_id, f"{ancestor_prefix}{prefixIcon}")
+
+    # calculates nested set left and right values, plus a depth recording how many nodes
+    # down the taxon is from Life. These can later be used as an efficient way to calculate
+    # whether a taxon is a descendant of another
+    def assign_nested_values(self, taxon_id=0, index=0, depth=1, ancestors=[]):
+        for child_id in self.taxon_children[taxon_id]:
+            self.taxa[child_id].set("left", index)
+            self.taxa[child_id].set("depth", depth)
+            self.taxa[child_id].set("ancestors", ancestors)
+            index += 1
+            if child_id in self.taxon_children:
+                child_ancestors = ancestors + [child_id]
+                index = self.assign_nested_values(child_id, index, depth + 1, child_ancestors)
+            self.taxa[child_id].set("right", index)
+            index += 1
+        return index
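The nested-set bookkeeping above is what every descendant lookup in this PR relies on, so a tiny worked example may help reviewers. The IDs below are hypothetical, not taxa from a real mapping file; this is a sketch, not part of the module:

```python
# Hypothetical three-taxon tree: Life -> Animalia -> Aves.
# assign_nested_values walks the tree depth-first, so Animalia
# gets left=0, Aves gets left=1/right=2, then Animalia gets right=3.
animalia = Taxon({"id": 1, "left": 0, "right": 3})
aves = Taxon({"id": 3, "left": 1, "right": 2})

# a taxon is a descendant when its interval nests inside the other's
assert aves.descendant_of(animalia)        # 1 > 0 and 2 < 3
assert not animalia.descendant_of(aves)    # 0 > 1 is False
assert aves.is_or_descendant_of(aves)      # equal ids count as "is"
```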
+
+
+class TFGeoPriorModelEnv:
+
+    def __init__(self, model, taxonomy):
+        self.taxonomy = taxonomy
+        # initialize the geo model for inference
+        self.gpmodel = tf.keras.models.load_model(
+            model,
+            custom_objects={'ResLayer': ResLayer},
+            compile=False
+        )
-
-    #background count per target taxon count in target taxon presence cells
-    counts = pd.DataFrame({
-        "target": target_spatial_grid_counts,
-        "all": all_spatial_grid_counts,
-    })
-    counts = counts.fillna(0)
-    counts["target_ratio"] = counts["all"]/counts["target"]
-    counts.replace([np.inf, -np.inf], np.nan, inplace=True)
-    good_counts = counts["target_ratio"].dropna()
-    return good_counts
-
-def pseudo_absences_for_taxon_id(
-    taxon_id,
-    all_spatial_grid_counts,
-    train_df_h3,
-    test_df_h3,
-    full_spatial_data_lookup_table
-    ):
-
-    #count cutoff is mean background count per target taxon count in target taxon presence cells
-    count_cutoff = ratios_for_taxon_id(taxon_id, all_spatial_grid_counts, train_df_h3).mean()
-    cutoff_grid_cell_indices = set(all_spatial_grid_counts[all_spatial_grid_counts>count_cutoff].index)
-    #absence candidates from test
-    ilocs = np.unique(np.random.randint(0, len(test_df_h3), 10_000))
-    sample_occupancy_pseudoabsences = []
-    for i in ilocs:
-        row = test_df_h3.iloc[i]
-
-        if row.name in cutoff_grid_cell_indices:
-            if taxon_id in full_spatial_data_lookup_table.loc[row.name].taxon_id:
-                #candidate in cell containing target taxon so can't be absence
-                pass
-            else:
-                #candidate in cell not containing target taxon and
-                #count is greater than count cutoff. Use as absence
-                sample_occupancy_pseudoabsences.append(
-                    (row.lat, row.lng)
-                )
+    def features_for_one_class_elevation(self, latitude, longitude, elevation):
+        """Evaluates the feature-extraction layers of the model for multiple locations
+
+        Args:
+            latitude (list): A list of latitudes
+            longitude (list): A list of longitudes (same length as latitude)
+            elevation (list): A list of elevations (same length as latitude)
+
+        Returns:
+            numpy array: the feature vector for each location
+        """
+        def encode_loc(latitude, longitude, elevation):
+            latitude = np.array(latitude)
+            longitude = np.array(longitude)
+            elevation = np.array(elevation)
+            elevation = elevation.astype("float32")
+            grid_lon = longitude.astype('float32') / 180.0
+            grid_lat = latitude.astype('float32') / 90.0
+
+            elevation[elevation>0] = elevation[elevation>0]/6574.0
+            elevation[elevation<0] = elevation[elevation<0]/32768.0
+            norm_elev = elevation
+
+            if np.isscalar(grid_lon):
+                grid_lon = np.array([grid_lon])
+            if np.isscalar(grid_lat):
+                grid_lat = np.array([grid_lat])
+            if np.isscalar(norm_elev):
+                norm_elev = np.array([norm_elev])
+
+            norm_loc = tf.stack([grid_lon, grid_lat], axis=1)
+
+            encoded_loc = tf.concat([
+                tf.sin(norm_loc * math.pi),
+                tf.cos(norm_loc * math.pi),
+                tf.expand_dims(norm_elev, axis=1),
+            ], axis=1)
+
+            return encoded_loc
+
+        encoded_loc = encode_loc(latitude, longitude, elevation)
+        loc_emb = self.gpmodel.layers[0](encoded_loc)
+
+        # res layers - feature extraction
+        x = self.gpmodel.layers[1](loc_emb)
+        x = self.gpmodel.layers[2](x)
+        x = self.gpmodel.layers[3](x)
+        x = self.gpmodel.layers[4](x)
-
-        #limit number of absences
-        if len(sample_occupancy_pseudoabsences) >= 500:
-            break
+
+        # return the location features; per-class scoring happens separately
+        return x
+
+    def eval_one_class_elevation_from_features(self, x, class_of_interest):
+        return tf.keras.activations.sigmoid(
+            tf.matmul(
+                x,
+                tf.expand_dims(self.gpmodel.layers[5].weights[0][:,class_of_interest], axis=0),
+                transpose_b=True
+            )
+        ).numpy()
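Because the residual feature extraction is class-independent and the class head is just one column of the final Dense layer, the intended pattern is to run the expensive pass once over all locations and then score any number of taxa with a cheap matrix product. A minimal usage sketch, assuming a trained model and mapping CSV exist at the placeholder paths below:

```python
# sketch only: "taxonomy.csv" and "geo_model.h5" are placeholder paths
mt = ModelTaxonomy("taxonomy.csv")
tfgpm = TFGeoPriorModelEnv("geo_model.h5", mt)

# expensive part, done once for all cells of interest
feats = tfgpm.features_for_one_class_elevation(
    latitude=[52.5, -33.9], longitude=[13.4, 18.4], elevation=[34.0, 25.0])

# cheap part, repeated per taxon
for leaf_class_id in (0, 1, 2):  # placeholder leaf class IDs
    scores = tfgpm.eval_one_class_elevation_from_features(feats, leaf_class_id)
    print(leaf_class_id, scores.squeeze())
```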
+
+# capture the installed warning handler so the filter below can delegate to it
+_original_showwarning = warnings.showwarning
+
+def ignore_shapely_deprecation_warning(message, category, filename, lineno, file=None, line=None):
+    if "array interface is deprecated" in str(message):
+        return None
+    return _original_showwarning(message, category, filename, lineno, file, line)
+
+def main(args):
+    print("loading in the model...")
+    mt = ModelTaxonomy(args.taxonomy)
+    tfgpm = TFGeoPriorModelEnv(args.model, mt)
-    sample_occupancy_pseudoabsences = pd.DataFrame(
-        sample_occupancy_pseudoabsences,
-        columns=["lat", "lng"]
+    print("setting up the map...")
+    warnings.showwarning = ignore_shapely_deprecation_warning
+    im = tifffile.imread(args.elevation)
+    im_df = pd.DataFrame(im)
+    im_df.index = np.linspace(90, -90, 2160)
+    im_df.columns = np.linspace(-180, 180, 4320)
+    im_df = im_df.reset_index()
+    im_df = im_df.melt(
+        id_vars=["index"],
     )
-    sample_occupancy_pseudoabsences["taxon_id"] = taxon_id
-    return sample_occupancy_pseudoabsences
-
-def get_threshold(test, predictions):
-
-    precision, recall, thresholds = precision_recall_curve(test, predictions)
-    p1 = (2 * precision * recall)
-    p2 = (precision + recall)
-    out = np.zeros( (len(p1)) )
-    fscore = np.divide(p1,p2, out=out, where=p2!=0)
-    index = np.argmax(fscore)
-    thresholdOpt = round(thresholds[index], ndigits = 4)
-    prauc = auc(recall, precision)
-    return thresholdOpt, prauc
-
-def get_predictions(latitude,longitude,taxon_id, mt, tfgpm):
-
-    class_of_interest = mt.node_key_to_leaf_class_id[taxon_id]
-    predictions = tfgpm.eval_one_class(
-        np.array(latitude),
-        np.array(longitude),
-        class_of_interest
+    im_df.columns = ["lat", "lng", "elevation"]
+    elev_dfh3 = im_df.h3.geo_to_h3(args.h3_resolution)
+    elev_dfh3 = elev_dfh3.drop(
+        columns=['lng', 'lat']
+    ).groupby("h3_0"+str(args.h3_resolution)).mean()
+    gdfk = elev_dfh3.h3.h3_to_geo()
+    gdfk["lng"] = gdfk["geometry"].x
+    gdfk["lat"] = gdfk["geometry"].y
+    _ = gdfk.pop("geometry")
+    gdfk = gdfk.rename_axis('h3index')
+
+    print("making features...")
+    feats = tfgpm.features_for_one_class_elevation(
+        latitude=list(gdfk.lat),
+        longitude=list(gdfk.lng),
+        elevation=list(gdfk.elevation)
     )
-    return predictions
-
-def get_predictions_py(latitude,longitude,taxon_id, net_params, model, torch):
-
-    class_of_interest = net_params["params"]["class_to_taxa"].index(taxon_id)
-    grid_lon = torch.from_numpy(longitude/180)
-    grid_lat = torch.from_numpy(latitude/90)
-    grid_lon = grid_lon.repeat(1,1).unsqueeze(2)
-    grid_lat = grid_lat.repeat(1, 1).unsqueeze(2)
-    loc_ip = torch.cat((grid_lon, grid_lat), 2)
-    feats = torch.cat((torch.sin(math.pi*loc_ip), torch.cos(math.pi*loc_ip)), 2)
-    model.eval()
-    with torch.no_grad():
-        pred = model(feats, class_of_interest=class_of_interest)
-    predictions = pred.cpu().numpy()[0]
-    return predictions
-
-def main(args):
-
-    print("loading in test and train data...")
-    test_df = pd.read_csv(args.test_spatial_data_path,
-                          usecols=["taxon_id","latitude","longitude"]).rename({
-        "latitude": "lat",
-        "longitude": "lng"
-    }, axis=1)
-    train_df = pd.read_csv(args.train_spatial_data_path,
-                           usecols=["taxon_id","latitude","longitude"]).rename({
+    print("loading in the training data...")
+    train_df = pd.read_csv(args.train_spatial_data,
+                           usecols=["taxon_id","latitude","longitude","captive"]).rename({
         "latitude": "lat",
         "longitude": "lng"
     }, axis=1)
-    taxon_ids = test_df.taxon_id.unique()
-    if args.stop_after is not None:
-        taxon_ids = taxon_ids[0:args.stop_after]
-
-    print("calculating absences...")
-    test_df_h3 = test_df.h3.geo_to_h3(args.h3_resolution)
+    train_df = train_df[train_df.captive==0]  # no-CID ok, wild only
+    train_df = train_df.drop(["captive"], axis=1)
     train_df_h3 = train_df.h3.geo_to_h3(args.h3_resolution)
-    full_spatial_data_lookup_table = pd.concat([train_df_h3, test_df_h3]).pivot_table(
-        index="h3_0"+str(args.h3_resolution),
-        values="taxon_id",
-        aggfunc=set
-    )
-    pseudoabsence_df = pd.DataFrame()
     all_spatial_grid_counts = train_df_h3.index.value_counts()
-    for taxon_id in tqdm(taxon_ids):
-        pa = pseudo_absences_for_taxon_id(
-            taxon_id,
-            all_spatial_grid_counts,
-            train_df_h3,
-            test_df_h3,
-            full_spatial_data_lookup_table
-        )
-        pseudoabsence_df = pd.concat([pseudoabsence_df, pa])
-
-    print("calculating thresholds...")
-    if args.model_type == "tf":
-        from lib.taxon import Taxon
-        from lib.model_taxonomy import ModelTaxonomy
-        from lib.tf_gp_model import TFGeoPriorModel
-        mt = ModelTaxonomy(args.taxonomy_path)
-        tfgpm = TFGeoPriorModel(args.model_path, mt)
-    elif args.model_type == "pytorch":
-        import torch
-        sys.path.append("../geo_prior_inat")
-        from geo_prior import models
-        net_params = torch.load(args.model_path, map_location="cpu")
-        net_params["params"]['device'] = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-        model_name = models.select_model(net_params["params"]["model"])
-        model = model_name(num_inputs=net_params["params"]['num_feats'],
-                           num_classes=net_params["params"]['num_classes'],
-                           num_filts=net_params["params"]['num_filts'],
-                           num_users=net_params["params"]['num_users'],
-                           num_context=net_params["params"]['num_context']).to(net_params["params"]['device'])
-        model.load_state_dict(net_params['state_dict'])
+    presence_absence = pd.DataFrame({
+        "background": all_spatial_grid_counts,
+    })
+    presence_absence = presence_absence.fillna(0)
+
+    print("...looping through taxa")
     output = []
+    taxa = pd.read_csv(args.taxonomy, usecols=["taxon_id","leaf_class_id","iconic_class_id"]).dropna(subset=['leaf_class_id'])
+    taxon_ids = taxa.taxon_id
+    if args.stop_after is not None:
+        taxon_ids = taxon_ids[0:args.stop_after]
+    desired_recall = 0.95
+    resolution = args.h3_resolution
+    area = h3.hex_area(resolution)
     for taxon_id in tqdm(taxon_ids):
-        absences = pseudoabsence_df[pseudoabsence_df.taxon_id==taxon_id]
-        presences = test_df[test_df.taxon_id==taxon_id]
-        test = np.concatenate(([1] * len(presences), [0] * len(absences)))
-
-        longitude = np.concatenate((
-            presences.lng.values.astype('float32'),
-            absences.lng.values.astype('float32')
-        ))
-
-        latitude = np.concatenate((
-            presences.lat.values.astype('float32'),
-            absences.lat.values.astype('float32')
-        ))
-
-        if args.model_type == "tf":
-            predictions = get_predictions(latitude, longitude, taxon_id, mt, tfgpm)
-        elif args.model_type == "pytorch":
-            predictions = get_predictions_py(latitude, longitude, taxon_id, net_params, model, torch)
-        threshold, prauc = get_threshold(test, predictions)
-
+        try:
+            class_of_interest = mt.node_key_to_leaf_class_id[taxon_id]
+        except KeyError:
+            print(f'taxon {taxon_id} is not in the model')
+            continue
+
+        # get predictions
+        preds = tfgpm.eval_one_class_elevation_from_features(feats, class_of_interest)
+        gdfk["pred"] = tf.squeeze(preds).numpy()
+
+        # make presence absence dataset
+        target_spatial_grid_counts = train_df_h3[train_df_h3.taxon_id==taxon_id].index.value_counts()
+        presences = gdfk.loc[target_spatial_grid_counts.index]["pred"]
+        if len(presences) == 0:
+            print("not present")
+            continue
+
+        # calculate threshold
+        presence_absence["foreground"] = target_spatial_grid_counts
+        presence_absence["predictions"] = gdfk["pred"]
+        presence_absence.foreground = presence_absence.foreground.fillna(0)
+        yield_cutoff = np.percentile((presence_absence["background"]/presence_absence["foreground"])[presence_absence["foreground"]>0], 95)
+        absences = presence_absence[(presence_absence["foreground"]==0) & (presence_absence["background"] > yield_cutoff)]["predictions"]
+        presences = presence_absence[(presence_absence["foreground"]>0)]["predictions"]
+        df_x = pd.DataFrame({'predictions': presences, 'test': 1})
+        df_y = pd.DataFrame({'predictions': absences, 'test': 0})
+        for_thres = pd.concat([df_x, df_y], ignore_index=False)
+        precision, recall, thresholds = precision_recall_curve(for_thres.test, for_thres.predictions)
+        p1 = (2 * precision * recall)
+        p2 = (precision + recall)
+        out = np.zeros( (len(p1)) )
+        fscore = np.divide(p1,p2, out=out, where=p2!=0)
+        index = np.argmax(fscore)
+        thres = thresholds[index]
+
+        # store data
         row = {
             "taxon_id": taxon_id,
-            "threshold": threshold,
-            "auc": prauc,
-            "num_presences": len(presences),
-            "num_absences": len(absences)
+            "thres": thres,
+            "area": len(gdfk[gdfk.pred >= thres])*area
        }
         row_dict = dict(row)
         output.append(row_dict)
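The loop above picks each taxon's threshold as the point on the precision-recall curve that maximizes F1, guarding against division by zero where precision + recall is 0. The same selection on synthetic inputs (invented scores, not real model output):

```python
import numpy as np
from sklearn.metrics import precision_recall_curve

# synthetic example: 1 = presence cell, 0 = pseudo-absence cell
test = np.array([1, 1, 1, 0, 0, 0, 1, 0])
predictions = np.array([0.9, 0.8, 0.7, 0.6, 0.2, 0.1, 0.4, 0.3])

precision, recall, thresholds = precision_recall_curve(test, predictions)
f1 = np.divide(2 * precision * recall, precision + recall,
               out=np.zeros(len(precision)), where=(precision + recall) != 0)
# thresholds has one fewer entry than precision/recall, but the final
# precision/recall point has F1 = 0, so argmax stays in range
best = thresholds[np.argmax(f1)]
print(f"F1-maximizing threshold: {best}")
```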
-
+
     print("writing output...")
-    pd.DataFrame(output).to_csv(args.output_path)
+    output_pd = pd.DataFrame(output)
+    output_pd.to_csv(args.output_dir+"/thresholds.csv")
 
 
 if __name__ == "__main__":
     info_str = '\nrun as follows\n' + \
-               '   python generate_thresholds.py --model_type tf \n' + \
-               '   --model_path tf_geoprior_1_4_r6.h5 \n' + \
-               '   --taxonomy_path taxonomy_1_4.csv\n' + \
-               '   --test_spatial_data_path test_spatial_data.csv\n' + \
-               '   --train_spatial_data_path train_spatial_data.csv\n' + \
-               '   --output_path thresholds.csv\n' + \
-               '   --h3_resolution 3\n' + \
+               '   python generate_thresholds.py --elevation wc2.1_5m_elev.tif \n' + \
+               '   --model v2_6/tf_geoprior_2_5_r6_elevation.h5 \n' + \
+               '   --taxonomy taxonomy_1_4.csv\n' + \
+               '   --train_spatial_data v2_6/taxonomy.csv\n' + \
+               '   --output_dir v2_6\n' + \
+               '   --h3_resolution 4\n' + \
                '   --stop_after 10\n'
 
     parser = argparse.ArgumentParser(usage=info_str)
-    parser.add_argument('--model_type', type=str,
-                        help='Can be either "tf" or "pytorch".', required=True)
-    parser.add_argument('--model_path', type=str,
+    parser.add_argument('--elevation', type=str,
+                        help='Path to elev tif.', required=True)
+    parser.add_argument('--model', type=str,
                         help='Path to tf model.', required=True)
-    parser.add_argument('--taxonomy_path', type=str,
+    parser.add_argument('--taxonomy', type=str,
                         help='Path to taxonomy csv.', required=True)
-    parser.add_argument('--test_spatial_data_path', type=str,
-                        help='Path to test csv for presence/absences.', required=True)
-    parser.add_argument('--train_spatial_data_path', type=str,
+    parser.add_argument('--train_spatial_data', type=str,
                         help='Path to train csv for occupancy.', required=True)
-    parser.add_argument('--output_path', type=str,
-                        help='Path to write thesholds.', required=True)
-    parser.add_argument('--h3_resolution', type=int, default=3,
-                        help='grid resolution from 0 - 15, lower numbers are coarser/faster. Recommend 3, 4, or 5')
+    parser.add_argument('--output_dir', type=str,
+                        help='directory to write thresholds.', required=True)
+    parser.add_argument('--h3_resolution', type=int, default=4,
+                        help='grid resolution from 0 - 15, lower numbers are coarser/faster. Currently using 4')
     parser.add_argument('--stop_after', type=int,
-                        help='just run the first x taxa')
+                        help='just run the first x taxa')
     args = parser.parse_args()
 
     main(args)
+
\ No newline at end of file
diff --git a/lib/inat_inferrer.py b/lib/inat_inferrer.py
index 8e155f9..1a6fb67 100644
--- a/lib/inat_inferrer.py
+++ b/lib/inat_inferrer.py
@@ -1,11 +1,22 @@
-import os
+import time
 import magic
 import tensorflow as tf
+import pandas as pd
+import h3
+import h3pandas  # noqa: F401
+import math
+import os
+import tifffile
+import numpy as np
 from PIL import Image
-from lib.pt_geo_prior_model import PTGeoPriorModel
-from lib.tf_gp_model import TFGeoPriorModel
+from lib.tf_gp_elev_model import TFGeoPriorModelElev
 from lib.vision_inferrer import VisionInferrer
-from lib.model_taxonomy import ModelTaxonomy
+from lib.model_taxonomy_dataframe import ModelTaxonomyDataframe
+
+# TODO: is there a better way to address the SettingWithCopyWarning?
+pd.options.mode.chained_assignment = None + +MINIMUM_GEO_SCORE = 0.005 class InatInferrer: @@ -14,35 +25,367 @@ def __init__(self, config): self.config = config self.setup_taxonomy(config) self.setup_vision_model(config) + self.setup_elevation_dataframe(config) self.setup_geo_model(config) self.upload_folder = "static/" def setup_taxonomy(self, config): - self.taxonomy = ModelTaxonomy(config["taxonomy_path"]) + self.taxonomy = ModelTaxonomyDataframe( + config["taxonomy_path"], + config["tf_elev_thresholds"] if "tf_elev_thresholds" in config else None + ) def setup_vision_model(self, config): self.vision_inferrer = VisionInferrer(config["vision_model_path"], self.taxonomy) + def setup_elevation_dataframe(self, config): + self.geo_elevation_cells = None + # load elevation data stored at H3 resolution 4 + if "elevation_h3_r4" in config: + self.geo_elevation_cells = pd.read_csv(config["elevation_h3_r4"]). \ + sort_values("h3_04").set_index("h3_04").sort_index() + self.geo_elevation_cells = InatInferrer.add_lat_lng_to_h3_geo_dataframe(self.geo_elevation_cells) + + def setup_elevation_dataframe_from_worldclim(self, config, resolution): + # preventing from processing at too high a resolution + if resolution > 5: + return + if "wc2.1_5m_elev_2.tif" in config: + im = tifffile.imread(config["wc2.1_5m_elev_2.tif"]) + im_df = pd.DataFrame(im) + im_df.index = np.linspace(90, -90, 2160) + im_df.columns = np.linspace(-180, 180, 4320) + im_df = im_df.reset_index() + im_df = im_df.melt(id_vars=["index"]) + im_df.columns = ["lat", "lng", "elevation"] + elev_dfh3 = im_df.h3.geo_to_h3(resolution) + elev_dfh3 = elev_dfh3.drop(columns=["lng", "lat"]).groupby(f'h3_0{resolution}').mean() + def setup_geo_model(self, config): - if "use_pt_gp_model" in config and config["use_pt_gp_model"] and "pt_geo_model_path" in config: - self.geo_model = PTGeoPriorModel(config["pt_geo_model_path"], self.taxonomy) - elif "tf_geo_model_path" in config: - self.geo_model = TFGeoPriorModel(config["tf_geo_model_path"], self.taxonomy) - else: - self.geo_model = None + self.geo_elevation_model = None + self.geo_model_features = None + if "tf_geo_elevation_model_path" in config and self.geo_elevation_cells is not None: + self.geo_elevation_model = TFGeoPriorModelElev(config["tf_geo_elevation_model_path"]) + self.geo_model_features = self.geo_elevation_model.features_for_one_class_elevation( + latitude=list(self.geo_elevation_cells.lat), + longitude=list(self.geo_elevation_cells.lng), + elevation=list(self.geo_elevation_cells.elevation) + ) - def prepare_image_for_inference(self, file_path, image_uuid): + def prepare_image_for_inference(self, file_path): mime_type = magic.from_file(file_path, mime=True) # attempt to convert non jpegs if mime_type != "image/jpeg": im = Image.open(file_path) - rgb_im = im.convert("RGB") - file_path = os.path.join(self.upload_folder, image_uuid) + ".jpg" - rgb_im.save(file_path) - - image = tf.io.read_file(file_path) - image = tf.image.decode_jpeg(image, channels=3) + image = im.convert("RGB") + else: + image = tf.io.read_file(file_path) + image = tf.image.decode_jpeg(image, channels=3) image = tf.image.convert_image_dtype(image, tf.float32) image = tf.image.central_crop(image, 0.875) image = tf.image.resize(image, [299, 299], tf.image.ResizeMethod.NEAREST_NEIGHBOR) return tf.expand_dims(image, 0) + + def vision_predict(self, image, debug=False): + if debug: + start_time = time.time() + vision_scores = self.vision_inferrer.process_image(image) + if debug: + print("Vision Time: %0.2fms" % ((time.time() - 
start_time) * 1000.))
+        return vision_scores
+
+    def geo_model_predict(self, lat, lng, debug=False):
+        if debug:
+            start_time = time.time()
+        if lat is None or lat == "" or lng is None or lng == "":
+            return None
+        if self.geo_elevation_model is None:
+            return None
+        # lookup the H3 cell this lat lng occurs in
+        h3_cell = h3.geo_to_h3(float(lat), float(lng), 4)
+        h3_cell_centroid = h3.h3_to_geo(h3_cell)
+        # get the average elevation of the above H3 cell
+        elevation = self.geo_elevation_cells.loc[h3_cell].elevation
+        geo_scores = self.geo_elevation_model.predict(
+            h3_cell_centroid[0], h3_cell_centroid[1], float(elevation))
+        if debug:
+            print("Geo Time: %0.2fms" % ((time.time() - start_time) * 1000.))
+        return geo_scores
+
+    def lookup_taxon(self, taxon_id):
+        if taxon_id is None:
+            return None
+        try:
+            return self.taxonomy.df.loc[taxon_id]
+        except Exception as e:
+            print(f'taxon `{taxon_id}` does not exist in the taxonomy')
+            raise e
+
+    def predictions_for_image(self, file_path, lat, lng, filter_taxon, score_without_geo=False,
+                              debug=False):
+        if debug:
+            start_time = time.time()
+        image = self.prepare_image_for_inference(file_path)
+        raw_vision_scores = self.vision_predict(image, debug)
+        raw_geo_scores = self.geo_model_predict(lat, lng, debug)
+        top100 = self.combine_results(raw_vision_scores, raw_geo_scores, filter_taxon,
+                                      score_without_geo, debug)
+        if debug:
+            print("Prediction Time: %0.2fms" % ((time.time() - start_time) * 1000.))
+        return top100
+
+    def combine_results(self, raw_vision_scores, raw_geo_scores, filter_taxon,
+                        score_without_geo=False, debug=False):
+        if debug:
+            start_time = time.time()
+        no_geo_scores = (raw_geo_scores is None)
+
+        # make a copy of the model taxonomy leaf nodes to be used for storing results. Skip any
+        # filtering at this stage as the taxonomy dataframe needs to have the same number of
+        # leaf taxa, in the same order, as the raw scores
+        leaf_scores = self.taxonomy.leaf_df.copy()
+        # add a column for vision scores
+        leaf_scores["vision_score"] = raw_vision_scores
+        # add a column for geo scores
+        leaf_scores["geo_score"] = 0 if no_geo_scores else raw_geo_scores
+        # set a lower limit for geo scores if there are any
+        leaf_scores["normalized_geo_score"] = 0 if no_geo_scores \
+            else leaf_scores["geo_score"].clip(MINIMUM_GEO_SCORE, None)
+
+        # if filtering by a taxon, restrict results to that taxon and its descendants
+        if filter_taxon is not None:
+            # using nested set left and right values, select the filter taxon and its descendants
+            leaf_scores = leaf_scores.query(
+                f'left >= {filter_taxon["left"]} and right <= {filter_taxon["right"]}'
+            )
+            # normalize the vision scores so they add up to 1 after filtering
+            sum_of_vision_scores = leaf_scores["vision_score"].sum()
+            leaf_scores["normalized_vision_score"] = leaf_scores["vision_score"] / sum_of_vision_scores
+        else:
+            # when not filtering by a taxon, the normalized vision score is the same as the original
+            leaf_scores["normalized_vision_score"] = leaf_scores["vision_score"]
+
+        if no_geo_scores or score_without_geo:
+            # if there are no geo scores, or it was requested to not use geo scores to affect
+            # the final combined score, set the combined scores to be the same as the vision scores
+            leaf_scores["combined_score"] = leaf_scores["normalized_vision_score"]
+        else:
+            # the combined score is simply the normalized vision score
+            # multiplied by the normalized geo score
+            leaf_scores["combined_score"] = leaf_scores["normalized_vision_score"] * \
+                leaf_scores["normalized_geo_score"]
+        if
debug: + print("Score Combining Time: %0.2fms" % ((time.time() - start_time) * 1000.)) + return leaf_scores + + def aggregate_results(self, leaf_scores, filter_taxon, score_without_geo=False, debug=False): + if debug: + start_time = time.time() + + no_geo_scores = (leaf_scores["geo_score"].max() == 0) + # make a copy of the full taxonomy including non-leaves to be used for storing results + if filter_taxon is not None: + # using nested set left and right values, select the filter taxon, + # its descendants, and its ancestors + all_node_scores = self.taxonomy.df.query( + f'(left >= {filter_taxon["left"]} and right <= {filter_taxon["right"]}) or' + + f'(left < {filter_taxon["left"]} and right > {filter_taxon["right"]})' + ).copy().reset_index(drop=True) + else: + all_node_scores = self.taxonomy.df.copy().reset_index(drop=True) + + # copy the score columns from the already-calculated leaf scores + all_node_scores = pd.merge(all_node_scores, leaf_scores[[ + "taxon_id", "vision_score", "normalized_vision_score", "geo_score", + "normalized_geo_score"]], on="taxon_id", how="left").set_index("taxon_id", drop=False) + + # calculate the highest combined score + top_combined_score = leaf_scores.sort_values( + "combined_score", ascending=False).head(1)["combined_score"].values[0] + lower_cutoff_threshold = 0.0001 + # determine a lower-bound cutoff where results with combined scores below this cutoff + # will be ignored. This isn't necessary for scoring, but it improves performance + # TODO: evaluate this + cutoff = max([0.00001, top_combined_score * lower_cutoff_threshold]) + + aggregated_scores = {} + # restrict score aggregation to results where the combined score is above the cutoff + scores_to_aggregate = leaf_scores.query(f'combined_score > {cutoff}') + # loop through all results where the combined score is above the cutoff + for taxon_id, vision_score, geo_score, geo_threshold in zip( + scores_to_aggregate["taxon_id"], + scores_to_aggregate["normalized_vision_score"], + scores_to_aggregate["normalized_geo_score"], + scores_to_aggregate["geo_threshold"] + ): + # loop through the pre-calculated ancestors of this result's taxon + for ancestor_taxon_id in self.taxonomy.taxon_ancestors[taxon_id]: + # set default values for the ancestor the first time it is referenced + if ancestor_taxon_id not in aggregated_scores: + aggregated_scores[ancestor_taxon_id] = {} + aggregated_scores[ancestor_taxon_id]["aggregated_vision_score"] = 0 + if not no_geo_scores: + aggregated_scores[ancestor_taxon_id]["aggregated_geo_score"] = 0 + aggregated_scores[ancestor_taxon_id]["aggregated_geo_threshold"] = 100 + # aggregated vision score is a sum of descendant scores + aggregated_scores[ancestor_taxon_id]["aggregated_vision_score"] += vision_score + if not no_geo_scores and geo_score > aggregated_scores[ancestor_taxon_id]["aggregated_geo_score"]: + # aggregated geo score is the max of descendant geo scores + aggregated_scores[ancestor_taxon_id]["aggregated_geo_score"] = geo_score + if not no_geo_scores and \ + aggregated_scores[ancestor_taxon_id]["aggregated_geo_threshold"] != 0 and \ + geo_score > geo_threshold: + # aggregated geo threshold is set to 0 if any descendants are above their threshold + aggregated_scores[ancestor_taxon_id]["aggregated_geo_threshold"] = 0 + + # turn the aggregated_scores dict into a data frame + scores_df = pd.DataFrame.from_dict(aggregated_scores, orient="index") + # merge the aggregated scores into the scoring taxonomy + all_node_scores = all_node_scores.join(scores_df) + + # the aggregated 
scores of leaves will be NaN, so populate them with their original scores
+        all_node_scores["aggregated_vision_score"].fillna(
+            all_node_scores["normalized_vision_score"], inplace=True)
+        if no_geo_scores:
+            all_node_scores["aggregated_geo_score"] = 0
+            all_node_scores["aggregated_geo_threshold"] = 0
+        else:
+            all_node_scores["aggregated_geo_score"].fillna(
+                all_node_scores["normalized_geo_score"], inplace=True)
+            all_node_scores["aggregated_geo_threshold"].fillna(
+                all_node_scores["geo_threshold"], inplace=True)
+
+        if (no_geo_scores or score_without_geo):
+            # if there are no geo scores, or it was requested to not use geo scores to affect
+            # the final combined score, set the combined scores to be the same as the vision scores
+            all_node_scores["aggregated_combined_score"] = all_node_scores["aggregated_vision_score"]
+        else:
+            # the combined score is simply the normalized vision score
+            # multiplied by the normalized geo score
+            all_node_scores["aggregated_combined_score"] = all_node_scores["aggregated_vision_score"] * \
+                all_node_scores["aggregated_geo_score"]
+
+        # calculate a normalized combined score so all values add to 1, to be used for thresholding
+        sum_of_root_node_aggregated_combined_scores = all_node_scores.query(
+            "parent_taxon_id.isnull()")["aggregated_combined_score"].sum()
+        all_node_scores["normalized_aggregated_combined_score"] = all_node_scores[
+            "aggregated_combined_score"] / sum_of_root_node_aggregated_combined_scores
+
+        if debug:
+            print("Aggregation Time: %0.2fms" % ((time.time() - start_time) * 1000.))
+            thresholded_results = all_node_scores.query("normalized_aggregated_combined_score > 0.05")
+            print("\nTree of aggregated results:")
+            ModelTaxonomyDataframe.print(thresholded_results, display_taxon_lambda=(
+                lambda row: f'{row.name} [' +
+                            f'V:{round(row.aggregated_vision_score, 4)}, ' +
+                            f'G:{round(row.aggregated_geo_score, 4)}, ' +
+                            f'C:{round(row.aggregated_combined_score, 4)}, ' +
+                            f'NC:{round(row.normalized_aggregated_combined_score, 4)}]'))
+            print("")
+        return all_node_scores
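To make the aggregation rules concrete: vision scores sum up the tree, geo scores propagate as a max, and the aggregated geo threshold drops to 0 as soon as any descendant clears its own threshold. A hand-worked sketch of that rollup with hypothetical taxa and scores:

```python
# hypothetical pruned tree: genus (id 10) with two species leaves (11, 12)
taxon_ancestors = {11: [10], 12: [10]}
leaves = [
    # (taxon_id, vision_score, geo_score, geo_threshold)
    (11, 0.50, 0.30, 0.20),   # above its geo threshold
    (12, 0.25, 0.10, 0.40),   # below its geo threshold
]

agg = {}
for taxon_id, vision, geo, geo_threshold in leaves:
    for ancestor in taxon_ancestors[taxon_id]:
        node = agg.setdefault(ancestor, {"vision": 0, "geo": 0, "geo_thres": 100})
        node["vision"] += vision             # sum of descendant vision scores
        node["geo"] = max(node["geo"], geo)  # max of descendant geo scores
        if geo > geo_threshold:
            node["geo_thres"] = 0            # some descendant is "in range"

print(agg[10])  # {'vision': 0.75, 'geo': 0.3, 'geo_thres': 0}
```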
+
+    def h3_04_geo_results_for_taxon(self, taxon_id, bounds=[], thresholded=False, raw_results=False):
+        if (self.geo_elevation_cells is None) or (self.geo_elevation_model is None):
+            return
+        try:
+            taxon = self.taxonomy.df.loc[taxon_id]
+        except Exception as e:
+            print(f'taxon `{taxon_id}` does not exist in the taxonomy')
+            raise e
+        if math.isnan(taxon["leaf_class_id"]):
+            return
+
+        geo_scores = self.geo_elevation_model.eval_one_class_elevation_from_features(
+            self.geo_model_features, int(taxon["leaf_class_id"]))
+        geo_score_cells = self.geo_elevation_cells.copy()
+        geo_score_cells["geo_score"] = tf.squeeze(geo_scores).numpy()
+        if thresholded:
+            geo_score_cells = geo_score_cells.query(f'geo_score > {taxon["geo_threshold"]}')
+        else:
+            # return scores more than 10% of the taxon threshold, or more than 0.0001, whichever
+            # is smaller. This reduces the data that needs to be rendered client-side for the
+            # Data Layer mapping approach, and can maybe be removed once we switch to map tiles
+            lower_bound_score = np.array([0.0001, taxon["geo_threshold"] / 10]).min()
+            geo_score_cells = geo_score_cells.query(f'geo_score > {lower_bound_score}')
+
+        if bounds:
+            min_score = geo_score_cells["geo_score"].min()
+            max_score = geo_score_cells["geo_score"].max()
+            geo_score_cells = InatInferrer.filter_geo_dataframe_by_bounds(geo_score_cells, bounds)
+            # perform a log transform on the scores based on the min/max score for the unbounded set
+            geo_score_cells["geo_score"] = \
+                (np.log10(geo_score_cells["geo_score"]) - math.log10(min_score)) / \
+                (math.log10(max_score) - math.log10(min_score))
+
+        if raw_results:
+            return geo_score_cells
+        return dict(zip(geo_score_cells.index.astype(str), geo_score_cells["geo_score"]))
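For a sense of the response shape, a hypothetical call is sketched below; the taxon ID, bounds, and cell values are invented, and when `bounds` is passed the scores come back log-normalized against the unbounded min/max as implemented above:

```python
# sketch: assumes `inferrer` is an InatInferrer built from a full config
cells = inferrer.h3_04_geo_results_for_taxon(
    12345,                             # hypothetical leaf taxon_id
    bounds=[35.0, -10.0, 60.0, 30.0],  # [swlat, swlng, nelat, nelng]
    thresholded=True)
# keys are H3 resolution-4 cell indexes, values are normalized geo scores,
# e.g. {"8428309ffffffff": 0.73, ...}
```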
+
+    def h3_04_taxon_range(self, taxon_id, bounds=[]):
+        taxon_range_path = os.path.join(self.config["taxon_ranges_path"], f'{taxon_id}.csv')
+        if not os.path.exists(taxon_range_path):
+            return None
+        taxon_range_df = pd.read_csv(taxon_range_path, names=["h3_04"], header=None). \
+            sort_values("h3_04").set_index("h3_04").sort_index()
+        taxon_range_df = InatInferrer.add_lat_lng_to_h3_geo_dataframe(taxon_range_df)
+        if bounds:
+            taxon_range_df = InatInferrer.filter_geo_dataframe_by_bounds(taxon_range_df, bounds)
+        taxon_range_df["value"] = 1
+        return dict(zip(taxon_range_df.index.astype(str), taxon_range_df["value"]))
+
+    def h3_04_taxon_range_comparison(self, taxon_id, bounds=[]):
+        geomodel_results = self.h3_04_geo_results_for_taxon(taxon_id, bounds, thresholded=True) or {}
+        taxon_range_results = self.h3_04_taxon_range(taxon_id, bounds) or {}
+        combined_results = {}
+        for cell_key in geomodel_results:
+            if cell_key in taxon_range_results:
+                combined_results[cell_key] = 0.5
+            else:
+                combined_results[cell_key] = 0
+        for cell_key in taxon_range_results:
+            if cell_key not in geomodel_results:
+                combined_results[cell_key] = 1
+        return combined_results
+
+    def h3_04_bounds(self, taxon_id):
+        geomodel_results = self.h3_04_geo_results_for_taxon(
+            taxon_id, bounds=None, thresholded=True, raw_results=True)
+        if geomodel_results is None:
+            return
+        return {
+            "swlat": geomodel_results["lat"].min(),
+            "swlng": geomodel_results["lng"].min(),
+            "nelat": geomodel_results["lat"].max(),
+            "nelng": geomodel_results["lng"].max()
+        }
+
+    @staticmethod
+    def add_lat_lng_to_h3_geo_dataframe(geo_df):
+        geo_df = geo_df.h3.h3_to_geo()
+        geo_df["lng"] = geo_df["geometry"].x
+        geo_df["lat"] = geo_df["geometry"].y
+        geo_df.pop("geometry")
+        return geo_df
+
+    @staticmethod
+    def filter_geo_dataframe_by_bounds(geo_df, bounds):
+        # this is querying on the centroid, but cells just outside the bounds may have a
+        # centroid outside the bounds while part of the polygon is within the bounds. Add
+        # a small buffer to ensure this returns any cell whose polygon is
+        # even partially within the bounds
+        buffer = 0.6
+
+        # similarly, the centroid may be on the other side of the antimeridian, so look up
+        # cells that might be just over the antimeridian on either side
+        antimeridian_condition = ""
+        if bounds[1] < -179:
+            antimeridian_condition = "or (lng > 179)"
+        elif bounds[3] > 179:
+            antimeridian_condition = "or (lng < -179)"
+
+        # query for cells within the buffered bounds, and potentially
+        # on the other side of the antimeridian
+        query = f'lat >= {bounds[0] - buffer} and lat <= {bounds[2] + buffer} and ' + \
+            f' ((lng >= {bounds[1] - buffer} and lng <= {bounds[3] + buffer})' + \
+            f' {antimeridian_condition})'
+        return geo_df.query(query)
diff --git a/lib/inat_vision_api.py b/lib/inat_vision_api.py
index 1cdb8f1..8703c43 100644
--- a/lib/inat_vision_api.py
+++ b/lib/inat_vision_api.py
@@ -8,7 +8,6 @@
 from flask import Flask, request, render_template
 from web_forms import ImageForm
 from inat_inferrer import InatInferrer
-from model_scoring import ModelScoring
 
 
 class InatVisionAPI:
@@ -18,12 +17,59 @@
     def __init__(self, config):
         self.app = Flask(__name__)
         self.app.secret_key = config["app_secret"]
         self.upload_folder = "static/"
-        self.app.add_url_rule(
-            "/", "index", self.index_route, methods=["GET", "POST"])
+        self.app.add_url_rule("/", "index", self.index_route, methods=["GET", "POST"])
+        self.app.add_url_rule("/h3_04", "h3_04", self.h3_04_route, methods=["GET"])
+        self.app.add_url_rule("/h3_04_taxon_range", "h3_04_taxon_range",
+                              self.h3_04_taxon_range_route, methods=["GET"])
+        self.app.add_url_rule("/h3_04_taxon_range_comparison", "h3_04_taxon_range_comparison",
+                              self.h3_04_taxon_range_comparison_route, methods=["GET"])
+        self.app.add_url_rule("/h3_04_bounds", "h3_04_bounds",
+                              self.h3_04_bounds_route, methods=["GET"])
 
     def setup_inferrer(self, config):
         self.inferrer = InatInferrer(config)
 
+    def h3_04_route(self):
+        return self.h3_04_default_route(self.inferrer.h3_04_geo_results_for_taxon)
+
+    def h3_04_taxon_range_route(self):
+        if "taxon_ranges_path" not in self.inferrer.config:
+            return "taxon range data unavailable", 422
+        return self.h3_04_default_route(self.inferrer.h3_04_taxon_range)
+
+    def h3_04_taxon_range_comparison_route(self):
+        if "taxon_ranges_path" not in self.inferrer.config:
+            return "taxon range data unavailable", 422
+        return self.h3_04_default_route(self.inferrer.h3_04_taxon_range_comparison)
+
+    def h3_04_default_route(self, h3_04_method):
+        taxon_id, error_message, error_code = self.valid_leaf_taxon_id_for_request(request)
+        if error_message:
+            return error_message, error_code
+
+        bounds, error_message, error_code = self.valid_bounds_for_request(request)
+        if error_message:
+            return error_message, error_code
+
+        if h3_04_method == self.inferrer.h3_04_geo_results_for_taxon \
+           and "thresholded" in request.args and request.args["thresholded"] == "true":
+            results_dict = h3_04_method(taxon_id, bounds, thresholded=True)
+        else:
+            results_dict = h3_04_method(taxon_id, bounds)
+        if results_dict is None:
+            return f'Unknown taxon_id {taxon_id}', 422
+        return InatVisionAPI.round_floats(results_dict, 8)
+
+    def h3_04_bounds_route(self):
+        taxon_id, error_message, error_code = self.valid_leaf_taxon_id_for_request(request)
+        if error_message:
+            return error_message, error_code
+
+        results_dict = self.inferrer.h3_04_bounds(taxon_id)
+        if results_dict is None:
+            return f'Unknown taxon_id {taxon_id}', 422
+        return results_dict
+
     def index_route(self):
         form =
ImageForm() if "observation_id" in request.args: @@ -41,6 +87,7 @@ def index_route(self): lng = form.lng.data file_path = None image_uuid = None + iconic_taxon_id = None if observation_id: image_uuid = "downloaded-obs-" + observation_id file_path, lat, lng, iconic_taxon_id = self.download_observation( @@ -48,67 +95,95 @@ def index_route(self): else: image_uuid = str(uuid.uuid4()) file_path = self.process_upload(form.image.data, image_uuid) - iconic_taxon_id = None + if form.taxon_id.data and form.taxon_id.data.isdigit(): + iconic_taxon_id = int(form.taxon_id.data) if file_path is None: return render_template("home.html") - image = self.inferrer.prepare_image_for_inference(file_path, image_uuid) - scores = self.score_image(image, lat, lng, iconic_taxon_id, geomodel) - self.write_logstash(image_uuid, file_path, request_start_datetime, request_start_time) + scores = self.score_image(form, file_path, lat, lng, iconic_taxon_id, geomodel) + InatVisionAPI.write_logstash( + image_uuid, file_path, request_start_datetime, request_start_time) return scores else: return render_template("home.html") - def score_image(self, image, lat, lng, iconic_taxon_id, geomodel): - # Vision - vision_start_time = time.time() - vision_scores = self.inferrer.vision_inferrer.process_image(image, iconic_taxon_id) - vision_total_time = time.time() - vision_start_time - print("Vision Time: %0.2fms" % (vision_total_time * 1000.)) - - if geomodel != "true": - top_x = dict(sorted( - vision_scores.items(), key=lambda x: x[1], reverse=True)[:100]) - to_return = {} - for index, arg in enumerate(top_x): - to_return[arg] = round(vision_scores[arg] * 100, 6) - return to_return - - # Geo - geo_start_time = time.time() - if lat is not None and lat != "" and lng is not None and lng != "": - geo_scores = self.inferrer.geo_model.predict(lat, lng, iconic_taxon_id) + def score_image(self, form, file_path, lat, lng, iconic_taxon_id, geomodel): + score_without_geo = (form.score_without_geo.data == "true") + filter_taxon = self.inferrer.lookup_taxon(iconic_taxon_id) + leaf_scores = self.inferrer.predictions_for_image( + file_path, lat, lng, filter_taxon, score_without_geo + ) + + if form.aggregated.data == "true": + aggregated_results = self.inferrer.aggregate_results(leaf_scores, filter_taxon, + score_without_geo) + columns_to_return = [ + "aggregated_combined_score", + "aggregated_geo_score", + "taxon_id", + "name", + "aggregated_vision_score", + "aggregated_geo_threshold" + ] + column_mapping = { + "taxon_id": "id", + "aggregated_combined_score": "combined_score", + "aggregated_geo_score": "geo_score", + "aggregated_vision_score": "vision_score", + "aggregated_geo_threshold": "geo_threshold" + } + + no_geo_scores = (leaf_scores["geo_score"].max() == 0) + + # set a cutoff where branches whose combined scores are below the threshold are ignored + # TODO: this threshold is completely arbitrary and needs testing + aggregated_results = aggregated_results.query("normalized_aggregated_combined_score > 0.05") + + # after setting a cutoff, get the parent IDs of the remaining taxa + parent_taxon_ids = aggregated_results["parent_taxon_id"].values # noqa: F841 + # the leaves of the pruned taxonomy (not leaves of the original taxonomy), are the + # taxa who are not parents of any remaining taxa + leaf_results = aggregated_results.query("taxon_id not in @parent_taxon_ids") + + leaf_results = leaf_results.sort_values("aggregated_combined_score", ascending=False).head(100) + score_columns = ["aggregated_combined_score", "aggregated_geo_score", + 
"aggregated_vision_score", "aggregated_geo_threshold"] + leaf_results[score_columns] = leaf_results[score_columns].multiply(100, axis="index") + final_results = leaf_results[columns_to_return].rename(columns=column_mapping) else: - geo_scores = {} - geo_total_time = time.time() - geo_start_time - print("GeoTime: %0.2fms" % (geo_total_time * 1000.)) - - # Scoring - scoring_start_time = time.time() - combined_scores = ModelScoring.combine_vision_and_geo_scores( - vision_scores, geo_scores, - self.inferrer.config["geo_min"] if "geo_min" in self.inferrer.config else 0) - - # results.aggregate_scores() - scoring_total_time = time.time() - scoring_start_time - print("Score Time: %0.2fms" % (scoring_total_time * 1000.)) - - top_x = dict(sorted( - combined_scores.items(), key=lambda x: x[1], reverse=True)[:100]) - to_return = [] - for index, arg in enumerate(top_x): - geo_score = geo_scores[arg] if arg in geo_scores else 0.0000000001 - to_return.append({ - "combined_score": round(combined_scores[arg] * 100, 6), - "vision_score": round(vision_scores[arg] * 100, 6), - "geo_score": round(geo_score * 100, 6), - "id": self.inferrer.taxonomy.taxa[arg].id, - "name": self.inferrer.taxonomy.taxa[arg].name - }) - - total_time = time.time() - vision_start_time - print("Total: %0.2fms" % (total_time * 1000.)) - return to_return + no_geo_scores = (leaf_scores["geo_score"].max() == 0) + top_combined_score = leaf_scores.sort_values("combined_score", ascending=False).head(1)["combined_score"].values[0] + # set a cutoff so results whose combined scores are + # much lower than the best score are not returned + leaf_scores = leaf_scores.query(f'combined_score > {top_combined_score * 0.001}') + + top100 = leaf_scores.sort_values("combined_score", ascending=False).head(100) + score_columns = ["combined_score", "geo_score", "normalized_vision_score", "geo_threshold"] + top100[score_columns] = top100[score_columns].multiply(100, axis="index") + + # legacy dict response + if geomodel != "true": + top_taxon_combined_scores = top100[ + ["taxon_id", "combined_score"] + ].to_dict(orient="records") + return {x["taxon_id"]: x["combined_score"] for x in top_taxon_combined_scores} + + # new array response + columns_to_return = [ + "combined_score", + "geo_score", + "taxon_id", + "name", + "normalized_vision_score", + "geo_threshold" + ] + column_mapping = { + "taxon_id": "id", + "normalized_vision_score": "vision_score" + } + final_results = top100[columns_to_return].rename(columns=column_mapping) + + return final_results.to_dict(orient="records") def process_upload(self, form_image_data, image_uuid): if form_image_data is None: @@ -137,7 +212,34 @@ def download_observation(self, observation_id, image_uuid): # return the path to the cached image, coordinates, and iconic taxon return cache_path, latlng[0], latlng[1], data["results"][0]["taxon"]["iconic_taxon_id"] - def write_logstash(self, image_uuid, file_path, request_start_datetime, request_start_time): + def valid_leaf_taxon_id_for_request(self, request): + if "taxon_id" in request.args: + taxon_id = request.args["taxon_id"] + else: + return None, "taxon_id required", 422 + if not taxon_id.isdigit(): + return None, "taxon_id must be an integer", 422 + + taxon_id = int(taxon_id) + if float(taxon_id) not in self.inferrer.taxonomy.leaf_df["taxon_id"].values: + return None, f'Unknown taxon_id {taxon_id}', 422 + return taxon_id, None, None + + def valid_bounds_for_request(self, request): + bounds = [] + if "swlat" in request.args: + try: + swlat = float(request.args["swlat"]) + 
swlng = float(request.args["swlng"]) + nelat = float(request.args["nelat"]) + nelng = float(request.args["nelng"]) + except ValueError: + return None, "bounds must be floats", 422 + bounds = [swlat, swlng, nelat, nelng] + return bounds, None, None + + @staticmethod + def write_logstash(image_uuid, file_path, request_start_datetime, request_start_time): request_end_time = time.time() request_time = round((request_end_time - request_start_time) * 1000, 6) logstash_log = open('log/logstash.log', 'a') @@ -149,3 +251,13 @@ def write_logstash(self, image_uuid, file_path, request_start_datetime, request_ json.dump(log_data, logstash_log) logstash_log.write("\n") logstash_log.close() + + @staticmethod + def round_floats(o, sig): + if isinstance(o, float): + return round(o, sig) + if isinstance(o, dict): + return {k: InatVisionAPI.round_floats(v, sig) for k, v in o.items()} + if isinstance(o, (list, tuple)): + return [InatVisionAPI.round_floats(x, sig) for x in o] + return o diff --git a/lib/model_scoring.py b/lib/model_scoring.py deleted file mode 100644 index 645de55..0000000 --- a/lib/model_scoring.py +++ /dev/null @@ -1,11 +0,0 @@ -class ModelScoring: - - @staticmethod - def combine_vision_and_geo_scores(vision_scores, geo_scores, geo_min=0): - combined_scores = {} - for arg in vision_scores: - geo_score = geo_scores[arg] if arg in geo_scores else 0.0000000001 - if geo_min > 0 and geo_score < geo_min: - geo_score = geo_min - combined_scores[arg] = vision_scores[arg] * geo_score - return combined_scores diff --git a/lib/model_taxonomy.py b/lib/model_taxonomy.py index 0362587..d390516 100644 --- a/lib/model_taxonomy.py +++ b/lib/model_taxonomy.py @@ -11,7 +11,7 @@ def __init__(self, path): def load_mapping(self, path): self.node_key_to_leaf_class_id = {} self.leaf_class_to_taxon = {} - # there is no taxon with ID 0, but roots of the taxonomy with have a parent ID of 0, + # there is no taxon with ID 0, but roots of the taxonomy have a parent ID of 0, # so create a fake taxon of Life to represent the root of the entire tree self.taxa = {0: Taxon({"name": "Life", "depth": 0})} self.taxon_children = {} diff --git a/lib/model_taxonomy_dataframe.py b/lib/model_taxonomy_dataframe.py new file mode 100644 index 0000000..a707c7d --- /dev/null +++ b/lib/model_taxonomy_dataframe.py @@ -0,0 +1,74 @@ +import math +import pandas as pd + + +class ModelTaxonomyDataframe: + + def __init__(self, path, thresholds_path): + self.load_mapping(path, thresholds_path) + + def load_mapping(self, path, thresholds_path): + self.df = pd.read_csv(path) + # left and right will be used to store nested set indices + self.df["left"] = pd.Series([], dtype=object) + self.df["right"] = pd.Series([], dtype=object) + self.taxon_children = {} + self.taxon_row_mapping = {} + self.taxon_ancestors = {} + for index, taxon in self.df.iterrows(): + self.taxon_row_mapping[taxon["taxon_id"]] = index + parent_id = 0 if math.isnan(taxon["parent_taxon_id"]) else int(taxon["parent_taxon_id"]) + if parent_id not in self.taxon_children: + self.taxon_children[parent_id] = [] + self.taxon_children[parent_id].append(taxon["taxon_id"]) + self.assign_nested_values() + self.df = self.df.set_index("taxon_id", drop=False).sort_index() + if thresholds_path is not None: + thresholds = pd.read_csv(thresholds_path)[["taxon_id", "thres"]]. 
\ + rename(columns={"thres": "geo_threshold"}).set_index("taxon_id").sort_index() + self.df = self.df.join(thresholds) + + # create a data frame with just the leaf taxa using leaf_class_id as the index + self.leaf_df = self.df.query("leaf_class_id.notnull()").set_index( + "leaf_class_id", drop=False).sort_index() + + # calculate nested set left and right values. These can be later used for an efficient + # way to calculate if a taxon is an ancestor or descendant of another + def assign_nested_values(self, taxon_id=0, index=0, ancestor_taxon_ids=[]): + for child_id in self.taxon_children[taxon_id]: + self.df.at[self.taxon_row_mapping[child_id], "left"] = index + self.taxon_ancestors[child_id] = ancestor_taxon_ids + index += 1 + if child_id in self.taxon_children: + child_ancestor_taxon_ids = ancestor_taxon_ids + [child_id] + index = self.assign_nested_values(child_id, index, child_ancestor_taxon_ids) + self.df.at[self.taxon_row_mapping[child_id], "right"] = index + index += 1 + return index + + @staticmethod + def children(df, taxon_id): + if taxon_id == 0: + return df.query("parent_taxon_id.isnull()") + return df.query(f'parent_taxon_id == {taxon_id}') + + @staticmethod + def print(df, taxon_id=0, ancestor_prefix="", display_taxon_lambda=None): + children = ModelTaxonomyDataframe.children(df, taxon_id) + index = 0 + if "aggregated_combined_score" in children: + children = children.sort_values("aggregated_combined_score", ascending=False) + else: + children = children.sort_values("name") + for row in children.itertuples(): + last_in_branch = (index == len(children) - 1) + index += 1 + icon = "└──" if last_in_branch else "├──" + prefixIcon = " " if last_in_branch else "│ " + print(f'{ancestor_prefix}{icon}', end="") + if display_taxon_lambda is None: + print(f'{row.name} :: {row.left}:{row.right}') + else: + print(display_taxon_lambda(row)) + if row.right != row.left + 1: + ModelTaxonomyDataframe.print(df, row.taxon_id, f"{ancestor_prefix}{prefixIcon}", display_taxon_lambda) diff --git a/lib/model_test_data_exporter.py b/lib/model_test_data_exporter.py new file mode 100644 index 0000000..df60bad --- /dev/null +++ b/lib/model_test_data_exporter.py @@ -0,0 +1,204 @@ +import pandas as pd +import requests +import json +import prison +import re +from datetime import datetime + + +class ModelTestDataExporter: + + def __init__(self, **args): + self.cmd_args = args + self.load_train_data_photo_ids() + + def load_train_data_photo_ids(self, train_data_paths=[]): + if not self.cmd_args["exclude_train_photos_path"]: + self.train_data_photo_ids = [] + return + self.train_data_photo_ids = pd.concat( + map(lambda x: pd.read_csv(x, usecols=["photo_id"]), + self.cmd_args["exclude_train_photos_path"]) + ).drop_duplicates("photo_id").set_index("photo_id").sort_index().index + + def generate_from_cmd_args(self): + additional_parameters = {} + if self.cmd_args["place_id"]: + additional_parameters["place_id"] = self.cmd_args["place_id"] + if self.cmd_args["taxon_id"]: + additional_parameters["taxon_id"] = self.cmd_args["taxon_id"] + currentDatetime = datetime.now() + timestamp = currentDatetime.strftime("%Y%m%d") + export_path = f'test-obs-{timestamp}' + if additional_parameters: + parameter_string = "-".join(map(lambda index: f'{index}-{additional_parameters[index]}', + additional_parameters)) + export_path += "-" + parameter_string + if "filename_suffix" in self.cmd_args and self.cmd_args["filename_suffix"]: + export_path += "-" + self.cmd_args["filename_suffix"] + export_path += ".csv" + 
self.generate_test_data(export_path, self.cmd_args["limit"], additional_parameters) + + def export_test_data_parameters(self, additional_parameters={}): + api_parameters = {} + api_parameters["quality_grade"] = "research,casual" + api_parameters["rank"] = "species" + api_parameters["photos"] = "true" + api_parameters["geo"] = "true" + api_parameters["identified"] = "true" + api_parameters["per_page"] = 200 + api_parameters["order_by"] = "random" + api_parameters["identifications"] = "most_agree" + api_parameters["ttl"] = -1 + api_parameters.update(additional_parameters) + print(api_parameters) + + fields = { + "quality_grade": True, + "observed_on_details": { + "date": True + }, + "location": True, + "photos": { + "url": True + }, + "community_taxon_id": True, + "taxon": { + "id": True, + "ancestor_ids": True, + "iconic_taxon_id": True + }, + "quality_metrics": { + "agree": True, + "metric": True + } + } + api_parameters["fields"] = prison.dumps(fields) + return api_parameters + + def process_api_response(self, api_parameters, used_observations): # noqa: C901 + response = requests.get("https://api.inaturalist.org/v2/observations", + params=api_parameters) + json_object = response.json() + useable_rows = [] + for row in json_object["results"]: + if row["uuid"] in used_observations: + continue + + # must have a taxon and observed_on_details + if not row["taxon"] or "observed_on_details" not in row \ + or not row["observed_on_details"] or "taxon" not in row \ + or "iconic_taxon_id" not in row["taxon"] or not row["taxon"]["iconic_taxon_id"]: + used_observations[row["uuid"]] = True + continue + + # must pass quality metrics except wild + metric_counts = {} + for metric in row["quality_metrics"]: + if metric["metric"] not in metric_counts: + metric_counts[metric["metric"]] = 0 + if metric["agree"]: + metric_counts[metric["metric"]] += 1 + else: + metric_counts[metric["metric"]] -= 1 + if ("location" in metric_counts and metric_counts["location"] < 0) \ + or ("evidence" in metric_counts and metric_counts["evidence"] < 0) \ + or ("date" in metric_counts and metric_counts["date"] < 0) \ + or ("recent" in metric_counts and metric_counts["recent"] < 0): + used_observations[row["uuid"]] = True + continue + + # check if any photos are included in the test data + photo_in_training_data = False + for photo in row["photos"]: + if photo["id"] in self.train_data_photo_ids: + photo_in_training_data = True + break + if photo_in_training_data is True: + used_observations[row["uuid"]] = True + continue + if re.search(r"\.jpe?g", row["photos"][0]["url"]) is None: + used_observations[row["uuid"]] = True + continue + + if row["quality_grade"] == "casual" and not (row["community_taxon_id"] and row["community_taxon_id"] == row["taxon"]["id"]): + used_observations[row["uuid"]] = True + continue + + useable_rows.append(row) + used_observations[row["uuid"]] = True + return useable_rows + + def generate_test_data(self, filename, max_results=5000, additional_parameters={}): + iterations_with_zero_results = 0 + rows_to_use = [] + used_observations = {} + api_parameters = self.export_test_data_parameters(additional_parameters) + + while len(rows_to_use) < max_results and iterations_with_zero_results < 5: + print(f'Fetching more results... 
{len(rows_to_use)} so far') + useable_rows = self.process_api_response(api_parameters, used_observations) + print(f'{len(useable_rows)} this batch') + if not useable_rows: + iterations_with_zero_results += 1 + continue + iterations_with_zero_results = 0 + rows_to_use += useable_rows + + columns = [ + "observation_id", + "observed_on", + "iconic_taxon_id", + "taxon_id", + "taxon_ancestry", + "lat", + "lng", + "photo_url" + ] + output_file = open(filename, "w") + output_file.write(",".join(columns) + "\n") + for row in rows_to_use[:max_results]: + [latitude, longitude] = row["location"].split(",") + columns_to_write = [ + row["uuid"], + row["observed_on_details"]["date"], + row["taxon"]["iconic_taxon_id"], + row["taxon"]["id"], + "/".join(map(str, row["taxon"]["ancestor_ids"])), + latitude, + longitude, + row["photos"][0]["url"].replace("square", "medium").replace("http://", "https://") + ] + output_file.write(",".join(map(str, columns_to_write)) + "\n") + + def generate_standard_set(self): + files_to_generate = { + "global": {}, + "fungi": {"taxon_id": 47170}, + "insecta": {"taxon_id": 47158}, + "mammalia": {"taxon_id": 40151}, + "plantae": {"taxon_id": 47126}, + "actinopterygii": {"taxon_id": 47178}, + "reptilia": {"taxon_id": 26036}, + "amphibia": {"taxon_id": 20978}, + "arachnida": {"taxon_id": 47119}, + "aves": {"taxon_id": 3}, + "animalia": {"taxon_id": 1}, + "mollusca": {"taxon_id": 47115}, + "north-america": {"place_id": 97394}, + "south-america": {"place_id": 97389}, + "europe": {"place_id": 97391}, + "asia": {"place_id": 97395}, + "africa": {"place_id": 97392}, + "oceania": {"place_id": 97393} + } + + currentDatetime = datetime.now() + timestamp = currentDatetime.strftime("%Y%m%d") + + for key in files_to_generate: + export_path = f'test-obs-{timestamp}-{key}' + if "filename_suffix" in self.cmd_args and self.cmd_args["filename_suffix"]: + export_path += "-" + self.cmd_args["filename_suffix"] + export_path += ".csv" + self.generate_test_data(export_path, self.cmd_args["limit"], files_to_generate[key]) diff --git a/lib/pt_geo_prior_model.py b/lib/pt_geo_prior_model.py index 218a0d0..b81b4e9 100644 --- a/lib/pt_geo_prior_model.py +++ b/lib/pt_geo_prior_model.py @@ -29,7 +29,7 @@ def predict(self, latitude, longitude, filter_taxon_id=None): filter_taxon = None if filter_taxon_id is not None: try: - filter_taxon = self.taxonomy.taxa[filter_taxon_id] + filter_taxon = self.taxonomy.df.iloc[filter_taxon_id] except Exception as e: print(f'filter_taxon `{filter_taxon_id}` does not exist in the taxonomy') raise e diff --git a/lib/templates/home.html b/lib/templates/home.html index baad79e..37244e9 100644 --- a/lib/templates/home.html +++ b/lib/templates/home.html @@ -13,11 +13,11 @@