Add ensemble.prob_rank_average

mxbi · May 6, 2021 · 3a25193 · 3a25193
1 parent 74bc457
commit 3a25193
Showing 1 changed file with 33 additions and 0 deletions.
diff --git a/mlcrate/ensemble.py b/mlcrate/ensemble.py
@@ -40,3 +40,36 @@ def rank_average(*args, **opts):
     result = np.array(elems)[np.argsort(scores)]
 
     return result
+
+def prob_rank_average(*arrs, **opts):
+    """Return a rank average across input arrays of probabilities (one array per input model)
+    These are ensembled using a weighted ranking average (defaults to equal weights to each model)
+    The output array of probabilities is linear in the range [0, 1], since we only care about how each
+    model ranks the provided inputs.
+    
+    For example:
+    
+    prob_rank_average([0.1, 0.2, 0.5, 0.8, 0], [0.2, 0.1, 0.6, 1.5, 1000000], weights=[0.4, 0.6])
+    => array([0.  , 0.25, 1.  , 0.5 , 0.75], dtype=float32)
+    
+    Keyword Arguments:
+    weights (optional): Array of weights for a weighted rank average of the inputs
+    """
+
+    weights = opts.get('weights', None)
+
+    arrs = [np.array(a) for a in arrs]
+
+    len0 = len(arrs[0])
+    for arr in arrs:
+        assert arr.ndim == 1, "All input arrays must be 1-D"
+        assert len(arr == len0), "All input arrays must have the same length"
+
+    ranked_arrs = []
+    for arr in arrs:
+        ranked_arrs.append(np.argsort(arr))
+
+    reranked = rank_average(*ranked_arrs, weights=weights).astype(np.float32)
+    reranked /= len(reranked) - 1
+
+    return reranked