From e88bd2ca46c420d321c59992a467ece1c2b117dd Mon Sep 17 00:00:00 2001
From: pl0xz0rz <majo.ivanov@gmail.com>
Date: Wed, 5 Apr 2023 14:02:39 +0200
Subject: [PATCH 1/8] Added matrix transpose to predictRFStat

---
 RootInteractive/MLpipeline/MIForestErrPDF.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/RootInteractive/MLpipeline/MIForestErrPDF.py b/RootInteractive/MLpipeline/MIForestErrPDF.py
index 57edcb8b..a1a37a44 100644
--- a/RootInteractive/MLpipeline/MIForestErrPDF.py
+++ b/RootInteractive/MLpipeline/MIForestErrPDF.py
@@ -50,22 +50,23 @@ def predictRFStat(rf, X, statDictionary,n_jobs):
     allRF = np.zeros((len(rf.estimators_), X.shape[0]))
     lock = threading.Lock()
     statOut={}
-    Parallel(n_jobs=n_jobs, verbose=rf.verbose,**_joblib_parallel_args(require="sharedmem"),)(
+    Parallel(n_jobs=n_jobs, verbose=rf.verbose,require="sharedmem")(
             delayed(_accumulate_prediction)(e.predict, X, allRF, col,lock)
             for col,e in enumerate(rf.estimators_)
     )
     #
-    if "median" in statDictionary: statOut["median"]=np.median(allRF, 0)
-    if "mean"  in statDictionary: statOut["mean"]=np.mean(allRF, 0)
-    if "std"  in statDictionary: statOut["std"]=np.std(allRF, 0)
+    allRFTranspose = allRF.T
+    if "median" in statDictionary: statOut["median"]=np.median(allRFTranspose, 1)
+    if "mean"  in statDictionary: statOut["mean"]=np.mean(allRFTranspose, 1)
+    if "std"  in statDictionary: statOut["std"]=np.std(allRFTranspose, 1)
     if "quantile" in   statDictionary:
-        statOut["quantiles"]={}
-        for quant in statDictionary["quantile"]:
-            statOut["quantiles"][quant]=np.quantile(allRF,quant,axis=0)
+        statOut["quantile"]={}
+        for quant in statDictionary["quantil"]:
+            statOut["quantile"][quant]=np.quantile(allRF,quant,axis=1)
     if "trim_mean" in   statDictionary:
         statOut["trim_mean"]={}
         for quant in statDictionary["trim_mean"]:
-            statOut["trim_mean"][quant]=stats.trim_mean(allRF,quant,axis=0)
+            statOut["trim_mean"][quant]=stats.trim_mean(allRF,quant,axis=1)
     return statOut
 def predictRFStatNew(rf, X, statDictionary,n_jobs):
     """
@@ -371,4 +372,4 @@ def getImportance(self):
         impTree = np.zeros((len(self.trees[0]), len(self.trees[0][0])))
         for row,tree in enumerate(self.trees[0]):
             impTree[row]=tree.feature_importances_
-        return    impTree.mean(axis=0)
\ No newline at end of file
+        return    impTree.mean(axis=0)

From e61446b7150087034a1c2c647c7cea110a6b504b Mon Sep 17 00:00:00 2001
From: pl0xz0rz <majo.ivanov@gmail.com>
Date: Wed, 5 Apr 2023 14:47:58 +0200
Subject: [PATCH 2/8] Added ascontiguous

---
 RootInteractive/MLpipeline/MIForestErrPDF.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RootInteractive/MLpipeline/MIForestErrPDF.py b/RootInteractive/MLpipeline/MIForestErrPDF.py
index a1a37a44..9a1e72eb 100644
--- a/RootInteractive/MLpipeline/MIForestErrPDF.py
+++ b/RootInteractive/MLpipeline/MIForestErrPDF.py
@@ -55,7 +55,7 @@ def predictRFStat(rf, X, statDictionary,n_jobs):
             for col,e in enumerate(rf.estimators_)
     )
     #
-    allRFTranspose = allRF.T
+    allRFTranspose = allRF.T.ascontiguousarray()
     if "median" in statDictionary: statOut["median"]=np.median(allRFTranspose, 1)
     if "mean"  in statDictionary: statOut["mean"]=np.mean(allRFTranspose, 1)
     if "std"  in statDictionary: statOut["std"]=np.std(allRFTranspose, 1)

From 2d45a2d779935a131a135561cecee1ddcb4eb0bd Mon Sep 17 00:00:00 2001
From: pl0xz0rz <majo.ivanov@gmail.com>
Date: Wed, 5 Apr 2023 14:54:12 +0200
Subject: [PATCH 3/8] Bugfix: contiguous copy call and "quantil" key typo

---
 RootInteractive/MLpipeline/MIForestErrPDF.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/RootInteractive/MLpipeline/MIForestErrPDF.py b/RootInteractive/MLpipeline/MIForestErrPDF.py
index 9a1e72eb..de76c236 100644
--- a/RootInteractive/MLpipeline/MIForestErrPDF.py
+++ b/RootInteractive/MLpipeline/MIForestErrPDF.py
@@ -55,13 +55,13 @@ def predictRFStat(rf, X, statDictionary,n_jobs):
             for col,e in enumerate(rf.estimators_)
     )
     #
-    allRFTranspose = allRF.T.ascontiguousarray()
+    allRFTranspose = allRF.T.copy(order='C')
     if "median" in statDictionary: statOut["median"]=np.median(allRFTranspose, 1)
     if "mean"  in statDictionary: statOut["mean"]=np.mean(allRFTranspose, 1)
     if "std"  in statDictionary: statOut["std"]=np.std(allRFTranspose, 1)
     if "quantile" in   statDictionary:
         statOut["quantile"]={}
-        for quant in statDictionary["quantil"]:
+        for quant in statDictionary["quantile"]:
             statOut["quantile"][quant]=np.quantile(allRF,quant,axis=1)
     if "trim_mean" in   statDictionary:
         statOut["trim_mean"]={}

From e7f31a3fea9b41243241b6f40ebc202e70c467c8 Mon Sep 17 00:00:00 2001
From: pl0xz0rz <majo.ivanov@gmail.com>
Date: Thu, 6 Apr 2023 09:59:29 +0200
Subject: [PATCH 4/8] Replaced median algorithm with simpler one, should only
 help when other quantiles are used by reusing sorting

---
 RootInteractive/MLpipeline/MIForestErrPDF.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/RootInteractive/MLpipeline/MIForestErrPDF.py b/RootInteractive/MLpipeline/MIForestErrPDF.py
index de76c236..1021320c 100644
--- a/RootInteractive/MLpipeline/MIForestErrPDF.py
+++ b/RootInteractive/MLpipeline/MIForestErrPDF.py
@@ -47,7 +47,8 @@ def predictRFStat(rf, X, statDictionary,n_jobs):
     :param n_jobs:              number of parallel jobs for prediction
     :return:                    dictionary with requested output statistics
     """
-    allRF = np.zeros((len(rf.estimators_), X.shape[0]))
+    nEstimators = len(rf.estimators_)
+    allRF = np.zeros((nEstimators, X.shape[0]))
     lock = threading.Lock()
     statOut={}
     Parallel(n_jobs=n_jobs, verbose=rf.verbose,require="sharedmem")(
@@ -56,9 +57,11 @@ def predictRFStat(rf, X, statDictionary,n_jobs):
     )
     #
     allRFTranspose = allRF.T.copy(order='C')
-    if "median" in statDictionary: statOut["median"]=np.median(allRFTranspose, 1)
-    if "mean"  in statDictionary: statOut["mean"]=np.mean(allRFTranspose, 1)
-    if "std"  in statDictionary: statOut["std"]=np.std(allRFTranspose, 1)
+    if "median" in statDictionary:
+        allRFTranspose = allRFTranspose.partition(nEstimators//2, -1)
+        statOut["median"]= allRFTranspose[:,nEstimators//2]
+    if "mean"  in statDictionary: statOut["mean"]=np.mean(allRFTranspose, -1)
+    if "std"  in statDictionary: statOut["std"]=np.std(allRFTranspose, -1)
     if "quantile" in   statDictionary:
         statOut["quantile"]={}
         for quant in statDictionary["quantile"]:

From f86c1f19a5d69aa05159a4bc0a33a1a4e9bcaacf Mon Sep 17 00:00:00 2001
From: pl0xz0rz <majo.ivanov@gmail.com>
Date: Thu, 6 Apr 2023 10:10:36 +0200
Subject: [PATCH 5/8] Bugfix: do not reassign in-place partition result

---
 RootInteractive/MLpipeline/MIForestErrPDF.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RootInteractive/MLpipeline/MIForestErrPDF.py b/RootInteractive/MLpipeline/MIForestErrPDF.py
index 1021320c..b2bfc608 100644
--- a/RootInteractive/MLpipeline/MIForestErrPDF.py
+++ b/RootInteractive/MLpipeline/MIForestErrPDF.py
@@ -58,7 +58,7 @@ def predictRFStat(rf, X, statDictionary,n_jobs):
     #
     allRFTranspose = allRF.T.copy(order='C')
     if "median" in statDictionary:
-        allRFTranspose = allRFTranspose.partition(nEstimators//2, -1)
+        allRFTranspose.partition(nEstimators//2, -1)
         statOut["median"]= allRFTranspose[:,nEstimators//2]
     if "mean"  in statDictionary: statOut["mean"]=np.mean(allRFTranspose, -1)
     if "std"  in statDictionary: statOut["std"]=np.std(allRFTranspose, -1)

From 6a7a219f51b31696f2aedb2915e4f81b74847236 Mon Sep 17 00:00:00 2001
From: pl0xz0rz <majo.ivanov@gmail.com>
Date: Thu, 6 Apr 2023 10:51:29 +0200
Subject: [PATCH 6/8] Using joblib.Parallel on median

---
 RootInteractive/MLpipeline/MIForestErrPDF.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/RootInteractive/MLpipeline/MIForestErrPDF.py b/RootInteractive/MLpipeline/MIForestErrPDF.py
index b2bfc608..1d92c5a8 100644
--- a/RootInteractive/MLpipeline/MIForestErrPDF.py
+++ b/RootInteractive/MLpipeline/MIForestErrPDF.py
@@ -37,6 +37,9 @@ def _accumulate_predictionNL(predict, X, out,col):
     prediction = predict(X, check_input=False)
     out[col] += prediction
 
+def partitionBlock(allRF, k, begin, end):
+    allRF[begin:end].partition(k)
+
 def predictRFStat(rf, X, statDictionary,n_jobs):
     """
     inspired by https://github.com/scikit-learn/scikit-learn/blob/37ac6788c/sklearn/ensemble/_forest.py#L1410
@@ -58,7 +61,14 @@ def predictRFStat(rf, X, statDictionary,n_jobs):
     #
     allRFTranspose = allRF.T.copy(order='C')
     if "median" in statDictionary:
-        allRFTranspose.partition(nEstimators//2, -1)
+        blockSize = X.shape[0] // n_jobs + 1
+        block_begin = arange(0, X.shape[0], blockSize)
+        block_end = block_begin[1:]
+        block_end.append(X.shape[0])
+        Parallel(n_jobs=n_jobs, verbose=rf.verbose, require="sharedmem")(
+                delayed(partitionBlock)(allRFTranspose, nEstimators // 2, first, last)
+                for first, last in zip(block_begin, block_end)
+
         statOut["median"]= allRFTranspose[:,nEstimators//2]
     if "mean"  in statDictionary: statOut["mean"]=np.mean(allRFTranspose, -1)
     if "std"  in statDictionary: statOut["std"]=np.std(allRFTranspose, -1)

From 2a9fe041833a46975be162fcebcc32349cc39ca9 Mon Sep 17 00:00:00 2001
From: pl0xz0rz <majo.ivanov@gmail.com>
Date: Thu, 6 Apr 2023 10:53:09 +0200
Subject: [PATCH 7/8] Fixed missing parenthesis

---
 RootInteractive/MLpipeline/MIForestErrPDF.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RootInteractive/MLpipeline/MIForestErrPDF.py b/RootInteractive/MLpipeline/MIForestErrPDF.py
index 1d92c5a8..71047373 100644
--- a/RootInteractive/MLpipeline/MIForestErrPDF.py
+++ b/RootInteractive/MLpipeline/MIForestErrPDF.py
@@ -68,7 +68,7 @@ def predictRFStat(rf, X, statDictionary,n_jobs):
         Parallel(n_jobs=n_jobs, verbose=rf.verbose, require="sharedmem")(
                 delayed(partitionBlock)(allRFTranspose, nEstimators // 2, first, last)
                 for first, last in zip(block_begin, block_end)
-
+                )
         statOut["median"]= allRFTranspose[:,nEstimators//2]
     if "mean"  in statDictionary: statOut["mean"]=np.mean(allRFTranspose, -1)
     if "std"  in statDictionary: statOut["std"]=np.std(allRFTranspose, -1)

From 51230967d64139d36462c191786cf3e762eef798 Mon Sep 17 00:00:00 2001
From: pl0xz0rz <majo.ivanov@gmail.com>
Date: Thu, 6 Apr 2023 11:02:10 +0200
Subject: [PATCH 8/8] fixed typo in range

---
 RootInteractive/MLpipeline/MIForestErrPDF.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RootInteractive/MLpipeline/MIForestErrPDF.py b/RootInteractive/MLpipeline/MIForestErrPDF.py
index 71047373..615a586e 100644
--- a/RootInteractive/MLpipeline/MIForestErrPDF.py
+++ b/RootInteractive/MLpipeline/MIForestErrPDF.py
@@ -62,7 +62,7 @@ def predictRFStat(rf, X, statDictionary,n_jobs):
     allRFTranspose = allRF.T.copy(order='C')
     if "median" in statDictionary:
         blockSize = X.shape[0] // n_jobs + 1
-        block_begin = arange(0, X.shape[0], blockSize)
+        block_begin = list(range(0, X.shape[0], blockSize))
         block_end = block_begin[1:]
         block_end.append(X.shape[0])
         Parallel(n_jobs=n_jobs, verbose=rf.verbose, require="sharedmem")(