bootstrap resampling for train
mieskolainen committed Nov 18, 2024
1 parent ddb44a6 commit 9497861
Showing 12 changed files with 246 additions and 156 deletions.
32 changes: 16 additions & 16 deletions .github/workflows/icenet-install-test.yml
@@ -101,6 +101,21 @@ jobs:
run: |
source setenv-github-actions.sh && python icefit/peakfit.py --analyze --group --test_mode --fit_type dual-unitary-II --output_name dual-unitary-II
#
- name: Deep Learning system (runme_eid)
run: |
source setenv-github-actions.sh && maxevents=10000; source tests/runme_eid.sh
#
- name: Deep Learning system (runme_eid_deep)
run: |
source setenv-github-actions.sh && maxevents=10000; source tests/runme_eid_deep.sh
#
- name: Deep Learning system (runme_eid_visual)
run: |
source setenv-github-actions.sh && maxevents=10000; source tests/runme_eid_visual.sh
# (This is run twice to test cache files)
- name: Deep Learning system (runme_brem)
run: |
@@ -127,7 +142,7 @@ jobs:
source tests/runme_zee_gridtune.sh
echo "yes" | source superclean.sh
#
- name: Deep Learning system (runme_zee)
run: |
@@ -151,21 +166,6 @@ jobs:
run: |
source setenv-github-actions.sh && maxevents=10000; source tests/runme_trg.sh
echo "yes" | source superclean.sh
#
- name: Deep Learning system (runme_eid)
run: |
source setenv-github-actions.sh && maxevents=10000; source tests/runme_eid.sh
#
- name: Deep Learning system (runme_eid_deep)
run: |
source setenv-github-actions.sh && maxevents=10000; source tests/runme_eid_deep.sh
#
- name: Deep Learning system (runme_eid_visual)
run: |
source setenv-github-actions.sh && maxevents=10000; source tests/runme_eid_visual.sh
## source setenv-github-actions.sh && maxevents=10000; source tests/runme_brk.sh
## source setenv-github-actions.sh && maxevents=10000; source tests/runme_dqcd_vector_train.sh
2 changes: 2 additions & 0 deletions configs/zee/models.yml
@@ -193,6 +193,8 @@ iceboost_swd:
<<: *ICEBOOST0

label: 'ICEBOOST-SWD'

bootstrap: 3

# BCE loss domains [use with custom:binary_cross_entropy]
BCE_param:
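The new 'bootstrap: 3' key under the ICEBOOST-SWD model, read together with the commit title, points to bootstrap resampling of the training sample over a few rounds. As a rough illustration only (function and variable names below are hypothetical, not the icenet API), per-round bootstrap indices can be drawn with replacement like this:

import numpy as np

# Illustrative sketch (not icenet code): one resampled index set per bootstrap round
def bootstrap_indices(n_events, n_rounds, seed=0):
    rng = np.random.default_rng(seed)
    for _ in range(n_rounds):
        # sample n_events indices with replacement
        yield rng.integers(low=0, high=n_events, size=n_events)

# Usage sketch: train one model per resampled copy of the training set
# for idx in bootstrap_indices(len(x_train), n_rounds=3):
#     model.fit(x_train[idx], y_train[idx], sample_weight=w_train[idx])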
12 changes: 9 additions & 3 deletions icedqcd/common.py
@@ -353,10 +353,12 @@ def splitfactor(x, y, w, ids, args, skip_graph=True, use_dequantize=True):

data_graph += sum(ray.get(graph_futures), []) # Join split array results
ray.shutdown()

print(f'ray_results: {time.time() - start_time:0.1f} sec')
io.showmem()

data_graph = np.array(data_graph, dtype=object) # !

# -------------------------------------------------------------------------
## Tensor representation
data_tensor = None
Expand Down Expand Up @@ -392,5 +394,9 @@ def splitfactor(x, y, w, ids, args, skip_graph=True, use_dequantize=True):
"""
# --------------------------------------------------------------------------


return {'data': data, 'data_MI': data_MI, 'data_kin': data_kin, 'data_deps': data_deps, 'data_tensor': data_tensor, 'data_graph': data_graph}
return {'data': data,
'data_MI': data_MI,
'data_kin': data_kin,
'data_deps': data_deps,
'data_tensor': data_tensor,
'data_graph': data_graph}
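The per-event graph list is wrapped into a NumPy array with dtype=object before being returned (data_graph = np.array(data_graph, dtype=object)). One practical benefit of this pattern is that object arrays support integer and boolean fancy indexing, which plain Python lists do not, so downstream train/validation splits can reuse the same index arrays. A small stand-alone illustration with toy objects (not the icedqcd graph objects):

import numpy as np

# Toy stand-ins for per-event graph objects
graphs = [{'event': i} for i in range(5)]
graphs = np.array(graphs, dtype=object)   # object array holds arbitrary Python objects

train_idx = np.array([0, 2, 4])           # e.g. indices of a training split
print(graphs[train_idx])                  # fancy indexing works on the object array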
5 changes: 3 additions & 2 deletions icefit/icepeak.py
@@ -366,7 +366,7 @@ def TH1_to_numpy(hist, dtype=np.float64):

#for n, v in hist.__dict__.items(): # class generated on the fly
# print(f'{n} {v}')

hh = hist.to_numpy()
counts = np.array(hist.values(), dtype=dtype)
errors = np.array(hist.errors(), dtype=dtype)
@@ -1430,7 +1430,8 @@ def integral_wrapper(lambdafunc, x, edges, norm=False, N_int: int=128, EPS=1E-8,
if norm:
# Normalization based on a numerical integral over edge bounds
x_fine = np.linspace(edges[0], edges[-1], N_int)
I = max(np.trapz(y=lambdafunc(x_fine), x=x_fine), EPS)
y_fine = lambdafunc(x_fine)
I = max(np.trapz(x=x_fine, y=y_fine), EPS)

return f / I * edges2binwidth(edges)
else:
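For context, the updated integral_wrapper evaluates y_fine = lambdafunc(x_fine) separately and then normalizes the fit function by its numerical integral over the edge bounds. A self-contained sketch of the same trapezoidal-rule normalization (simplified; not the icefit code itself, which additionally rescales by the bin widths):

import numpy as np

def normalize_over_edges(f, edges, N_int=128, EPS=1e-8):
    # Numerically normalize f to unit area over [edges[0], edges[-1]]
    x_fine = np.linspace(edges[0], edges[-1], N_int)
    y_fine = f(x_fine)
    I = max(np.trapz(y=y_fine, x=x_fine), EPS)   # EPS guards against division by ~0
    return lambda x: f(x) / I

# Example: a Gaussian-like shape normalized over [-5, 5]
g = normalize_over_edges(lambda x: np.exp(-0.5 * x**2), edges=np.linspace(-5.0, 5.0, 51))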
22 changes: 15 additions & 7 deletions icehgcal/common.py
@@ -242,23 +242,31 @@ def splitfactor(x, y, w, ids, args):
### DeepSets representation
data_deps = None

# -------------------------------------------------------------------------
### Mutual Information
data_MI = None

# -------------------------------------------------------------------------
### Tensor representation
data_tensor = None

# -------------------------------------------------------------------------
## Graph representation
data_graph = None

data_graph = graphio.parse_graph_data_candidate(X=data.x, Y=data.y, weights=data.w, ids=data.ids,
features=scalar_vars, graph_param=args['graph_param'])

data_graph = np.array(data_graph, dtype=object) # !

# --------------------------------------------------------------------
### Finally pick active scalar variables out

data.x = None # To protect other routines (TBD see global impact --> comment this line)

return {'data': data, 'data_kin': data_kin, 'data_deps': data_deps, 'data_tensor': data_tensor, 'data_graph': data_graph}

# ========================================================================
# ========================================================================
return {'data': data,
'data_MI': data_MI,
'data_kin': data_kin,
'data_deps': data_deps,
'data_tensor': data_tensor,
'data_graph': data_graph}
7 changes: 6 additions & 1 deletion icehnl/common.py
@@ -146,4 +146,9 @@ def splitfactor(x, y, w, ids, args):
data = data[vars]
data.x = data.x.astype(np.float32)

return {'data': data, 'data_MI': data_MI, 'data_kin': data_kin, 'data_deps': data_deps, 'data_tensor': data_tensor, 'data_graph': data_graph}
return {'data': data,
'data_MI': data_MI,
'data_kin': data_kin,
'data_deps': data_deps,
'data_tensor': data_tensor,
'data_graph': data_graph}
13 changes: 12 additions & 1 deletion iceid/common.py
@@ -150,6 +150,10 @@ def splitfactor(x, y, w, ids, args):
data_kin = data[vars]
data_kin.x = data_kin.x.astype(np.float32)

# -------------------------------------------------------------------------
### MI variables
data_MI = None

# -------------------------------------------------------------------------
### DeepSets representation
data_deps = None
@@ -206,11 +210,18 @@ def splitfactor(x, y, w, ids, args):
print(f'ray_results: {time.time() - start_time:0.1f} sec')
io.showmem()

data_graph = np.array(data_graph, dtype=object) # !

# --------------------------------------------------------------------
### Finally pick active scalar variables out

vars = aux.process_regexp_ids(all_ids=data.ids, ids=scalar_vars)
data = data[vars]
data.x = data.x.astype(np.float32)

return {'data': data, 'data_kin': data_kin, 'data_deps': data_deps, 'data_tensor': data_tensor, 'data_graph': data_graph}
return {'data': data,
'data_MI': data_MI,
'data_kin': data_kin,
'data_deps': data_deps,
'data_tensor': data_tensor,
'data_graph': data_graph}
4 changes: 2 additions & 2 deletions icenet/__init__.py
@@ -3,9 +3,9 @@
import os
import psutil

__version__ = '0.1.3.6'
__version__ = '0.1.3.7'
__release__ = 'alpha'
__date__ = '04/11/2024'
__date__ = '18/11/2024'
__author__ = '[email protected]'
__repository__ = 'github.com/mieskolainen/icenet'
__asciiart__ = \
6 changes: 5 additions & 1 deletion icenet/tools/io.py
@@ -391,7 +391,11 @@ def __getitem__(self, key):
return IceXYW(x=self.x[..., col], y=self.y, w=self.w, ids=ids)
else:
return IceXYW(x=self.x[col], y=self.y, w=self.w, ids=ids)


# length operator
def __len__(self):
return len(self.x)

# + operator
def __add__(self, other):

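The new __len__ makes the IceXYW container respond to Python's built-in len() by returning len(self.x). A toy illustration of the same pattern (ToyXYW is a stand-in, not the real class):

import numpy as np

class ToyXYW:
    # Toy stand-in for IceXYW, showing only the __len__ pattern added above
    def __init__(self, x, y, w, ids):
        self.x, self.y, self.w, self.ids = x, y, w, ids

    def __len__(self):
        return len(self.x)   # number of events = length of the first axis of x

data = ToyXYW(x=np.zeros((100, 4)), y=np.zeros(100), w=np.ones(100), ids=['a', 'b', 'c', 'd'])
assert len(data) == 100

One side effect worth noting: once __len__ is defined, an empty container evaluates as falsy, so 'if data:' behaves like 'if len(data) > 0:' unless __bool__ is also defined.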