diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
new file mode 100644
index 00000000..e44da58a
--- /dev/null
+++ b/.github/workflows/build_wheels.yml
@@ -0,0 +1,189 @@
+name: Build wheels
+run-name: Build wheels - ${{ github.sha }}
+on:
+  push:
+    branches:
+      - 'master'
+      - 'release/**'
+jobs:
+  build_windows_wheels:
+    strategy:
+      matrix:
+        py: [cp38, cp39, cp310, cp311, cp312]
+        arch:
+          - [AMD64, win_amd64, x64, x64, 64bit]
+          - [x86, win32, x86, Win32, 32bit]
+    name: ${{ matrix.py }}-${{ matrix.arch[1] }}
+    runs-on: windows-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Compile pthread-win32
+        run: |
+          Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll'
+          Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64' -StartInPath 'C:\'
+          git clone https://github.com/GerHobbelt/pthread-win32.git
+          cd C:\pthread-win32\windows\VS2022
+          msbuild .\pthread.2022.sln -t:pthread_static_lib -p:Configuration=Release,Platform=${{ matrix.arch[3] }}
+          cd C:\
+          mkdir C:\pthread-win32_static_lib
+          mkdir C:\pthread-win32_static_lib\include
+          mkdir C:\pthread-win32_static_lib\lib
+          cp C:\pthread-win32\windows\VS2022\bin\Release-Unicode-${{ matrix.arch[4] }}-${{ matrix.arch[2] }}\pthread_static_lib.lib C:\pthread-win32_static_lib\lib\pthread.lib
+          cp C:\pthread-win32\_ptw32.h C:\pthread-win32_static_lib\include
+          cp C:\pthread-win32\pthread.h C:\pthread-win32_static_lib\include
+          cp C:\pthread-win32\sched.h C:\pthread-win32_static_lib\include
+          cp C:\pthread-win32\semaphore.h C:\pthread-win32_static_lib\include
+
+      - name: Build wheel ${{ matrix.py }}-${{ matrix.arch[1] }}
+        uses: pypa/cibuildwheel@v2.19.1
+        env:
+          PTHREAD_WIN_INCLUDE: C:\pthread-win32_static_lib\include
+          PTHREAD_WIN_LIB: C:\pthread-win32_static_lib\lib
+          CIBW_PLATFORM: windows
+          CIBW_BUILD: ${{ matrix.py }}-${{ matrix.arch[1] }}
+          CIBW_ARCHS_WINDOWS: ${{ matrix.arch[0] }}
+      
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels_${{ matrix.py }}_${{ matrix.arch[1] }}
+          path: ./wheelhouse/*.whl
+          if-no-files-found: error
+
+  build_macos_wheels:
+    strategy:
+      matrix:
+        config:
+          [
+            {
+              py: cp38,
+              arch: [x86_64, macosx_x86_64, 12.0, macos-12]
+            },
+            {
+              py: cp39,
+              arch: [x86_64, macosx_x86_64, 12.0, macos-12]
+            },
+            {
+              py: cp310,
+              arch: [x86_64, macosx_x86_64, 12.0, macos-12]
+            },
+            {
+              py: cp311,
+              arch: [x86_64, macosx_x86_64, 12.0, macos-12]
+            },
+            {
+              py: cp312,
+              arch: [x86_64, macosx_x86_64, 12.0, macos-12]
+            },
+            {
+              py: cp38,
+              arch: [arm64, macosx_arm64, 12.0, macos-14]
+            },
+            {
+              py: cp39,
+              arch: [arm64, macosx_arm64, 12.0, macos-14]
+            },
+            {
+              py: cp310,
+              arch: [arm64, macosx_arm64, 12.0, macos-14]
+            },
+            {
+              py: cp311,
+              arch: [arm64, macosx_arm64, 12.0, macos-14]
+            },
+            {
+              py: cp312,
+              arch: [arm64, macosx_arm64, 12.0, macos-14]
+            }
+          ]
+    name: ${{ matrix.config.py }}-${{ matrix.config.arch[1] }}
+    runs-on: ${{ matrix.config.arch[3] }}
+    if: 
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install pipx # NOTE: required only for arm64
+        if: startsWith(matrix.config.arch[0], 'arm64')
+        run: |
+          brew install pipx
+
+      - name: Build wheel ${{ matrix.config.py }}-${{ matrix.config.arch[1] }}
+        uses: pypa/cibuildwheel@v2.19.1
+        env:
+          MACOSX_DEPLOYMENT_TARGET: ${{ matrix.config.arch[2] }}
+          CIBW_PLATFORM: macos
+          CIBW_BUILD: ${{ matrix.config.py }}-${{ matrix.config.arch[1] }}
+          CIBW_ARCHS_MACOS: ${{ matrix.config.arch[0] }}
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels_${{ matrix.config.py }}_${{ matrix.config.arch[1] }}
+          path: ./wheelhouse/*.whl
+          if-no-files-found: error
+
+  build_linux_wheels:
+    strategy:
+      matrix:
+        py: [cp38, cp39, cp310, cp311, cp312]
+        arch:
+          - [x86_64, manylinux_x86_64, amd64]
+          - [aarch64, manylinux_aarch64, arm64]
+    name: ${{ matrix.py }}-${{ matrix.arch[1] }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3.0.0
+        with:
+          platforms: ${{ matrix.arch[2] }}
+
+      - name: Build wheel ${{ matrix.py }}-${{ matrix.arch[1] }}
+        uses: pypa/cibuildwheel@v2.19.1
+        env:
+          CIBW_PLATFORM: linux
+          CIBW_BUILD: ${{ matrix.py }}-${{ matrix.arch[1] }}
+          CIBW_ARCHS_LINUX: ${{ matrix.arch[0] }}
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels_${{ matrix.py }}_${{ matrix.arch[1] }}
+          path: ./wheelhouse/*.whl
+          if-no-files-found: error
+
+  build_source_distribution:
+    name: sdist
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Build source distribution
+        run: |
+          pip install -U pip
+          pip install -U build
+          python -m build --sdist
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: sdist
+          path: ./dist/*.tar.gz
+          if-no-files-found: error
+
+  run_id:
+    name: Create/Update WHEELS_ARTIFACTS_RUN_ID secret
+    runs-on: ubuntu-latest
+    needs: [build_windows_wheels, build_macos_wheels, build_linux_wheels, build_source_distribution]
+    steps:
+      - uses: actions/checkout@v4
+      - run: |
+          gh secret set WHEELS_ARTIFACTS_RUN_ID --body ${{ github.run_id }}
+        env:
+          GH_TOKEN: ${{ secrets.GH_PAT }}
diff --git a/.github/workflows/publish_on_pypi.yml b/.github/workflows/publish_on_pypi.yml
new file mode 100644
index 00000000..bb020956
--- /dev/null
+++ b/.github/workflows/publish_on_pypi.yml
@@ -0,0 +1,56 @@
+name: Publish on PyPI
+run-name: Publish on PyPI - ${{ github.sha }}
+on:
+  release:
+    types: [published]
+jobs:
+  publish_on_pypi:
+    name: Publish on PyPI
+    if: github.event.release.prerelease == false
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/project/dmri-commit
+    permissions:
+      id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          github-token: ${{ secrets.GH_PAT }}
+          run-id: ${{ secrets.WHEELS_ARTIFACTS_RUN_ID }}
+          path: dist
+          merge-multiple: true
+
+      - name: Publish on PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          skip-existing: true
+          verbose: true
+          print-hash: true
+
+  publish_on_pypi_test:
+    name: Publish on PyPI Test
+    if: github.event.release.prerelease == true && contains(github.event.release.tag_name, 'rc')
+    runs-on: ubuntu-latest
+    environment:
+      name: testpypi
+      url: https://test.pypi.org/project/dmri-commit
+    permissions:
+      id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          github-token: ${{ secrets.GH_PAT }}
+          run-id: ${{ secrets.WHEELS_ARTIFACTS_RUN_ID }}
+          path: dist
+          merge-multiple: true
+
+      - name: Publish on PyPI Test
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/
+          skip-existing: true
+          verbose: true
+          print-hash: true
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4032f88a..0a8fb89f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,20 @@
 # Change Log
 ### All notable changes to `COMMIT` will be documented in this file.
 
+## `v2.3.0`<br>_2024-07-04_
+### ✨Added
+- Added support for Windows (requires the `pthread-win32` library) 
+- Precompiled wheels for Windows, MacOS, and Linux are now available on PyPI
+
+### 🛠️Changed
+- `operator.pyx` no more compiled at runtime
+- Restict the `numpy` version to `<2.0.0`
+
+### 🐛Fixed
+- Improved output when running from Jupyter notebooks
+
+---
+---
 
 ## `v2.2.0`<br>_2024-04-12_
 ### 🐛Fixed
diff --git a/MANIFEST.in b/MANIFEST.in
index 46c7147b..e7ff52f8 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,3 +2,4 @@ recursive-include commit *.c
 recursive-include commit *.cpp
 recursive-include commit *.h
 recursive-include commit *pyx
+include setup_operator.py
diff --git a/commit/core.pyx b/commit/core.pyx
old mode 100755
new mode 100644
index 3454d46c..27656767
--- a/commit/core.pyx
+++ b/commit/core.pyx
@@ -27,6 +27,7 @@ from dicelib.utils import format_time
 
 import commit.models
 import commit.solvers
+from commit.operator import operator
 
 
 logger = setup_logger('core')
@@ -78,6 +79,7 @@ cdef class Evaluation :
     cdef public A
     cdef public regularisation_params
     cdef public x
+    cdef public x_nnls
     cdef public CONFIG
     cdef public temp_data
     cdef public confidence_map_img
@@ -103,6 +105,7 @@ cdef class Evaluation :
         self.regularisation_params  = None # set by "set_regularisation" method
         self.x                      = None # set by "fit" method
         self.confidence_map_img     = None # set by "fit" method
+        self.x_nnls                 = None # set by "fit" method (coefficients of IC compartment estimated without regularization)
         self.verbose                = 3
 
         # store all the parameters of an evaluation with COMMIT
@@ -223,8 +226,9 @@ cdef class Evaluation :
 
             if self.get_config('doNormalizeSignal') :
                 if self.scheme.b0_count > 0:
-                    logger.subinfo('Normalizing to b0:', with_progress=True, indent_char='*', indent_lvl=1)
-                    with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+                    log_list = []
+                    ret_subinfo = logger.subinfo('Normalizing to b0:', with_progress=True, indent_char='*', indent_lvl=1)
+                    with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
                         b0 = np.mean( self.niiDWI_img[:,:,:,self.scheme.b0_idx], axis=3 )
                         idx = b0 <= b0_min_signal * b0[b0>0].mean()
                         b0[ idx ] = 1
@@ -352,8 +356,9 @@ cdef class Evaluation :
         tic = time.time()
         logger.subinfo('')
         logger.info( 'Loading the kernels' )
-        logger.subinfo( 'Resampling LUT for subject "%s":' % self.get_config('subject'), indent_char='*', indent_lvl=1, with_progress=True ) # TODO: check why not printed
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo( 'Resampling LUT for subject "%s":' % self.get_config('subject'), indent_char='*', indent_lvl=1, with_progress=True ) # TODO: check why not printed
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             # auxiliary data structures
             idx_OUT, Ylm_OUT = amico.lut.aux_structures_resample( self.scheme, self.get_config('lmax') )
 
@@ -375,8 +380,9 @@ cdef class Evaluation :
 
         # De-mean kernels
         if self.get_config('doDemean') :
-            logger.subinfo('Demeaning signal', with_progress=True, indent_lvl=2, indent_char='-' )
-            with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+            log_list = []
+            ret_subinfo = logger.subinfo('Demeaning signal', with_progress=True, indent_lvl=2, indent_char='-' )
+            with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
                 for j in xrange(self.get_config('ndirs')) :
                     for i in xrange(nIC) :
                         self.KERNELS['wmr'][i,j,:] -= self.KERNELS['wmr'][i,j,:].mean()
@@ -387,8 +393,9 @@ cdef class Evaluation :
 
         # Normalize atoms
         if self.get_config('doNormalizeKernels') :
-            logger.subinfo('Normalizing kernels', with_progress=True, indent_lvl=2, indent_char='-' )
-            with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+            log_list = []
+            ret_subinfo = logger.subinfo('Normalizing kernels', with_progress=True, indent_lvl=2, indent_char='-' )
+            with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
                 self.KERNELS['wmr_norm'] = np.zeros( nIC )
                 for i in xrange(nIC) :
                     self.KERNELS['wmr_norm'][i] = np.linalg.norm( self.KERNELS['wmr'][i,0,:] )
@@ -458,8 +465,9 @@ cdef class Evaluation :
 
         # segments from the tracts
         # ------------------------
-        logger.subinfo('Segments from the tracts:', indent_char='*', indent_lvl=1, with_progress=True )
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('Segments from the tracts:', indent_char='*', indent_lvl=1, with_progress=True )
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             self.DICTIONARY['TRK'] = {}
             self.DICTIONARY['TRK']['kept']   = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_kept.dict'), dtype=np.uint8 )
             self.DICTIONARY['TRK']['norm']   = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_norm.dict'), dtype=np.float32 )
@@ -498,8 +506,9 @@ cdef class Evaluation :
 
         # segments from the peaks
         # -----------------------
-        logger.subinfo('Segments from the peaks:', indent_char='*', indent_lvl=1, with_progress=True )
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('Segments from the peaks:', indent_char='*', indent_lvl=1, with_progress=True )
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             self.DICTIONARY['EC'] = {}
             self.DICTIONARY['EC']['v']  = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_v.dict'), dtype=np.uint32 )
             self.DICTIONARY['EC']['o']  = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_o.dict'), dtype=np.uint16 )
@@ -515,8 +524,9 @@ cdef class Evaluation :
 
         # isotropic compartments
         # ----------------------
-        logger.subinfo('Isotropic contributions:', indent_char='*', indent_lvl=1, with_progress=True )
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('Isotropic contributions:', indent_char='*', indent_lvl=1, with_progress=True )
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             self.DICTIONARY['ISO'] = {}
 
             self.DICTIONARY['ISO']['v'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_ISO_v.dict'), dtype=np.uint32 )
@@ -533,8 +543,9 @@ cdef class Evaluation :
         
         # post-processing
         # ---------------
-        logger.subinfo('Post-processing', indent_char='*', indent_lvl=1, with_progress=True )
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('Post-processing', indent_char='*', indent_lvl=1, with_progress=True )
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             # get the indices to extract the VOI as in MATLAB (in place of DICTIONARY.MASKidx)
             idx = self.DICTIONARY['MASK'].ravel(order='F').nonzero()[0]
             self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] = np.unravel_index( idx, self.DICTIONARY['MASK'].shape, order='F' )
@@ -557,7 +568,7 @@ cdef class Evaluation :
         ----------
         n : integer
             Number of threads to use (default : number of CPUs in the system)
-        """
+        """  
         if n is None :
             # Use the same number of threads used in trk2dictionary
             n = self.DICTIONARY['n_threads']
@@ -574,7 +585,7 @@ cdef class Evaluation :
         self.THREADS['n'] = n
 
         cdef :
-            long [:] C
+            long long [:] C
             long t, tot, i1, i2, N, c
             int i
 
@@ -584,8 +595,9 @@ cdef class Evaluation :
         logger.subinfo('Number of threads: %d' % n , indent_char='*', indent_lvl=1 )
 
         # Distribute load for the computation of A*x product
-        logger.subinfo('A operator ', indent_char='*', indent_lvl=1, with_progress=True )
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('A operator ', indent_char='*', indent_lvl=1, with_progress=True )
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             if self.DICTIONARY['IC']['n'] > 0 :
                 self.THREADS['IC'] = np.zeros( n+1, dtype=np.uint32 )
                 if n > 1 :
@@ -636,8 +648,9 @@ cdef class Evaluation :
 
 
         # Distribute load for the computation of At*y product
-        logger.subinfo('A\' operator', indent_char='*', indent_lvl=1, with_progress=True )
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('A\' operator', indent_char='*', indent_lvl=1, with_progress=True )
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             if self.DICTIONARY['IC']['n'] > 0 :
                 self.THREADS['ICt'] = np.full( self.DICTIONARY['IC']['n'], n-1, dtype=np.uint8 )
                 if n > 1 :
@@ -692,19 +705,9 @@ cdef class Evaluation :
         logger.info( f'[ {format_time(time.time() - tic)} ]' )
 
 
-    def build_operator( self, build_dir=None ) :
-        """Compile/build the operator for computing the matrix-vector multiplications by A and A'
+    def build_operator( self ) :
+        """Build the operator for computing the matrix-vector multiplications by A and A'
         using the informations from self.DICTIONARY, self.KERNELS and self.THREADS.
-        NB: needs to call this function to update pointers to data structures in case
-            the data is changed in self.DICTIONARY, self.KERNELS or self.THREADS.
-
-        Parameters
-        ----------
-        build_dir : string
-            The folder in which to store the compiled files.
-            If None (default), they will end up in the .pyxbld directory in the user’s home directory.
-            If using this option, it is recommended to use a temporary directory, quit your python
-                console between each build, and delete the content of the temporary directory.
         """
         if self.DICTIONARY is None :
             logger.error( 'Dictionary not loaded; call "load_dictionary()" first' )
@@ -722,49 +725,7 @@ cdef class Evaluation :
         logger.subinfo('')
         logger.info( 'Building linear operator A' )
 
-        # need to pass these parameters at runtime for compiling the C code
-        from commit.operator import config
-
-        compilation_is_needed = False
-
-        if config.nTHREADS is None or config.nTHREADS != self.THREADS['n']:
-            compilation_is_needed = True
-        if config.nIC is None or config.nIC != self.KERNELS['wmr'].shape[0]:
-            compilation_is_needed = True
-        if config.model is None or config.model != self.model.id:
-            compilation_is_needed = True
-        if config.nEC is None or config.nEC != self.KERNELS['wmh'].shape[0]:
-            compilation_is_needed = True
-        if config.nISO is None or config.nISO != self.KERNELS['iso'].shape[0]:
-            compilation_is_needed = True
-        if config.build_dir != build_dir:
-            compilation_is_needed = True
-
-        if compilation_is_needed or not 'commit.operator.operator' in sys.modules :
-
-            if build_dir is not None:
-                if isdir(build_dir) and not len(listdir(build_dir)) == 0:
-                    logger.error( '\nbuild_dir is not empty, unsafe build option.' )
-                elif config.nTHREADS is not None:
-                    logger.error( '\nThe parameter build_dir has changed, unsafe build option.' )
-                else:
-                    logger.warning( '\nUsing build_dir, always quit your python console between COMMIT Evaluation.' )
-
-            config.nTHREADS   = self.THREADS['n']
-            config.model      = self.model.id
-            config.nIC        = self.KERNELS['wmr'].shape[0]
-            config.nEC        = self.KERNELS['wmh'].shape[0]
-            config.nISO       = self.KERNELS['iso'].shape[0]
-            config.build_dir  = build_dir
-
-            sys.dont_write_bytecode = True
-            pyximport.install( reload_support=True, language_level=3, build_dir=build_dir, build_in_temp=True, inplace=False )
-
-        if 'commit.operator.operator' in sys.modules :
-            del sys.modules['commit.operator.operator']
-        import commit.operator.operator
-
-        self.A = commit.operator.operator.LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS )
+        self.A = operator.LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, True if hasattr(self.model, 'nolut') else False )
 
         logger.info( f'[ {format_time(time.time() - tic)} ]' )
 
@@ -869,7 +830,7 @@ cdef class Evaluation :
                     This field can be specified only if regularisers[0] is 'group_lasso' or 'sparse_group_lasso'.
                     NB: this array must have the same size as the number of groups in the IC compartment and contain only non-negative values.
                 'coeff_weights' - np.array(np.float64) :
-                    weights associated to each individual element of the compartment (implemented for all compartments).
+                    weights associated to each individual element of the compartment (for the moment implemented only for IC compartment).
                     This field can be specified only if the chosen regulariser is 'lasso' or 'sparse_group_lasso'.
                     NB: this array must have the same size as the number of elements in the compartment and contain only non-negative values.
 
@@ -960,7 +921,7 @@ cdef class Evaluation :
                 logger.error('Regularisation parameters for the IC compartment ara not exactly two')
             elif lambdas[0][0] < 0 or lambdas[0][1] < 0:
                 logger.error('Regularisation parameters for the IC compartment must be greater than 0')
-            elif lambdas[0][0] == 0 or lambdas[0][1] == 0:
+            elif lambdas[0][0] == 0 and lambdas[0][1] == 0:
                 logger.warning('Regularisation parameters for the IC compartment are both 0, the solution will be the same as the one without regularisation')
                 regularisation['lambdaIC_perc'] = lambdas[0]
             else:
@@ -972,7 +933,7 @@ cdef class Evaluation :
 
         # check if coeff_weights is consistent with the regularisation
         if regularisation['regIC'] not in ['lasso', 'sparse_group_lasso'] and dictIC_params is not None and 'coeff_weights' in dictIC_params:
-            logger.warning('Coefficients weights are allowed only for lasso and sparse_group_lasso regularisation')
+            logger.warning('Coefficients weights are allowed only for lasso and sparse_group_lasso regularisations. Ignoring "coeff_weights"')
 
         # check if coeff_weights is consistent with the compartment size
         if regularisation['regIC'] == 'lasso' or regularisation['regIC'] == 'sparse_group_lasso':
@@ -981,7 +942,7 @@ cdef class Evaluation :
                     logger.error('All coefficients weights must be non-negative')
                 if dictIC_params['coeff_weights'].size != len(self.DICTIONARY['TRK']['kept']):
                     logger.error(f'"coeff_weights" must have the same size as the number of elements in the IC compartment (got {dictIC_params["coeff_weights"].size} but {len(self.DICTIONARY["TRK"]["kept"])} expected)')
-                dictIC_params['coeff_weights'] = dictIC_params['coeff_weights'][self.DICTIONARY['TRK']['kept']==1]
+                dictIC_params['coeff_weights_kept'] = dictIC_params['coeff_weights'][self.DICTIONARY['TRK']['kept']==1]
 
         # check if group parameters are consistent with the regularisation
         if regularisation['regIC'] not in ['group_lasso', 'sparse_group_lasso'] and dictIC_params is not None:
@@ -1058,24 +1019,25 @@ cdef class Evaluation :
                         newweightsIC_group.append(weightsIC_group[count])
 
                 newweightsIC_group = np.array(newweightsIC_group, dtype=np.float64)
-                dictIC_params['group_idx'] = np.array(newICgroup_idx, dtype=np.object_)
+                dictIC_params['group_idx_kept'] = np.array(newICgroup_idx, dtype=np.object_)
                 if weightsIC_group.size != newweightsIC_group.size:
                     logger.warning(f"""\
                     Not all the original groups are kept. 
-                    {weightsIC_group.size - newweightsIC_group.size} groups have been removed because their streamlines didn't satify the criteria set in trk2dictionary.""")
+                    {weightsIC_group.size - newweightsIC_group.size} groups have been removed because their streamlines didn't satisfy the criteria set in trk2dictionary.""")
             else:
                 newweightsIC_group = weightsIC_group
+                dictIC_params['group_idx_kept'] = dictIC_params['group_idx']
 
             # compute group weights
             if regularisation['regIC'] == 'group_lasso' or regularisation['regIC'] == 'sparse_group_lasso':
                 if dictIC_params['group_weights_cardinality']:
-                    group_size = np.array([g.size for g in dictIC_params['group_idx']], dtype=np.int32)
+                    group_size = np.array([g.size for g in dictIC_params['group_idx_kept']], dtype=np.int32)
                     newweightsIC_group *= np.sqrt(group_size)
                 if dictIC_params['group_weights_adaptive']:
-                    if self.x is None or self.regularisation_params['regIC'] is not None:
+                    if self.x_nnls is None: #or self.regularisation_params['regIC'] is not None:
                         logger.error('Group weights cannot be computed if the fit without regularisation has not been performed before')
-                    x_nnls, _, _ = self.get_coeffs(get_normalized=False)
-                    group_x_norm = np.array([np.linalg.norm(x_nnls[g])+1e-12 for g in dictIC_params['group_idx']], dtype=np.float64)
+                    # x_nnls, _, _ = self.get_coeffs(get_normalized=False)
+                    group_x_norm = np.array([np.linalg.norm(self.x_nnls[g])+1e-12 for g in dictIC_params['group_idx_kept']], dtype=np.float64)
                     newweightsIC_group /= group_x_norm
                 dictIC_params['group_weights'] = newweightsIC_group
 
@@ -1083,21 +1045,24 @@ cdef class Evaluation :
 
         # update lambdas using lambda_max
         if regularisation['regIC'] == 'lasso':
-            if dictIC_params is not None and 'coeff_weights' in dictIC_params:
-                regularisation['lambdaIC_max'] = compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC'], dictIC_params['coeff_weights'])
+            if dictIC_params is not None and 'coeff_weights_kept' in dictIC_params:
+                regularisation['lambdaIC_max'] = compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC'], dictIC_params['coeff_weights_kept'])
             else:
                 regularisation['lambdaIC_max'] = compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC'], np.ones(regularisation['sizeIC'], dtype=np.float64))
             regularisation['lambdaIC'] = regularisation['lambdaIC_perc'] * regularisation['lambdaIC_max']
         if regularisation['regIC'] == 'group_lasso':
-            regularisation['lambdaIC_max'] = compute_lambda_max_group(dictIC_params['group_weights'], dictIC_params['group_idx'])
+            regularisation['lambdaIC_max'] = compute_lambda_max_group(dictIC_params['group_weights'], dictIC_params['group_idx_kept'])
             regularisation['lambdaIC'] = regularisation['lambdaIC_perc'] * regularisation['lambdaIC_max']
         if regularisation['regIC'] == 'sparse_group_lasso':
-            regularisation['lambdaIC_max'] = ( compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC']), compute_lambda_max_group(dictIC_params['group_weights'], dictIC_params['group_idx']) )
+            if 'coeff_weights_kept' in dictIC_params:
+                regularisation['lambdaIC_max'] = ( compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC'], dictIC_params['coeff_weights_kept']), compute_lambda_max_group(dictIC_params['group_weights'], dictIC_params['group_idx_kept']) )
+            else:
+                regularisation['lambdaIC_max'] = ( compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC'], np.ones(regularisation['sizeIC'], dtype=np.float64)), compute_lambda_max_group(dictIC_params['group_weights'], dictIC_params['group_idx_kept']) )
             regularisation['lambdaIC'] = ( regularisation['lambdaIC_perc'][0] * regularisation['lambdaIC_max'][0], regularisation['lambdaIC_perc'][1] * regularisation['lambdaIC_max'][1] )
 
         # print
         if regularisation['regIC'] is not None:
-            if (regularisation['regIC'] == 'lasso' or regularisation['regIC'] == 'sparse_group_lasso') and dictIC_params is not None and 'coeff_weights' in dictIC_params:
+            if (regularisation['regIC'] == 'lasso' or regularisation['regIC'] == 'sparse_group_lasso') and dictIC_params is not None and 'coeff_weights_kept' in dictIC_params:
                 logger.subinfo( f'Regularisation type: {regularisation["regIC"]} (weighted version)', indent_lvl=2, indent_char='-' )
             else:
                 logger.subinfo( f'Regularisation type: {regularisation["regIC"]}', indent_lvl=2, indent_char='-' )
@@ -1108,7 +1073,7 @@ cdef class Evaluation :
             logger.debug( f'% lambda: {regularisation["lambdaIC_perc"]}' )
             logger.debug( f'Lambda used: {regularisation["lambdaIC"]}' )
         if regularisation['regIC'] == 'group_lasso' or regularisation['regIC'] == 'sparse_group_lasso':
-            logger.debug( f'Number of groups: {len(dictIC_params["group_idx"])}' )
+            logger.debug( f'Number of groups: {len(dictIC_params["group_idx_kept"])}' )
             if dictIC_params['group_weights_cardinality']==False and dictIC_params['group_weights_adaptive']==False and dictIC_params['group_weights_extra'] is None:
                 logger.debug( 'Group weights are not considered (all ones)' )
             else:
@@ -1139,9 +1104,9 @@ cdef class Evaluation :
                 regularisation['lambdaEC_perc'] = lambdas[1]
             else:
                 regularisation['lambdaEC_perc'] = lambdas[1]
-            if dictEC_params is not None and 'coeff_weights' in dictEC_params:
-                if dictEC_params['coeff_weights'].size != regularisation['sizeEC']:
-                    logger.error(f'"coeff_weights" must have the same size as the number of elements in the EC compartment (got {dictEC_params["coeff_weights"].size} but {regularisation["sizeEC"]} expected)')
+            # if dictEC_params is not None and 'coeff_weights' in dictEC_params:
+            #     if dictEC_params['coeff_weights'].size != regularisation['sizeEC']:
+            #         logger.error(f'"coeff_weights" must have the same size as the number of elements in the EC compartment (got {dictEC_params["coeff_weights"].size} but {regularisation["sizeEC"]} expected)')
         elif regularisation['regEC'] == 'smoothness':
             logger.error('Not yet implemented')
         elif regularisation['regEC'] == 'group_lasso':
@@ -1155,18 +1120,18 @@ cdef class Evaluation :
 
         # update lambdas using lambda_max
         if regularisation['regEC'] == 'lasso':
-            if dictEC_params is not None and 'coeff_weights' in dictEC_params:
-                regularisation['lambdaEC_max'] = compute_lambda_max_lasso(regularisation['startEC'], regularisation['sizeEC'], dictEC_params['coeff_weights'])
-            else:
-                regularisation['lambdaEC_max'] = compute_lambda_max_lasso(regularisation['startEC'], regularisation['sizeEC'], np.ones(regularisation['sizeEC'], dtype=np.float64))
+            # if dictEC_params is not None and 'coeff_weights' in dictEC_params:
+            #     regularisation['lambdaEC_max'] = compute_lambda_max_lasso(regularisation['startEC'], regularisation['sizeEC'], dictEC_params['coeff_weights'])
+            # else:
+            regularisation['lambdaEC_max'] = compute_lambda_max_lasso(regularisation['startEC'], regularisation['sizeEC'], np.ones(regularisation['sizeEC'], dtype=np.float64))
             regularisation['lambdaEC'] = regularisation['lambdaEC_perc'] * regularisation['lambdaEC_max']
 
         # print
         if regularisation['regEC'] is not None:
-            if regularisation['regEC'] == 'lasso' and dictEC_params is not None and 'coeff_weights' in dictEC_params:
-                logger.subinfo( f'Regularisation type: {regularisation["regEC"]} (weighted version)', indent_lvl=2, indent_char='-' )
-            else:
-                logger.subinfo( f'Regularisation type: {regularisation["regEC"]}', indent_lvl=2, indent_char='-' )
+            # if regularisation['regEC'] == 'lasso' and dictEC_params is not None and 'coeff_weights' in dictEC_params:
+            #     logger.subinfo( f'Regularisation type: {regularisation["regEC"]} (weighted version)', indent_lvl=2, indent_char='-' )
+            # else:
+            logger.subinfo( f'Regularisation type: {regularisation["regEC"]}', indent_lvl=2, indent_char='-' )
 
         logger.subinfo( f'Non-negativity constraint: {regularisation["nnEC"]}', indent_char='-', indent_lvl=2 )
 
@@ -1194,9 +1159,9 @@ cdef class Evaluation :
                 regularisation['lambdaISO_perc'] = lambdas[2]
             else:
                 regularisation['lambdaISO_perc'] = lambdas[2]
-            if dictISO_params is not None and 'coeff_weights' in dictISO_params:
-                if dictISO_params['coeff_weights'].size != regularisation['sizeISO']:
-                    logger.error(f'"coeff_weights" must have the same size as the number of elements in the ISO compartment (got {dictISO_params["coeff_weights"].size} but {regularisation["sizeISO"]} expected)')
+            # if dictISO_params is not None and 'coeff_weights' in dictISO_params:
+            #     if dictISO_params['coeff_weights'].size != regularisation['sizeISO']:
+            #         logger.error(f'"coeff_weights" must have the same size as the number of elements in the ISO compartment (got {dictISO_params["coeff_weights"].size} but {regularisation["sizeISO"]} expected)')
         elif regularisation['regISO'] == 'smoothness':
             logger.error('Not yet implemented')
         elif regularisation['regISO'] == 'group_lasso':
@@ -1210,18 +1175,18 @@ cdef class Evaluation :
 
         # update lambdas using lambda_max
         if regularisation['regISO'] == 'lasso':
-            if dictISO_params is not None and 'coeff_weights' in dictISO_params:
-                regularisation['lambdaISO_max'] = compute_lambda_max_lasso(regularisation['startISO'], regularisation['sizeISO'], dictISO_params['coeff_weights'])
-            else:
-                regularisation['lambdaISO_max'] = compute_lambda_max_lasso(regularisation['startISO'], regularisation['sizeISO'], np.ones(regularisation['sizeISO'], dtype=np.float64))
+            # if dictISO_params is not None and 'coeff_weights' in dictISO_params:
+            #     regularisation['lambdaISO_max'] = compute_lambda_max_lasso(regularisation['startISO'], regularisation['sizeISO'], dictISO_params['coeff_weights'])
+            # else:
+            regularisation['lambdaISO_max'] = compute_lambda_max_lasso(regularisation['startISO'], regularisation['sizeISO'], np.ones(regularisation['sizeISO'], dtype=np.float64))
             regularisation['lambdaISO'] = regularisation['lambdaISO_perc'] * regularisation['lambdaISO_max']
 
         # print
         if regularisation['regISO'] is not None:
-            if regularisation['regISO'] == 'lasso' and dictISO_params is not None and 'coeff_weights' in dictISO_params:
-                logger.subinfo( f'Regularisation type: {regularisation["regISO"]} (weighted version)', indent_lvl=2, indent_char='-' )
-            else:
-                logger.subinfo( f'Regularisation type: {regularisation["regISO"]}', indent_lvl=2, indent_char='-' )
+            # if regularisation['regISO'] == 'lasso' and dictISO_params is not None and 'coeff_weights' in dictISO_params:
+            #     logger.subinfo( f'Regularisation type: {regularisation["regISO"]} (weighted version)', indent_lvl=2, indent_char='-' )
+            # else:
+            logger.subinfo( f'Regularisation type: {regularisation["regISO"]}', indent_lvl=2, indent_char='-' )
 
         logger.subinfo( f'Non-negativity constraint: {regularisation["nnISO"]}', indent_char='-', indent_lvl=2 )
         if regularisation['regISO'] is not None: 
@@ -1353,12 +1318,15 @@ cdef class Evaluation :
 
         # run solver
         t = time.time()
-        with ProgressBar(disable=self.verbose!=3, hide_on_exit=True) as pb:
+        with ProgressBar(disable=self.verbose!=3, hide_on_exit=True):
             self.x, opt_details = commit.solvers.solve(self.get_y(), self.A, self.A.T, tol_fun=tol_fun, tol_x=tol_x, max_iter=max_iter, verbose=self.verbose, x0=x0, regularisation=self.regularisation_params, confidence_array=confidence_array)
 
         self.CONFIG['optimization']['fit_details'] = opt_details
         self.CONFIG['optimization']['fit_time'] = time.time()-t
 
+        if self.regularisation_params['regIC'] is None and self.x_nnls is None:
+            self.x_nnls, _, _ = self.get_coeffs(get_normalized=False)
+
         logger.info( f'[ {format_time(self.CONFIG["optimization"]["fit_time"])} ]' )
 
 
@@ -1515,8 +1483,9 @@ cdef class Evaluation :
         # Map of compartment contributions
         logger.subinfo('Voxelwise contributions:', indent_char='*', indent_lvl=1)
 
-        logger.subinfo('Intra-axonal', indent_lvl=2, indent_char='-', with_progress=True)
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('Intra-axonal', indent_lvl=2, indent_char='-', with_progress=True)
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             niiIC_img = np.zeros( self.get_config('dim'), dtype=np.float32 )
             if len(self.KERNELS['wmr']) > 0 :
                 offset = nF * self.KERNELS['wmr'].shape[0]
@@ -1526,8 +1495,9 @@ cdef class Evaluation :
                 ).astype(np.float32)
                 niiIC_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv
 
-        logger.subinfo('Extra-axonal', indent_lvl=2, indent_char='-', with_progress=True)
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('Extra-axonal', indent_lvl=2, indent_char='-', with_progress=True)
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             niiEC_img = np.zeros( self.get_config('dim'), dtype=np.float32 )
             if len(self.KERNELS['wmh']) > 0 :
                 offset = nF * self.KERNELS['wmr'].shape[0]
@@ -1535,8 +1505,9 @@ cdef class Evaluation :
                 xv = np.bincount( self.DICTIONARY['EC']['v'], weights=tmp, minlength=nV ).astype(np.float32)
                 niiEC_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv
 
-        logger.subinfo('Isotropic   ', indent_lvl=2, indent_char='-', with_progress=True)
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('Isotropic   ', indent_lvl=2, indent_char='-', with_progress=True)
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             niiISO_img = np.zeros( self.get_config('dim'), dtype=np.float32 )
             if len(self.KERNELS['iso']) > 0 :
                 offset = nF * self.KERNELS['wmr'].shape[0] + nE * self.KERNELS['wmh'].shape[0]
@@ -1560,8 +1531,9 @@ cdef class Evaluation :
 
         # Configuration and results
         logger.subinfo('Configuration and results:', indent_char='*', indent_lvl=1)
-        logger.subinfo('streamline_weights.txt', indent_lvl=2, indent_char='-', with_progress=True)
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('streamline_weights.txt', indent_lvl=2, indent_char='-', with_progress=True)
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             xic, _, _ = self.get_coeffs()
             if stat_coeffs != 'all' and xic.size > 0 :
                 xic = np.reshape( xic, (-1,self.DICTIONARY['TRK']['kept'].size) )
@@ -1620,8 +1592,9 @@ cdef class Evaluation :
         #   item 0: dictionary with all the configuration details
         #   item 1: np.array obtained through the optimisation process with the normalised kernels
         #   item 2: np.array renormalisation of coeffs in item 1
-        logger.subinfo('results.pickle', indent_char='-', indent_lvl=2, with_progress=True)
-        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:
+        log_list = []
+        ret_subinfo = logger.subinfo('results.pickle', indent_char='-', indent_lvl=2, with_progress=True)
+        with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
             xic, xec, xiso = self.get_coeffs()
             x = self.x
             if self.get_config('doNormalizeKernels') :
@@ -1631,9 +1604,10 @@ cdef class Evaluation :
                 self.CONFIG['optimization']['regularisation'].pop('prox', None)
                 pickle.dump( [self.CONFIG, x, self.x], fid, protocol=2 )
 
-        if save_est_dwi :
-            logger.subinfo('Estimated signal:', indent_char='-', indent_lvl=2, with_progress=True)
-            with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar:                    
+        if save_est_dwi:
+            log_list = []
+            ret_subinfo = logger.subinfo('Estimated signal:', indent_char='-', indent_lvl=2, with_progress=True)
+            with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):                    
                 self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ] = y_est
                 nibabel.save( nibabel.Nifti1Image( self.niiDWI_img , affine ), pjoin(RESULTS_path,'fit_signal_estimated.nii.gz') )
                 self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ] = y_mea
diff --git a/commit/models.py b/commit/models.py
index 03741d84..105489bd 100755
--- a/commit/models.py
+++ b/commit/models.py
@@ -74,6 +74,7 @@ def __init__(self):
         self.name = 'Volume fractions'
         self.maps_name = []
         self.maps_descr = []
+        self.nolut = True
 
     def set(self):
         return
diff --git a/commit/operator/config.py b/commit/operator/config.py
deleted file mode 100755
index 8cbac4ed..00000000
--- a/commit/operator/config.py
+++ /dev/null
@@ -1,6 +0,0 @@
-nTHREADS      = None
-model         = None
-nIC           = None
-nEC      	  = None
-nISO     	  = None
-build_dir	  = None
diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx
index 70f1d505..d3ffbdbc 100755
--- a/commit/operator/operator.pyx
+++ b/commit/operator/operator.pyx
@@ -7,13 +7,14 @@ from dicelib.ui import setup_logger
 
 # Interfaces to actual C code performing the multiplications
 cdef extern void COMMIT_A(
-    int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs,
+    int _nF, int _nE, int _nV, int _nS, int _ndirs,
     double *_v_in, double *_v_out,
     unsigned int *_ICf, unsigned int *_ICv, unsigned short *_ICo, float *_ICl,
     unsigned int *_ECv, unsigned short *_ECo,
     unsigned int *_ISOv,
     float *_wmrSFP, float *_wmhSFP, float *_isoSFP,
-    unsigned int* _ICthreads, unsigned int* _ECthreads, unsigned int* _ISOthreads
+    unsigned int* _ICthreads, unsigned int* _ECthreads, unsigned int* _ISOthreads,
+    unsigned int _nIC, unsigned int _nEC, unsigned int _nISO, unsigned int _nThreads
 ) nogil
 
 cdef extern void COMMIT_At(
@@ -23,7 +24,26 @@ cdef extern void COMMIT_At(
     unsigned int *_ECv, unsigned short *_ECo,
     unsigned int *_ISOv,
     float *_wmrSFP, float *_wmhSFP, float *_isoSFP,
-    unsigned char *_ICthreadsT, unsigned int *_ECthreadsT, unsigned int *_ISOthreadsT
+    unsigned char* _ICthreadsT, unsigned int* _ECthreadsT, unsigned int* _ISOthreadsT,
+    unsigned int _nIC, unsigned int _nEC, unsigned int _nISO, unsigned int _nThreads
+) nogil
+
+cdef extern void COMMIT_A_nolut(
+    int _nF,
+    double *_v_in, double *_v_out,
+    unsigned int *_ICf, unsigned int *_ICv, float *_ICl,
+    unsigned int *_ISOv,
+    unsigned int* _ICthreads, unsigned int* _ISOthreads,
+    unsigned int _nISO, unsigned int _nThreads
+) nogil
+
+cdef extern void COMMIT_At_nolut(
+    int _nF, int _n,
+    double *_v_in, double *_v_out,
+    unsigned int *_ICf, unsigned int *_ICv, float *_ICl,
+    unsigned int *_ISOv,
+    unsigned char* _ICthreadsT, unsigned int* _ISOthreadsT,
+    unsigned int _nISO, unsigned int _nThreads
 ) nogil
 
 logger = setup_logger('operator')
@@ -39,6 +59,7 @@ cdef class LinearOperator :
     cdef DICTIONARY
     cdef KERNELS
     cdef THREADS
+    cdef nolut
 
     cdef unsigned int*   ICf
     cdef float*          ICl
@@ -61,11 +82,12 @@ cdef class LinearOperator :
     cdef unsigned int*   ISOthreadsT
 
 
-    def __init__( self, DICTIONARY, KERNELS, THREADS ) :
+    def __init__( self, DICTIONARY, KERNELS, THREADS, nolut=False ) :
         """Set the pointers to the data structures used by the C code."""
         self.DICTIONARY = DICTIONARY
         self.KERNELS    = KERNELS
         self.THREADS    = THREADS
+        self.nolut      = nolut
 
         self.nF         = DICTIONARY['IC']['nF']    # number of FIBERS
         self.nR         = KERNELS['wmr'].shape[0]   # number of FIBER RADII
@@ -131,7 +153,7 @@ cdef class LinearOperator :
     @property
     def T( self ) :
         """Transpose of the explicit matrix."""
-        C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS )
+        C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.nolut )
         C.adjoint = 1 - C.adjoint
         return C
 
@@ -166,27 +188,59 @@ cdef class LinearOperator :
         # Create output array
         cdef double [::1] v_out = np.zeros( self.shape[0], dtype=np.float64 )
 
+        cdef unsigned int nthreads = self.THREADS['n']
+        cdef unsigned int nIC = self.KERNELS['wmr'].shape[0]
+        cdef unsigned int nEC = self.KERNELS['wmh'].shape[0]
+        cdef unsigned int nISO = self.KERNELS['iso'].shape[0]
+
         # Call the cython function to read the memory pointers
         if not self.adjoint :
             # DIRECT PRODUCT A*x
-
-            with nogil :
-                COMMIT_A(
-                    self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs,
-                    &v_in[0], &v_out[0],
-                    self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv,
-                    self.LUT_IC, self.LUT_EC, self.LUT_ISO,
-                    self.ICthreads, self.ECthreads, self.ISOthreads
-                )
+            if self.nolut:
+                with nogil:
+                    COMMIT_A_nolut(
+                        self.nF,
+                        &v_in[0], &v_out[0],
+                        self.ICf, self.ICv, self.ICl,
+                        self.ISOv,
+                        self.ICthreads, self.ISOthreads,
+                        nISO, nthreads
+                    )
+            else:
+                with nogil:
+                    COMMIT_A(
+                        self.nF, self.nE, self.nV, self.nS, self.ndirs,
+                        &v_in[0], &v_out[0],
+                        self.ICf, self.ICv, self.ICo, self.ICl,
+                        self.ECv, self.ECo,
+                        self.ISOv,
+                        self.LUT_IC, self.LUT_EC, self.LUT_ISO,
+                        self.ICthreads, self.ECthreads, self.ISOthreads,
+                        nIC, nEC, nISO, nthreads
+                    )
         else :
             # INVERSE PRODUCT A'*y
-            with nogil :
-                COMMIT_At(
-                    self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs,
-                    &v_in[0], &v_out[0],
-                    self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv,
-                    self.LUT_IC, self.LUT_EC, self.LUT_ISO,
-                    self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT
-                )
+            if self.nolut:
+                with nogil:
+                    COMMIT_At_nolut(
+                        self.nF, self.n,
+                        &v_in[0], &v_out[0],
+                        self.ICf, self.ICv, self.ICl,
+                        self.ISOv,
+                        self.ICthreadsT, self.ISOthreadsT,
+                        nISO, nthreads
+                    )
+            else:
+                with nogil:
+                    COMMIT_At(
+                        self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs,
+                        &v_in[0], &v_out[0],
+                        self.ICf, self.ICv, self.ICo, self.ICl,
+                        self.ECv, self.ECo,
+                        self.ISOv,
+                        self.LUT_IC, self.LUT_EC, self.LUT_ISO,
+                        self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT,
+                        nIC, nEC, nISO, nthreads
+                    )
 
         return v_out
diff --git a/commit/operator/operator.pyxbld b/commit/operator/operator.pyxbld
deleted file mode 100644
index f3967a15..00000000
--- a/commit/operator/operator.pyxbld
+++ /dev/null
@@ -1,39 +0,0 @@
-import numpy
-from os import utime
-from os.path import dirname, join
-from setuptools import Extension
-
-# pass parameters to the compiler at runtime
-# [TODO] find a way to avoid using this fake module
-from commit.operator import config
-
-
-def make_ext(modname, pyxfilename):
-
-    if (config.nTHREADS is None or config.nTHREADS < 1 or config.nTHREADS > 255):
-        raise RuntimeError('config.nTHREADS must be between 1 and 255')
-    if (config.nIC is None or config.nIC < 0 or config.nIC > 20):
-        raise RuntimeError('config.nIC must be in the range [0..20]')
-    if (config.nEC is None or config.nEC < 0 or config.nEC > 20):
-        raise RuntimeError('config.nEC must be in the range [0..20]')
-    if (config.nISO is None or config.nISO < 0 or config.nISO > 20):
-        raise RuntimeError('config.nISO must be in the range [0..20]')
-
-    # Force recompilation
-    if config.model == "VolumeFractions":
-        filename = "operator_noLUT.c"
-    else:
-        filename = "operator_withLUT.c"
-    path = dirname(pyxfilename)
-
-    if config.build_dir is None:
-        utime( join(path,filename), None)
-
-    return Extension(name=modname,
-                     sources=[pyxfilename, join(path, filename)],
-                     include_dirs=[numpy.get_include()],
-                     define_macros=[('nTHREADS', config.nTHREADS),
-                                    ('nIC', config.nIC),
-                                    ('nEC', config.nEC),
-                                    ('nISO', config.nISO)],
-                     extra_compile_args=['-w', '-O3', '-Ofast'])
diff --git a/commit/operator/operator_noLUT.c b/commit/operator/operator_noLUT.c
deleted file mode 100644
index 0046f237..00000000
--- a/commit/operator/operator_noLUT.c
+++ /dev/null
@@ -1,187 +0,0 @@
-#include <pthread.h>
-#include <stdint.h> // uint32_t etc
-
-// number of THREADS
-#ifdef nTHREADS
-    #if (nTHREADS<1 || nTHREADS>255)
-    #error "nTHREADS" must be in the range 1..255
-    #endif
-#else
-    #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=<value>"
-#endif
-
-
-/* global variables */
-int         nF, n;
-double      *x, *Y;
-uint32_t    *ICthreads, *ISOthreads;
-uint8_t     *ICthreadsT;
-uint32_t    *ISOthreadsT;
-uint32_t    *ICf, *ICv, *ISOv;
-float       *ICl;
-
-
-// ====================================================
-// Compute a sub-block of the A*x MAtRIX-VECTOR product
-// ====================================================
-void* COMMIT_A__block( void *ptr )
-{
-    int      id = (long)ptr;
-    double   x0;
-    double   *xPtr;
-    uint32_t *t_v, *t_vEnd, *t_f;
-    float    *t_l;
-
-    // intra-cellular compartments
-    t_v    = ICv + ICthreads[id];
-    t_vEnd = ICv + ICthreads[id+1];
-    t_l    = ICl + ICthreads[id];
-    t_f    = ICf + ICthreads[id];
-
-    while( t_v != t_vEnd )
-    {
-        x0 = x[*t_f];
-        if ( x0 != 0 )
-            Y[*t_v] += (double)(*t_l) * x0;
-        t_f++;
-        t_v++;
-        t_l++;
-    }
-
-#if nISO>=1
-    // isotropic compartments
-    t_v    = ISOv + ISOthreads[id];
-    t_vEnd = ISOv + ISOthreads[id+1];
-    xPtr   = x + nF + ISOthreads[id];
-
-    while( t_v != t_vEnd )
-    {
-        x0 = *xPtr++;
-        if ( x0 != 0 )
-            Y[*t_v] += x0;
-        t_v++;
-    }
-#endif
-
-    pthread_exit( 0 );
-}
-
-
-// =========================
-// Function called by CYTHON
-// =========================
-void COMMIT_A(
-    int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs,
-    double *_vIN, double *_vOUT,
-    uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl,
-    uint32_t *_ECv, uint16_t *_ECo,
-    uint32_t *_ISOv,
-    float *_wmrSFP, float *_wmhSFP, float *_isoSFP,
-    uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads
-)
-{
-    nF = _nF;
-    n  = _n;
-
-    x = _vIN;
-    Y = _vOUT;
-
-    ICf  = _ICf;
-    ICv  = _ICv;
-    ICl  = _ICl;
-    ISOv = _ISOv;
-
-    ICthreads  = _ICthreads;
-    ISOthreads = _ISOthreads;
-
-    // Run SEPARATE THREADS to perform the multiplication
-    pthread_t threads[nTHREADS];
-    int t;
-    for(t=0; t<nTHREADS ; t++)
-        pthread_create( &threads[t], NULL, COMMIT_A__block, (void *) (long int)t );
-    for(t=0; t<nTHREADS ; t++)
-        pthread_join( threads[t], NULL );
-    return;
-}
-
-
-
-/* ===================================================== */
-/* Compute a sub-block of the A'*y MAtRIX-VECTOR product */
-/* ===================================================== */
-void* COMMIT_At__block( void *ptr )
-{
-    int      id = (long)ptr;
-    double   *xPtr;
-    uint32_t *t_v, *t_vEnd, *t_f;
-    float    *t_l;
-    uint8_t  *t_t;
-
-    // intra-cellular compartments
-    t_v    = ICv;
-    t_vEnd = ICv + n;
-    t_l    = ICl;
-    t_f    = ICf;
-    t_t    = ICthreadsT;
-
-    while( t_v != t_vEnd )
-    {
-        // in this case, I need to walk throug because the segments are ordered in "voxel order"
-        if ( *t_t == id )
-            x[*t_f] += (double)(*t_l) * Y[*t_v];
-        t_t++;
-        t_f++;
-        t_v++;
-        t_l++;
-    }
-
-#if nISO>=1
-    // isotropic compartments
-    t_v    = ISOv + ISOthreadsT[id];
-    t_vEnd = ISOv + ISOthreadsT[id+1];
-    xPtr   = x + nF + ISOthreadsT[id];
-
-    while( t_v != t_vEnd )
-        (*xPtr++) += Y[*t_v++];
-#endif
-
-    pthread_exit( 0 );
-}
-
-
-// =========================
-// Function called by CYTHON
-// =========================
-void COMMIT_At(
-    int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs,
-    double *_vIN, double *_vOUT,
-    uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl,
-    uint32_t *_ECv, uint16_t *_ECo,
-    uint32_t *_ISOv,
-    float *_wmrSFP, float *_wmhSFP, float *_isoSFP,
-    uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT
-)
-{
-    nF = _nF;
-    n  = _n;
-
-    x = _vOUT;
-    Y = _vIN;
-
-    ICf  = _ICf;
-    ICv  = _ICv;
-    ICl  = _ICl;
-    ISOv = _ISOv;
-
-    ICthreadsT  = _ICthreadsT;
-    ISOthreadsT = _ISOthreadsT;
-
-    // Run SEPARATE THREADS to perform the multiplication
-    pthread_t threads[nTHREADS];
-    int t;
-    for(t=0; t<nTHREADS ; t++)
-        pthread_create( &threads[t], NULL, COMMIT_At__block, (void *) (long int)t );
-    for(t=0; t<nTHREADS ; t++)
-        pthread_join( threads[t], NULL );
-    return;
-}
diff --git a/commit/operator/operator_withLUT.c b/commit/operator/operator_withLUT.c
deleted file mode 100644
index 9c959f57..00000000
--- a/commit/operator/operator_withLUT.c
+++ /dev/null
@@ -1,2247 +0,0 @@
-#include <pthread.h>
-#include <stdint.h> // uint32_t etc
-
-// number of THREADS
-#ifdef nTHREADS
-    #if (nTHREADS<1 || nTHREADS>255)
-    #error "nTHREADS" must be in the range 1..255
-    #endif
-#else
-    #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=<value>"
-#endif
-
-
-/* global variables */
-int         nF, n, nE, nV, nS, ndirs;
-double      *x, *Y;
-uint32_t    *ICthreads, *ECthreads, *ISOthreads;
-uint8_t     *ICthreadsT;
-uint32_t    *ECthreadsT, *ISOthreadsT;
-uint32_t    *ICf, *ICv, *ECv, *ISOv;
-uint16_t    *ICo, *ECo;
-float       *ICl;
-float       *wmrSFP0, *wmrSFP1, *wmrSFP2, *wmrSFP3, *wmrSFP4, *wmrSFP5, *wmrSFP6, *wmrSFP7, *wmrSFP8, *wmrSFP9, *wmrSFP10, *wmrSFP11, *wmrSFP12, *wmrSFP13, *wmrSFP14, *wmrSFP15, *wmrSFP16, *wmrSFP17, *wmrSFP18, *wmrSFP19;
-float       *wmhSFP0, *wmhSFP1, *wmhSFP2, *wmhSFP3, *wmhSFP4, *wmhSFP5, *wmhSFP6, *wmhSFP7, *wmhSFP8, *wmhSFP9, *wmhSFP10, *wmhSFP11, *wmhSFP12, *wmhSFP13, *wmhSFP14, *wmhSFP15, *wmhSFP16, *wmhSFP17, *wmhSFP18, *wmhSFP19;
-float       *isoSFP0, *isoSFP1, *isoSFP2, *isoSFP3, *isoSFP4, *isoSFP5, *isoSFP6, *isoSFP7, *isoSFP8, *isoSFP9, *isoSFP10, *isoSFP11, *isoSFP12, *isoSFP13, *isoSFP14, *isoSFP15, *isoSFP16, *isoSFP17, *isoSFP18, *isoSFP19;
-
-
-
-// ====================================================
-// Compute a sub-block of the A*x MAtRIX-VECTOR product
-// ====================================================
-void* COMMIT_A__block( void *ptr )
-{
-    int      id = (long)ptr;
-    int      offset;
-    double   x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w;
-    double   *x_Ptr0, *x_Ptr1, *x_Ptr2, *x_Ptr3, *x_Ptr4, *x_Ptr5, *x_Ptr6, *x_Ptr7, *x_Ptr8, *x_Ptr9, *x_Ptr10, *x_Ptr11, *x_Ptr12, *x_Ptr13, *x_Ptr14, *x_Ptr15, *x_Ptr16, *x_Ptr17, *x_Ptr18, *x_Ptr19;
-    double   *Yptr, *YptrEnd;
-    float    *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr;
-    uint32_t *t_v, *t_vEnd, *t_f;
-    uint16_t *t_o;
-    float    *t_l;
-
-#if nIC>=1
-    // intra-cellular compartments
-    t_v    = ICv + ICthreads[id];
-    t_vEnd = ICv + ICthreads[id+1];
-    t_o    = ICo + ICthreads[id];
-    t_l    = ICl + ICthreads[id];
-    t_f    = ICf + ICthreads[id];
-
-    while( t_v != t_vEnd )
-    {
-        x_Ptr0 = x + *t_f;
-        x0 = *x_Ptr0;
-        #if nIC>=2
-        x_Ptr1 = x_Ptr0 + nF;
-        x1 = *x_Ptr1;
-        #endif
-        #if nIC>=3
-        x_Ptr2 = x_Ptr1 + nF;
-        x2 = *x_Ptr2;
-        #endif
-        #if nIC>=4
-        x_Ptr3 = x_Ptr2 + nF;
-        x3 = *x_Ptr3;
-        #endif
-        #if nIC>=5
-        x_Ptr4 = x_Ptr3 + nF;
-        x4 = *x_Ptr4;
-        #endif
-        #if nIC>=6
-        x_Ptr5 = x_Ptr4 + nF;
-        x5 = *x_Ptr5;
-        #endif
-        #if nIC>=7
-        x_Ptr6 = x_Ptr5 + nF;
-        x6 = *x_Ptr6;
-        #endif
-        #if nIC>=8
-        x_Ptr7 = x_Ptr6 + nF;
-        x7 = *x_Ptr7;
-        #endif
-        #if nIC>=9
-        x_Ptr8 = x_Ptr7 + nF;
-        x8 = *x_Ptr8;
-        #endif
-        #if nIC>=10
-        x_Ptr9 = x_Ptr8 + nF;
-        x9 = *x_Ptr9;
-        #endif
-        #if nIC>=11
-        x_Ptr10 = x_Ptr9 + nF;
-        x10 = *x_Ptr10;
-        #endif
-        #if nIC>=12
-        x_Ptr11 = x_Ptr10 + nF;
-        x11 = *x_Ptr11;
-        #endif
-        #if nIC>=13
-        x_Ptr12 = x_Ptr11 + nF;
-        x12 = *x_Ptr12;
-        #endif
-        #if nIC>=14
-        x_Ptr13 = x_Ptr12 + nF;
-        x13 = *x_Ptr13;
-        #endif
-        #if nIC>=15
-        x_Ptr14 = x_Ptr13 + nF;
-        x14 = *x_Ptr14;
-        #endif
-        #if nIC>=16
-        x_Ptr15 = x_Ptr14 + nF;
-        x15 = *x_Ptr15;
-        #endif
-        #if nIC>=17
-        x_Ptr16 = x_Ptr15 + nF;
-        x16 = *x_Ptr16;
-        #endif
-        #if nIC>=18
-        x_Ptr17 = x_Ptr16 + nF;
-        x17 = *x_Ptr17;
-        #endif
-        #if nIC>=19
-        x_Ptr18 = x_Ptr17 + nF;
-        x18 = *x_Ptr18;
-        #endif
-        #if nIC>=20
-        x_Ptr19 = x_Ptr18 + nF;
-        x19 = *x_Ptr19;
-        #endif
-
-        if ( x0 != 0
-        #if nIC>=2
-            || x1 != 0
-        #endif
-        #if nIC>=3
-            || x2 != 0
-        #endif
-        #if nIC>=4
-            || x3 != 0
-        #endif
-        #if nIC>=5
-            || x4 != 0
-        #endif
-        #if nIC>=6
-            || x5 != 0
-        #endif
-        #if nIC>=7
-            || x6 != 0
-        #endif
-        #if nIC>=8
-            || x7 != 0
-        #endif
-        #if nIC>=9
-            || x8 != 0
-        #endif
-        #if nIC>=10
-            || x9 != 0
-        #endif
-        #if nIC>=11
-            || x10 != 0
-        #endif
-        #if nIC>=12
-            || x11 != 0
-        #endif
-        #if nIC>=13
-            || x12 != 0
-        #endif
-        #if nIC>=14
-            || x13 != 0
-        #endif
-        #if nIC>=15
-            || x14 != 0
-        #endif
-        #if nIC>=16
-            || x15 != 0
-        #endif
-        #if nIC>=17
-            || x16 != 0
-        #endif
-        #if nIC>=18
-            || x17 != 0
-        #endif
-        #if nIC>=19
-            || x18 != 0
-        #endif
-        #if nIC>=20
-            || x19 != 0
-        #endif
-        )
-        {
-            Yptr    = Y    + nS * (*t_v);
-            YptrEnd = Yptr + nS;
-            w       = (double)(*t_l);
-            offset  = nS * (*t_o);
-            SFP0ptr = wmrSFP0 + offset;
-            #if nIC>=2
-            SFP1ptr = wmrSFP1 + offset;
-            #endif
-            #if nIC>=3
-            SFP2ptr = wmrSFP2 + offset;
-            #endif
-            #if nIC>=4
-            SFP3ptr = wmrSFP3 + offset;
-            #endif
-            #if nIC>=5
-            SFP4ptr = wmrSFP4 + offset;
-            #endif
-            #if nIC>=6
-            SFP5ptr = wmrSFP5 + offset;
-            #endif
-            #if nIC>=7
-            SFP6ptr = wmrSFP6 + offset;
-            #endif
-            #if nIC>=8
-            SFP7ptr = wmrSFP7 + offset;
-            #endif
-            #if nIC>=9
-            SFP8ptr = wmrSFP8 + offset;
-            #endif
-            #if nIC>=10
-            SFP9ptr = wmrSFP9 + offset;
-            #endif
-            #if nIC>=11
-            SFP10ptr = wmrSFP10 + offset;
-            #endif
-            #if nIC>=12
-            SFP11ptr = wmrSFP11 + offset;
-            #endif
-            #if nIC>=13
-            SFP12ptr = wmrSFP12 + offset;
-            #endif
-            #if nIC>=14
-            SFP13ptr = wmrSFP13 + offset;
-            #endif
-            #if nIC>=15
-            SFP14ptr = wmrSFP14 + offset;
-            #endif
-            #if nIC>=16
-            SFP15ptr = wmrSFP15 + offset;
-            #endif
-            #if nIC>=17
-            SFP16ptr = wmrSFP16 + offset;
-            #endif
-            #if nIC>=18
-            SFP17ptr = wmrSFP17 + offset;
-            #endif
-            #if nIC>=19
-            SFP18ptr = wmrSFP18 + offset;
-            #endif
-            #if nIC>=20
-            SFP19ptr = wmrSFP19 + offset;
-            #endif
-
-            while( Yptr != YptrEnd )
-                (*Yptr++) += w * (
-                          x0 * (*SFP0ptr++)
-                        #if nIC>=2
-                        + x1 * (*SFP1ptr++)
-                        #endif
-                        #if nIC>=3
-                        + x2 * (*SFP2ptr++)
-                        #endif
-                        #if nIC>=4
-                        + x3 * (*SFP3ptr++)
-                        #endif
-                        #if nIC>=5
-                        + x4 * (*SFP4ptr++)
-                        #endif
-                        #if nIC>=6
-                        + x5 * (*SFP5ptr++)
-                        #endif
-                        #if nIC>=7
-                        + x6 * (*SFP6ptr++)
-                        #endif
-                        #if nIC>=8
-                        + x7 * (*SFP7ptr++)
-                        #endif
-                        #if nIC>=9
-                        + x8 * (*SFP8ptr++)
-                        #endif
-                        #if nIC>=10
-                        + x9 * (*SFP9ptr++)
-                        #endif
-                        #if nIC>=11
-                        + x10 * (*SFP10ptr++)
-                        #endif
-                        #if nIC>=12
-                        + x11 * (*SFP11ptr++)
-                        #endif
-                        #if nIC>=13
-                        + x12 * (*SFP12ptr++)
-                        #endif
-                        #if nIC>=14
-                        + x13 * (*SFP13ptr++)
-                        #endif
-                        #if nIC>=15
-                        + x14 * (*SFP14ptr++)
-                        #endif
-                        #if nIC>=16
-                        + x15 * (*SFP15ptr++)
-                        #endif
-                        #if nIC>=17
-                        + x16 * (*SFP16ptr++)
-                        #endif
-                        #if nIC>=18
-                        + x17 * (*SFP17ptr++)
-                        #endif
-                        #if nIC>=19
-                        + x18 * (*SFP18ptr++)
-                        #endif
-                        #if nIC>=20
-                        + x19 * (*SFP19ptr++)
-                        #endif
-                );
-        }
-
-        t_f++;
-        t_v++;
-        t_o++;
-        t_l++;
-    }
-#endif
-
-#if nEC>=1
-    // extra-cellular compartments
-    t_v    = ECv + ECthreads[id];
-    t_vEnd = ECv + ECthreads[id+1];
-    t_o    = ECo + ECthreads[id];
-
-    x_Ptr0 = x + nIC*nF + ECthreads[id];
-    #if nEC>=2
-    x_Ptr1 = x_Ptr0 + nE;
-    #endif
-    #if nEC>=3
-    x_Ptr2 = x_Ptr1 + nE;
-    #endif
-    #if nEC>=4
-    x_Ptr3 = x_Ptr2 + nE;
-    #endif
-    #if nEC>=5
-    x_Ptr4 = x_Ptr3 + nE;
-    #endif
-    #if nEC>=6
-    x_Ptr5 = x_Ptr4 + nE;
-    #endif
-    #if nEC>=7
-    x_Ptr6 = x_Ptr5 + nE;
-    #endif
-    #if nEC>=8
-    x_Ptr7 = x_Ptr6 + nE;
-    #endif
-    #if nEC>=9
-    x_Ptr8 = x_Ptr7 + nE;
-    #endif
-    #if nEC>=10
-    x_Ptr9 = x_Ptr8 + nE;
-    #endif
-    #if nEC>=11
-    x_Ptr10 = x_Ptr9 + nE;
-    #endif
-    #if nEC>=12
-    x_Ptr11 = x_Ptr10 + nE;
-    #endif
-    #if nEC>=13
-    x_Ptr12 = x_Ptr11 + nE;
-    #endif
-    #if nEC>=14
-    x_Ptr13 = x_Ptr12 + nE;
-    #endif
-    #if nEC>=15
-    x_Ptr14 = x_Ptr13 + nE;
-    #endif
-    #if nEC>=16
-    x_Ptr15 = x_Ptr14 + nE;
-    #endif
-    #if nEC>=17
-    x_Ptr16 = x_Ptr15 + nE;
-    #endif
-    #if nEC>=18
-    x_Ptr17 = x_Ptr16 + nE;
-    #endif
-    #if nEC>=19
-    x_Ptr18 = x_Ptr17 + nE;
-    #endif
-    #if nEC>=20
-    x_Ptr19 = x_Ptr18 + nE;
-    #endif
-
-    while( t_v != t_vEnd )
-    {
-        x0 = *x_Ptr0++;
-        #if nEC>=2
-        x1 = *x_Ptr1++;
-        #endif
-        #if nEC>=3
-        x2 = *x_Ptr2++;
-        #endif
-        #if nEC>=4
-        x3 = *x_Ptr3++;
-        #endif
-        #if nEC>=5
-        x4 = *x_Ptr4++;
-        #endif
-        #if nEC>=6
-        x5 = *x_Ptr5++;
-        #endif
-        #if nEC>=7
-        x6 = *x_Ptr6++;
-        #endif
-        #if nEC>=8
-        x7 = *x_Ptr7++;
-        #endif
-        #if nEC>=9
-        x8 = *x_Ptr8++;
-        #endif
-        #if nEC>=10
-        x9 = *x_Ptr9++;
-        #endif
-        #if nEC>=11
-        x10 = *x_Ptr10++;
-        #endif
-        #if nEC>=12
-        x11 = *x_Ptr11++;
-        #endif
-        #if nEC>=13
-        x12 = *x_Ptr12++;
-        #endif
-        #if nEC>=14
-        x13 = *x_Ptr13++;
-        #endif
-        #if nEC>=15
-        x14 = *x_Ptr14++;
-        #endif
-        #if nEC>=16
-        x15 = *x_Ptr15++;
-        #endif
-        #if nEC>=17
-        x16 = *x_Ptr16++;
-        #endif
-        #if nEC>=18
-        x17 = *x_Ptr17++;
-        #endif
-        #if nEC>=19
-        x18 = *x_Ptr18++;
-        #endif
-        #if nEC>=20
-        x19 = *x_Ptr19++;
-        #endif
-        if (
-               x0 != 0
-            #if nEC>=2
-            || x1 != 0
-            #endif
-            #if nEC>=3
-            || x2 != 0
-            #endif
-            #if nEC>=4
-            || x3 != 0
-            #endif
-            #if nEC>=5
-            || x4 != 0
-            #endif
-            #if nEC>=6
-            || x5 != 0
-            #endif
-            #if nEC>=7
-            || x6 != 0
-            #endif
-            #if nEC>=8
-            || x7 != 0
-            #endif
-            #if nEC>=9
-            || x8 != 0
-            #endif
-            #if nEC>=10
-            || x9 != 0
-            #endif
-            #if nEC>=11
-            || x10 != 0
-            #endif
-            #if nEC>=12
-            || x11 != 0
-            #endif
-            #if nEC>=13
-            || x12 != 0
-            #endif
-            #if nEC>=14
-            || x13 != 0
-            #endif
-            #if nEC>=15
-            || x14 != 0
-            #endif
-            #if nEC>=16
-            || x15 != 0
-            #endif
-            #if nEC>=17
-            || x16 != 0
-            #endif
-            #if nEC>=18
-            || x17 != 0
-            #endif
-            #if nEC>=19
-            || x18 != 0
-            #endif
-            #if nEC>=20
-            || x19 != 0
-            #endif
-          )
-        {
-            Yptr    = Y    + nS * (*t_v);
-            YptrEnd = Yptr + nS;
-            offset  = nS * (*t_o);
-            SFP0ptr = wmhSFP0 + offset;
-            #if nEC>=2
-            SFP1ptr = wmhSFP1 + offset;
-            #endif
-            #if nEC>=3
-            SFP2ptr = wmhSFP2 + offset;
-            #endif
-            #if nEC>=4
-            SFP3ptr = wmhSFP3 + offset;
-            #endif
-            #if nEC>=5
-            SFP4ptr = wmhSFP4 + offset;
-            #endif
-            #if nEC>=6
-            SFP5ptr = wmhSFP5 + offset;
-            #endif
-            #if nEC>=7
-            SFP6ptr = wmhSFP6 + offset;
-            #endif
-            #if nEC>=8
-            SFP7ptr = wmhSFP7 + offset;
-            #endif
-            #if nEC>=9
-            SFP8ptr = wmhSFP8 + offset;
-            #endif
-            #if nEC>=10
-            SFP9ptr = wmhSFP9 + offset;
-            #endif
-            #if nEC>=11
-            SFP10ptr = wmhSFP10 + offset;
-            #endif
-            #if nEC>=12
-            SFP11ptr = wmhSFP11 + offset;
-            #endif
-            #if nEC>=13
-            SFP12ptr = wmhSFP12 + offset;
-            #endif
-            #if nEC>=14
-            SFP13ptr = wmhSFP13 + offset;
-            #endif
-            #if nEC>=15
-            SFP14ptr = wmhSFP14 + offset;
-            #endif
-            #if nEC>=16
-            SFP15ptr = wmhSFP15 + offset;
-            #endif
-            #if nEC>=17
-            SFP16ptr = wmhSFP16 + offset;
-            #endif
-            #if nEC>=18
-            SFP17ptr = wmhSFP17 + offset;
-            #endif
-            #if nEC>=19
-            SFP18ptr = wmhSFP18 + offset;
-            #endif
-            #if nEC>=20
-            SFP19ptr = wmhSFP19 + offset;
-            #endif
-
-            while( Yptr != YptrEnd )
-                (*Yptr++) += (
-                      x0 * (*SFP0ptr++)
-                    #if nEC>=2
-                    + x1 * (*SFP1ptr++)
-                    #endif
-                    #if nEC>=3
-                    + x2 * (*SFP2ptr++)
-                    #endif
-                    #if nEC>=4
-                    + x3 * (*SFP3ptr++)
-                    #endif
-                    #if nEC>=5
-                    + x4 * (*SFP4ptr++)
-                    #endif
-                    #if nEC>=6
-                    + x5 * (*SFP5ptr++)
-                    #endif
-                    #if nEC>=7
-                    + x6 * (*SFP6ptr++)
-                    #endif
-                    #if nEC>=8
-                    + x7 * (*SFP7ptr++)
-                    #endif
-                    #if nEC>=9
-                    + x8 * (*SFP8ptr++)
-                    #endif
-                    #if nEC>=10
-                    + x9 * (*SFP9ptr++)
-                    #endif
-                    #if nEC>=11
-                    + x10 * (*SFP10ptr++)
-                    #endif
-                    #if nEC>=12
-                    + x11 * (*SFP11ptr++)
-                    #endif
-                    #if nEC>=13
-                    + x12 * (*SFP12ptr++)
-                    #endif
-                    #if nEC>=14
-                    + x13 * (*SFP13ptr++)
-                    #endif
-                    #if nEC>=15
-                    + x14 * (*SFP14ptr++)
-                    #endif
-                    #if nEC>=16
-                    + x15 * (*SFP15ptr++)
-                    #endif
-                    #if nEC>=17
-                    + x16 * (*SFP16ptr++)
-                    #endif
-                    #if nEC>=18
-                    + x17 * (*SFP17ptr++)
-                    #endif
-                    #if nEC>=19
-                    + x18 * (*SFP18ptr++)
-                    #endif
-                    #if nEC>=20
-                    + x19 * (*SFP19ptr++)
-                    #endif
-
-                );
-        }
-        t_v++;
-        t_o++;
-    }
-#endif
-
-#if nISO>=1
-    // isotropic compartments
-    t_v    = ISOv + ISOthreads[id];
-    t_vEnd = ISOv + ISOthreads[id+1];
-
-    x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreads[id];
-    #if nISO>=2
-    x_Ptr1 = x_Ptr0 + nV;
-    #endif
-    #if nISO>=3
-    x_Ptr2 = x_Ptr1 + nV;
-    #endif
-    #if nISO>=4
-    x_Ptr3 = x_Ptr2 + nV;
-    #endif
-    #if nISO>=5
-    x_Ptr4 = x_Ptr3 + nV;
-    #endif
-    #if nISO>=6
-    x_Ptr5 = x_Ptr4 + nV;
-    #endif
-    #if nISO>=7
-    x_Ptr6 = x_Ptr5 + nV;
-    #endif
-    #if nISO>=8
-    x_Ptr7 = x_Ptr6 + nV;
-    #endif
-    #if nISO>=9
-    x_Ptr8 = x_Ptr7 + nV;
-    #endif
-    #if nISO>=10
-    x_Ptr9 = x_Ptr8 + nV;
-    #endif
-    #if nISO>=11
-    x_Ptr10 = x_Ptr9 + nV;
-    #endif
-    #if nISO>=12
-    x_Ptr11 = x_Ptr10 + nV;
-    #endif
-    #if nISO>=13
-    x_Ptr12 = x_Ptr11 + nV;
-    #endif
-    #if nISO>=14
-    x_Ptr13 = x_Ptr12 + nV;
-    #endif
-    #if nISO>=15
-    x_Ptr14 = x_Ptr13 + nV;
-    #endif
-    #if nISO>=16
-    x_Ptr15 = x_Ptr14 + nV;
-    #endif
-    #if nISO>=17
-    x_Ptr16 = x_Ptr15 + nV;
-    #endif
-    #if nISO>=18
-    x_Ptr17 = x_Ptr16 + nV;
-    #endif
-    #if nISO>=19
-    x_Ptr18 = x_Ptr17 + nV;
-    #endif
-    #if nISO>=20
-    x_Ptr19 = x_Ptr18 + nV;
-    #endif
-
-    while( t_v != t_vEnd )
-    {
-        x0 = *x_Ptr0++;
-        #if nISO>=2
-        x1 = *x_Ptr1++;
-        #endif
-        #if nISO>=3
-        x2 = *x_Ptr2++;
-        #endif
-        #if nISO>=4
-        x3 = *x_Ptr3++;
-        #endif
-        #if nISO>=5
-        x4 = *x_Ptr4++;
-        #endif
-        #if nISO>=6
-        x5 = *x_Ptr5++;
-        #endif
-        #if nISO>=7
-        x6 = *x_Ptr6++;
-        #endif
-        #if nISO>=8
-        x7 = *x_Ptr7++;
-        #endif
-        #if nISO>=9
-        x8 = *x_Ptr8++;
-        #endif
-        #if nISO>=10
-        x9 = *x_Ptr9++;
-        #endif
-        #if nISO>=11
-        x10 = *x_Ptr10++;
-        #endif
-        #if nISO>=12
-        x11 = *x_Ptr11++;
-        #endif
-        #if nISO>=13
-        x12 = *x_Ptr12++;
-        #endif
-        #if nISO>=14
-        x13 = *x_Ptr13++;
-        #endif
-        #if nISO>=15
-        x14 = *x_Ptr14++;
-        #endif
-        #if nISO>=16
-        x15 = *x_Ptr15++;
-        #endif
-        #if nISO>=17
-        x16 = *x_Ptr16++;
-        #endif
-        #if nISO>=18
-        x17 = *x_Ptr17++;
-        #endif
-        #if nISO>=19
-        x18 = *x_Ptr18++;
-        #endif
-        #if nISO>=20
-        x19 = *x_Ptr19++;
-        #endif
-
-        if (
-               x0 != 0
-            #if nISO>=2
-            || x1 != 0
-            #endif
-            #if nISO>=3
-            || x2 != 0
-            #endif
-            #if nISO>=4
-            || x3 != 0
-            #endif
-            #if nISO>=5
-            || x4 != 0
-            #endif
-            #if nISO>=6
-            || x5 != 0
-            #endif
-            #if nISO>=7
-            || x6 != 0
-            #endif
-            #if nISO>=8
-            || x7 != 0
-            #endif
-            #if nISO>=9
-            || x8 != 0
-            #endif
-            #if nISO>=10
-            || x9 != 0
-            #endif
-            #if nISO>=11
-            || x10 != 0
-            #endif
-            #if nISO>=12
-            || x11 != 0
-            #endif
-            #if nISO>=13
-            || x12 != 0
-            #endif
-            #if nISO>=14
-            || x13 != 0
-            #endif
-            #if nISO>=15
-            || x14 != 0
-            #endif
-            #if nISO>=16
-            || x15 != 0
-            #endif
-            #if nISO>=17
-            || x16 != 0
-            #endif
-            #if nISO>=18
-            || x17 != 0
-            #endif
-            #if nISO>=19
-            || x18 != 0
-            #endif
-            #if nISO>=20
-            || x19 != 0
-            #endif
-          )
-        {
-            Yptr    = Y    + nS * (*t_v);
-            YptrEnd = Yptr + nS;
-            SFP0ptr = isoSFP0;
-            #if nISO>=2
-            SFP1ptr = isoSFP1;
-            #endif
-            #if nISO>=3
-            SFP2ptr = isoSFP2;
-            #endif
-            #if nISO>=4
-            SFP3ptr = isoSFP3;
-            #endif
-            #if nISO>=5
-            SFP4ptr = isoSFP4;
-            #endif
-            #if nISO>=6
-            SFP5ptr = isoSFP5;
-            #endif
-            #if nISO>=7
-            SFP6ptr = isoSFP6;
-            #endif
-            #if nISO>=8
-            SFP7ptr = isoSFP7;
-            #endif
-            #if nISO>=9
-            SFP8ptr = isoSFP8;
-            #endif
-            #if nISO>=10
-            SFP9ptr = isoSFP9;
-            #endif
-            #if nISO>=11
-            SFP10ptr = isoSFP10;
-            #endif
-            #if nISO>=12
-            SFP11ptr = isoSFP11;
-            #endif
-            #if nISO>=13
-            SFP12ptr = isoSFP12;
-            #endif
-            #if nISO>=14
-            SFP13ptr = isoSFP13;
-            #endif
-            #if nISO>=15
-            SFP14ptr = isoSFP14;
-            #endif
-            #if nISO>=16
-            SFP15ptr = isoSFP15;
-            #endif
-            #if nISO>=17
-            SFP16ptr = isoSFP16;
-            #endif
-            #if nISO>=18
-            SFP17ptr = isoSFP17;
-            #endif
-            #if nISO>=19
-            SFP18ptr = isoSFP18;
-            #endif
-            #if nISO>=20
-            SFP19ptr = isoSFP19;
-            #endif
-
-            while( Yptr != YptrEnd )
-                (*Yptr++) += (
-                      x0 * (*SFP0ptr++)
-                    #if nISO>=2
-                    + x1 * (*SFP1ptr++)
-                    #endif
-                    #if nISO>=3
-                    + x2 * (*SFP2ptr++)
-                    #endif
-                    #if nISO>=4
-                    + x3 * (*SFP3ptr++)
-                    #endif
-                    #if nISO>=5
-                    + x4 * (*SFP4ptr++)
-                    #endif
-                    #if nISO>=6
-                    + x5 * (*SFP5ptr++)
-                    #endif
-                    #if nISO>=7
-                    + x6 * (*SFP6ptr++)
-                    #endif
-                    #if nISO>=8
-                    + x7 * (*SFP7ptr++)
-                    #endif
-                    #if nISO>=9
-                    + x8 * (*SFP8ptr++)
-                    #endif
-                    #if nISO>=10
-                    + x9 * (*SFP9ptr++)
-                    #endif
-                    #if nISO>=11
-                    + x10 * (*SFP10ptr++)
-                    #endif
-                    #if nISO>=12
-                    + x11 * (*SFP11ptr++)
-                    #endif
-                    #if nISO>=13
-                    + x12 * (*SFP12ptr++)
-                    #endif
-                    #if nISO>=14
-                    + x13 * (*SFP13ptr++)
-                    #endif
-                    #if nISO>=15
-                    + x14 * (*SFP14ptr++)
-                    #endif
-                    #if nISO>=16
-                    + x15 * (*SFP15ptr++)
-                    #endif
-                    #if nISO>=17
-                    + x16 * (*SFP16ptr++)
-                    #endif
-                    #if nISO>=18
-                    + x17 * (*SFP17ptr++)
-                    #endif
-                    #if nISO>=19
-                    + x18 * (*SFP18ptr++)
-                    #endif
-                    #if nISO>=20
-                    + x19 * (*SFP19ptr++)
-                    #endif
-                );
-        }
-        t_v++;
-    }
-#endif
-
-    pthread_exit( 0 );
-}
-
-
-// =========================
-// Function called by CYTHON
-// =========================
-void COMMIT_A(
-    int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs,
-    double *_vIN, double *_vOUT,
-    uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl,
-    uint32_t *_ECv, uint16_t *_ECo,
-    uint32_t *_ISOv,
-    float *_wmrSFP, float *_wmhSFP, float *_isoSFP,
-    uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads
-)
-{
-    nF = _nF;
-    n  = _n;
-    nE = _nE;
-    nV = _nV;
-    nS = _nS;
-    ndirs = _ndirs;
-
-    x = _vIN;
-    Y = _vOUT;
-
-    ICf  = _ICf;
-    ICv  = _ICv;
-    ICo  = _ICo;
-    ICl  = _ICl;
-    ECv  = _ECv;
-    ECo  = _ECo;
-    ISOv = _ISOv;
-
-    #if nIC>=1
-    wmrSFP0 = _wmrSFP;
-    #if nIC>=2
-    wmrSFP1 = wmrSFP0 + _ndirs*_nS;
-    #if nIC>=3
-    wmrSFP2 = wmrSFP1 + _ndirs*_nS;
-    #if nIC>=4
-    wmrSFP3 = wmrSFP2 + _ndirs*_nS;
-    #if nIC>=5
-    wmrSFP4 = wmrSFP3 + _ndirs*_nS;
-    #if nIC>=6
-    wmrSFP5 = wmrSFP4 + _ndirs*_nS;
-    #if nIC>=7
-    wmrSFP6 = wmrSFP5 + _ndirs*_nS;
-    #if nIC>=8
-    wmrSFP7 = wmrSFP6 + _ndirs*_nS;
-    #if nIC>=9
-    wmrSFP8 = wmrSFP7 + _ndirs*_nS;
-    #if nIC>=10
-    wmrSFP9 = wmrSFP8 + _ndirs*_nS;
-    #if nIC>=11
-    wmrSFP10 = wmrSFP9 + _ndirs*_nS;
-    #if nIC>=12
-    wmrSFP11 = wmrSFP10 + _ndirs*_nS;
-    #if nIC>=13
-    wmrSFP12 = wmrSFP11 + _ndirs*_nS;
-    #if nIC>=14
-    wmrSFP13 = wmrSFP12 + _ndirs*_nS;
-    #if nIC>=15
-    wmrSFP14 = wmrSFP13 + _ndirs*_nS;
-    #if nIC>=16
-    wmrSFP15 = wmrSFP14 + _ndirs*_nS;
-    #if nIC>=17
-    wmrSFP16 = wmrSFP15 + _ndirs*_nS;
-    #if nIC>=18
-    wmrSFP17 = wmrSFP16 + _ndirs*_nS;
-    #if nIC>=19
-    wmrSFP18 = wmrSFP17 + _ndirs*_nS;
-    #if nIC>=20
-    wmrSFP19 = wmrSFP18 + _ndirs*_nS;
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #if nEC>=1
-    wmhSFP0 = _wmhSFP;
-    #if nEC>=2
-    wmhSFP1 = wmhSFP0 + _ndirs*_nS;
-    #if nEC>=3
-    wmhSFP2 = wmhSFP1 + _ndirs*_nS;
-    #if nEC>=4
-    wmhSFP3 = wmhSFP2 + _ndirs*_nS;
-    #if nEC>=5
-    wmhSFP4 = wmhSFP3 + _ndirs*_nS;
-    #if nEC>=6
-    wmhSFP5 = wmhSFP4 + _ndirs*_nS;
-    #if nEC>=7
-    wmhSFP6 = wmhSFP5 + _ndirs*_nS;
-    #if nEC>=8
-    wmhSFP7 = wmhSFP6 + _ndirs*_nS;
-    #if nEC>=9
-    wmhSFP8 = wmhSFP7 + _ndirs*_nS;
-    #if nEC>=10
-    wmhSFP9 = wmhSFP8 + _ndirs*_nS;
-    #if nEC>=11
-    wmhSFP10 = wmhSFP9 + _ndirs*_nS;
-    #if nEC>=12
-    wmhSFP11 = wmhSFP10 + _ndirs*_nS;
-    #if nEC>=13
-    wmhSFP12 = wmhSFP11 + _ndirs*_nS;
-    #if nEC>=14
-    wmhSFP13 = wmhSFP12 + _ndirs*_nS;
-    #if nEC>=15
-    wmhSFP14 = wmhSFP13 + _ndirs*_nS;
-    #if nEC>=16
-    wmhSFP15 = wmhSFP14 + _ndirs*_nS;
-    #if nEC>=17
-    wmhSFP16 = wmhSFP15 + _ndirs*_nS;
-    #if nEC>=18
-    wmhSFP17 = wmhSFP16 + _ndirs*_nS;
-    #if nEC>=19
-    wmhSFP18 = wmhSFP17 + _ndirs*_nS;
-    #if nEC>=20
-    wmhSFP19 = wmhSFP18 + _ndirs*_nS;
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #if nISO>=1
-    isoSFP0 = _isoSFP;
-    #if nISO>=2
-    isoSFP1 = isoSFP0 + _nS;
-    #if nISO>=3
-    isoSFP2 = isoSFP1 + _nS;
-    #if nISO>=4
-    isoSFP3 = isoSFP2 + _nS;
-    #if nISO>=5
-    isoSFP4 = isoSFP3 + _nS;
-    #if nISO>=6
-    isoSFP5 = isoSFP4 + _nS;
-    #if nISO>=7
-    isoSFP6 = isoSFP5 + _nS;
-    #if nISO>=8
-    isoSFP7 = isoSFP6 + _nS;
-    #if nISO>=9
-    isoSFP8 = isoSFP7 + _nS;
-    #if nISO>=10
-    isoSFP9 = isoSFP8 + _nS;
-    #if nISO>=11
-    isoSFP10 = isoSFP9 + _nS;
-    #if nISO>=12
-    isoSFP11 = isoSFP10 + _nS;
-    #if nISO>=13
-    isoSFP12 = isoSFP11 + _nS;
-    #if nISO>=14
-    isoSFP13 = isoSFP12 + _nS;
-    #if nISO>=15
-    isoSFP14 = isoSFP13 + _nS;
-    #if nISO>=16
-    isoSFP15 = isoSFP14 + _nS;
-    #if nISO>=17
-    isoSFP16 = isoSFP15 + _nS;
-    #if nISO>=18
-    isoSFP17 = isoSFP16 + _nS;
-    #if nISO>=19
-    isoSFP18 = isoSFP17 + _nS;
-    #if nISO>=20
-    isoSFP19 = isoSFP18 + _nS;
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-
-    ICthreads  = _ICthreads;
-    ECthreads  = _ECthreads;
-    ISOthreads = _ISOthreads;
-
-    // Run SEPARATE THREADS to perform the multiplication
-    pthread_t threads[nTHREADS];
-    int t;
-    for(t=0; t<nTHREADS ; t++)
-        pthread_create( &threads[t], NULL, COMMIT_A__block, (void *) (long int)t );
-    for(t=0; t<nTHREADS ; t++)
-        pthread_join( threads[t], NULL );
-    return;
-}
-
-
-
-/* ===================================================== */
-/* Compute a sub-block of the A'*y MAtRIX-VECTOR product */
-/* ===================================================== */
-void* COMMIT_At__block( void *ptr )
-{
-    int      id = (long)ptr;
-    int      offset;
-    double   x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w, Y_tmp;
-    double   *x_Ptr0, *x_Ptr1, *x_Ptr2, *x_Ptr3, *x_Ptr4, *x_Ptr5, *x_Ptr6, *x_Ptr7, *x_Ptr8, *x_Ptr9, *x_Ptr10, *x_Ptr11, *x_Ptr12, *x_Ptr13, *x_Ptr14, *x_Ptr15, *x_Ptr16, *x_Ptr17, *x_Ptr18, *x_Ptr19;
-    double   *Yptr, *YptrEnd;
-    float    *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr;
-    uint32_t *t_v, *t_vEnd, *t_f;
-    uint16_t *t_o;
-    float    *t_l;
-    uint8_t  *t_t;
-
-#if nIC>=1
-    // intra-cellular compartments
-    t_v    = ICv;
-    t_vEnd = ICv + n;
-    t_o    = ICo;
-    t_l    = ICl;
-    t_f    = ICf;
-    t_t    = ICthreadsT;
-
-    while( t_v != t_vEnd )
-    {
-        // in this case, I need to walk throug because the segments are ordered in "voxel order"
-        if ( *t_t == id )
-        {
-            Yptr    = Y    + nS * (*t_v);
-            YptrEnd = Yptr + nS;
-            offset  = nS * (*t_o);
-
-            Y_tmp = *Yptr;
-            SFP0ptr   = wmrSFP0 + offset;
-            x0 = (*SFP0ptr++) * Y_tmp;
-            #if nIC>=2
-            SFP1ptr   = wmrSFP1 + offset;
-            x1 = (*SFP1ptr++) * Y_tmp;
-            #endif
-            #if nIC>=3
-            SFP2ptr   = wmrSFP2 + offset;
-            x2 = (*SFP2ptr++) * Y_tmp;
-            #endif
-            #if nIC>=4
-            SFP3ptr   = wmrSFP3 + offset;
-            x3 = (*SFP3ptr++) * Y_tmp;
-            #endif
-            #if nIC>=5
-            SFP4ptr   = wmrSFP4 + offset;
-            x4 = (*SFP4ptr++) * Y_tmp;
-            #endif
-            #if nIC>=6
-            SFP5ptr   = wmrSFP5 + offset;
-            x5 = (*SFP5ptr++) * Y_tmp;
-            #endif
-            #if nIC>=7
-            SFP6ptr   = wmrSFP6 + offset;
-            x6 = (*SFP6ptr++) * Y_tmp;
-            #endif
-            #if nIC>=8
-            SFP7ptr   = wmrSFP7 + offset;
-            x7 = (*SFP7ptr++) * Y_tmp;
-            #endif
-            #if nIC>=9
-            SFP8ptr   = wmrSFP8 + offset;
-            x8 = (*SFP8ptr++) * Y_tmp;
-            #endif
-            #if nIC>=10
-            SFP9ptr   = wmrSFP9 + offset;
-            x9 = (*SFP9ptr++) * Y_tmp;
-            #endif
-            #if nIC>=11
-            SFP10ptr   = wmrSFP10 + offset;
-            x10 = (*SFP10ptr++) * Y_tmp;
-            #endif
-            #if nIC>=12
-            SFP11ptr   = wmrSFP11 + offset;
-            x11 = (*SFP11ptr++) * Y_tmp;
-            #endif
-            #if nIC>=13
-            SFP12ptr   = wmrSFP12 + offset;
-            x12 = (*SFP12ptr++) * Y_tmp;
-            #endif
-            #if nIC>=14
-            SFP13ptr   = wmrSFP13 + offset;
-            x13 = (*SFP13ptr++) * Y_tmp;
-            #endif
-            #if nIC>=15
-            SFP14ptr   = wmrSFP14 + offset;
-            x14 = (*SFP14ptr++) * Y_tmp;
-            #endif
-            #if nIC>=16
-            SFP15ptr   = wmrSFP15 + offset;
-            x15 = (*SFP15ptr++) * Y_tmp;
-            #endif
-            #if nIC>=17
-            SFP16ptr   = wmrSFP16 + offset;
-            x16 = (*SFP16ptr++) * Y_tmp;
-            #endif
-            #if nIC>=18
-            SFP17ptr   = wmrSFP17 + offset;
-            x17 = (*SFP17ptr++) * Y_tmp;
-            #endif
-            #if nIC>=19
-            SFP18ptr   = wmrSFP18 + offset;
-            x18 = (*SFP18ptr++) * Y_tmp;
-            #endif
-            #if nIC>=20
-            SFP19ptr   = wmrSFP19 + offset;
-            x19 = (*SFP19ptr++) * Y_tmp;
-            #endif
-
-            while( ++Yptr != YptrEnd )
-            {
-                Y_tmp = *Yptr;
-                x0 += (*SFP0ptr++) * Y_tmp;
-                #if nIC>=2
-                x1 += (*SFP1ptr++) * Y_tmp;
-                #endif
-                #if nIC>=3
-                x2 += (*SFP2ptr++) * Y_tmp;
-                #endif
-                #if nIC>=4
-                x3 += (*SFP3ptr++) * Y_tmp;
-                #endif
-                #if nIC>=5
-                x4 += (*SFP4ptr++) * Y_tmp;
-                #endif
-                #if nIC>=6
-                x5 += (*SFP5ptr++) * Y_tmp;
-                #endif
-                #if nIC>=7
-                x6 += (*SFP6ptr++) * Y_tmp;
-                #endif
-                #if nIC>=8
-                x7 += (*SFP7ptr++) * Y_tmp;
-                #endif
-                #if nIC>=9
-                x8 += (*SFP8ptr++) * Y_tmp;
-                #endif
-                #if nIC>=10
-                x9 += (*SFP9ptr++) * Y_tmp;
-                #endif
-                #if nIC>=11
-                x10 += (*SFP10ptr++) * Y_tmp;
-                #endif
-                #if nIC>=12
-                x11 += (*SFP11ptr++) * Y_tmp;
-                #endif
-                #if nIC>=13
-                x12 += (*SFP12ptr++) * Y_tmp;
-                #endif
-                #if nIC>=14
-                x13 += (*SFP13ptr++) * Y_tmp;
-                #endif
-                #if nIC>=15
-                x14 += (*SFP14ptr++) * Y_tmp;
-                #endif
-                #if nIC>=16
-                x15 += (*SFP15ptr++) * Y_tmp;
-                #endif
-                #if nIC>=17
-                x16 += (*SFP16ptr++) * Y_tmp;
-                #endif
-                #if nIC>=18
-                x17 += (*SFP17ptr++) * Y_tmp;
-                #endif
-                #if nIC>=19
-                x18 += (*SFP18ptr++) * Y_tmp;
-                #endif
-                #if nIC>=20
-                x19 += (*SFP19ptr++) * Y_tmp;
-                #endif
-            }
-
-            w = (double)(*t_l);
-            x[*t_f]      += w * x0;
-            #if nIC>=2
-            x[*t_f+nF]   += w * x1;
-            #endif
-            #if nIC>=3
-            x[*t_f+2*nF] += w * x2;
-            #endif
-            #if nIC>=4
-            x[*t_f+3*nF] += w * x3;
-            #endif
-            #if nIC>=5
-            x[*t_f+4*nF] += w * x4;
-            #endif
-            #if nIC>=6
-            x[*t_f+5*nF] += w * x5;
-            #endif
-            #if nIC>=7
-            x[*t_f+6*nF] += w * x6;
-            #endif
-            #if nIC>=8
-            x[*t_f+7*nF] += w * x7;
-            #endif
-            #if nIC>=9
-            x[*t_f+8*nF] += w * x8;
-            #endif
-            #if nIC>=10
-            x[*t_f+9*nF] += w * x9;
-            #endif
-            #if nIC>=11
-            x[*t_f+10*nF] += w * x10;
-            #endif
-            #if nIC>=12
-            x[*t_f+11*nF] += w * x11;
-            #endif
-            #if nIC>=13
-            x[*t_f+12*nF] += w * x12;
-            #endif
-            #if nIC>=14
-            x[*t_f+13*nF] += w * x13;
-            #endif
-            #if nIC>=15
-            x[*t_f+14*nF] += w * x14;
-            #endif
-            #if nIC>=16
-            x[*t_f+15*nF] += w * x15;
-            #endif
-            #if nIC>=17
-            x[*t_f+16*nF] += w * x16;
-            #endif
-            #if nIC>=18
-            x[*t_f+17*nF] += w * x17;
-            #endif
-            #if nIC>=19
-            x[*t_f+18*nF] += w * x18;
-            #endif
-            #if nIC>=20
-            x[*t_f+19*nF] += w * x19;
-            #endif
-        }
-
-        t_f++;
-        t_v++;
-        t_o++;
-        t_l++;
-        t_t++;
-    }
-#endif
-
-#if nEC>=1
-    // extra-cellular compartments
-    t_v    = ECv + ECthreadsT[id];
-    t_vEnd = ECv + ECthreadsT[id+1];
-    t_o    = ECo + ECthreadsT[id];
-
-    x_Ptr0 = x + nIC*nF + ECthreadsT[id];
-    #if nEC>=2
-    x_Ptr1 = x_Ptr0 + nE;
-    #endif
-    #if nEC>=3
-    x_Ptr2 = x_Ptr1 + nE;
-    #endif
-    #if nEC>=4
-    x_Ptr3 = x_Ptr2 + nE;
-    #endif
-    #if nEC>=5
-    x_Ptr4 = x_Ptr3 + nE;
-    #endif
-    #if nEC>=6
-    x_Ptr5 = x_Ptr4 + nE;
-    #endif
-    #if nEC>=7
-    x_Ptr6 = x_Ptr5 + nE;
-    #endif
-    #if nEC>=8
-    x_Ptr7 = x_Ptr6 + nE;
-    #endif
-    #if nEC>=9
-    x_Ptr8 = x_Ptr7 + nE;
-    #endif
-    #if nEC>=10
-    x_Ptr9 = x_Ptr8 + nE;
-    #endif
-    #if nEC>=11
-    x_Ptr10 = x_Ptr9 + nE;
-    #endif
-    #if nEC>=12
-    x_Ptr11 = x_Ptr10 + nE;
-    #endif
-    #if nEC>=13
-    x_Ptr12 = x_Ptr11 + nE;
-    #endif
-    #if nEC>=14
-    x_Ptr13 = x_Ptr12 + nE;
-    #endif
-    #if nEC>=15
-    x_Ptr14 = x_Ptr13 + nE;
-    #endif
-    #if nEC>=16
-    x_Ptr15 = x_Ptr14 + nE;
-    #endif
-    #if nEC>=17
-    x_Ptr16 = x_Ptr15 + nE;
-    #endif
-    #if nEC>=18
-    x_Ptr17 = x_Ptr16 + nE;
-    #endif
-    #if nEC>=19
-    x_Ptr18 = x_Ptr17 + nE;
-    #endif
-    #if nEC>=20
-    x_Ptr19 = x_Ptr18 + nE;
-    #endif
-
-    while( t_v != t_vEnd )
-    {
-        Yptr    = Y    + nS * (*t_v++);
-        YptrEnd = Yptr + nS;
-        offset  = nS * (*t_o++);
-
-        Y_tmp = *Yptr;
-        SFP0ptr = wmhSFP0 + offset;
-        x0 = (*SFP0ptr++) * Y_tmp;
-        #if nEC>=2
-        SFP1ptr = wmhSFP1 + offset;
-        x1 = (*SFP1ptr++) * Y_tmp;
-        #endif
-        #if nEC>=3
-        SFP2ptr = wmhSFP2 + offset;
-        x2 = (*SFP2ptr++) * Y_tmp;
-        #endif
-        #if nEC>=4
-        SFP3ptr = wmhSFP3 + offset;
-        x3 = (*SFP3ptr++) * Y_tmp;
-        #endif
-        #if nEC>=5
-        SFP4ptr = wmhSFP4 + offset;
-        x4 = (*SFP4ptr++) * Y_tmp;
-        #endif
-        #if nEC>=6
-        SFP5ptr = wmhSFP5 + offset;
-        x5 = (*SFP5ptr++) * Y_tmp;
-        #endif
-        #if nEC>=7
-        SFP6ptr = wmhSFP6 + offset;
-        x6 = (*SFP6ptr++) * Y_tmp;
-        #endif
-        #if nEC>=8
-        SFP7ptr = wmhSFP7 + offset;
-        x7 = (*SFP7ptr++) * Y_tmp;
-        #endif
-        #if nEC>=9
-        SFP8ptr = wmhSFP8 + offset;
-        x8 = (*SFP8ptr++) * Y_tmp;
-        #endif
-        #if nEC>=10
-        SFP9ptr = wmhSFP9 + offset;
-        x9 = (*SFP9ptr++) * Y_tmp;
-        #endif
-        #if nEC>=11
-        SFP10ptr = wmhSFP10 + offset;
-        x10 = (*SFP10ptr++) * Y_tmp;
-        #endif
-        #if nEC>=12
-        SFP11ptr = wmhSFP11 + offset;
-        x11 = (*SFP11ptr++) * Y_tmp;
-        #endif
-        #if nEC>=13
-        SFP12ptr = wmhSFP12 + offset;
-        x12 = (*SFP12ptr++) * Y_tmp;
-        #endif
-        #if nEC>=14
-        SFP13ptr = wmhSFP13 + offset;
-        x13 = (*SFP13ptr++) * Y_tmp;
-        #endif
-        #if nEC>=15
-        SFP14ptr = wmhSFP14 + offset;
-        x14 = (*SFP14ptr++) * Y_tmp;
-        #endif
-        #if nEC>=16
-        SFP15ptr = wmhSFP15 + offset;
-        x15 = (*SFP15ptr++) * Y_tmp;
-        #endif
-        #if nEC>=17
-        SFP16ptr = wmhSFP16 + offset;
-        x16 = (*SFP16ptr++) * Y_tmp;
-        #endif
-        #if nEC>=18
-        SFP17ptr = wmhSFP17 + offset;
-        x17 = (*SFP17ptr++) * Y_tmp;
-        #endif
-        #if nEC>=19
-        SFP18ptr = wmhSFP18 + offset;
-        x18 = (*SFP18ptr++) * Y_tmp;
-        #endif
-        #if nEC>=20
-        SFP19ptr = wmhSFP19 + offset;
-        x19 = (*SFP19ptr++) * Y_tmp;
-        #endif
-
-        while( ++Yptr != YptrEnd )
-        {
-            Y_tmp = *Yptr;
-            x0 += (*SFP0ptr++) * Y_tmp;
-            #if nEC>=2
-            x1 += (*SFP1ptr++) * Y_tmp;
-            #endif
-            #if nEC>=3
-            x2 += (*SFP2ptr++) * Y_tmp;
-            #endif
-            #if nEC>=4
-            x3 += (*SFP3ptr++) * Y_tmp;
-            #endif
-            #if nEC>=5
-            x4 += (*SFP4ptr++) * Y_tmp;
-            #endif
-            #if nEC>=6
-            x5 += (*SFP5ptr++) * Y_tmp;
-            #endif
-            #if nEC>=7
-            x6 += (*SFP6ptr++) * Y_tmp;
-            #endif
-            #if nEC>=8
-            x7 += (*SFP7ptr++) * Y_tmp;
-            #endif
-            #if nEC>=9
-            x8 += (*SFP8ptr++) * Y_tmp;
-            #endif
-            #if nEC>=10
-            x9 += (*SFP9ptr++) * Y_tmp;
-            #endif
-            #if nEC>=11
-            x10 += (*SFP10ptr++) * Y_tmp;
-            #endif
-            #if nEC>=12
-            x11 += (*SFP11ptr++) * Y_tmp;
-            #endif
-            #if nEC>=13
-            x12 += (*SFP12ptr++) * Y_tmp;
-            #endif
-            #if nEC>=14
-            x13 += (*SFP13ptr++) * Y_tmp;
-            #endif
-            #if nEC>=15
-            x14 += (*SFP14ptr++) * Y_tmp;
-            #endif
-            #if nEC>=16
-            x15 += (*SFP15ptr++) * Y_tmp;
-            #endif
-            #if nEC>=17
-            x16 += (*SFP16ptr++) * Y_tmp;
-            #endif
-            #if nEC>=18
-            x17 += (*SFP17ptr++) * Y_tmp;
-            #endif
-            #if nEC>=19
-            x18 += (*SFP18ptr++) * Y_tmp;
-            #endif
-            #if nEC>=20
-            x19 += (*SFP19ptr++) * Y_tmp;
-            #endif
-        }
-        (*x_Ptr0++) += x0;
-        #if nEC>=2
-        (*x_Ptr1++) += x1;
-        #endif
-        #if nEC>=3
-        (*x_Ptr2++) += x2;
-        #endif
-        #if nEC>=4
-        (*x_Ptr3++) += x3;
-        #endif
-        #if nEC>=5
-        (*x_Ptr4++) += x4;
-        #endif
-        #if nEC>=6
-        (*x_Ptr5++) += x5;
-        #endif
-        #if nEC>=7
-        (*x_Ptr6++) += x6;
-        #endif
-        #if nEC>=8
-        (*x_Ptr7++) += x7;
-        #endif
-        #if nEC>=9
-        (*x_Ptr8++) += x8;
-        #endif
-        #if nEC>=10
-        (*x_Ptr9++) += x9;
-        #endif
-        #if nEC>=11
-        (*x_Ptr10++) += x10;
-        #endif
-        #if nEC>=12
-        (*x_Ptr11++) += x11;
-        #endif
-        #if nEC>=13
-        (*x_Ptr12++) += x12;
-        #endif
-        #if nEC>=14
-        (*x_Ptr13++) += x13;
-        #endif
-        #if nEC>=15
-        (*x_Ptr14++) += x14;
-        #endif
-        #if nEC>=16
-        (*x_Ptr15++) += x15;
-        #endif
-        #if nEC>=17
-        (*x_Ptr16++) += x16;
-        #endif
-        #if nEC>=18
-        (*x_Ptr17++) += x17;
-        #endif
-        #if nEC>=19
-        (*x_Ptr18++) += x18;
-        #endif
-        #if nEC>=20
-        (*x_Ptr19++) += x19;
-        #endif
-    }
-#endif
-
-#if nISO>=1
-    // isotropic compartments
-    t_v    = ISOv + ISOthreadsT[id];
-    t_vEnd = ISOv + ISOthreadsT[id+1];
-
-    x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreadsT[id];
-    #if nISO>=2
-    x_Ptr1 = x_Ptr0 + nV;
-    #endif
-    #if nISO>=3
-    x_Ptr2 = x_Ptr1 + nV;
-    #endif
-    #if nISO>=4
-    x_Ptr3 = x_Ptr2 + nV;
-    #endif
-    #if nISO>=5
-    x_Ptr4 = x_Ptr3 + nV;
-    #endif
-    #if nISO>=6
-    x_Ptr5 = x_Ptr4 + nV;
-    #endif
-    #if nISO>=7
-    x_Ptr6 = x_Ptr5 + nV;
-    #endif
-    #if nISO>=8
-    x_Ptr7 = x_Ptr6 + nV;
-    #endif
-    #if nISO>=9
-    x_Ptr8 = x_Ptr7 + nV;
-    #endif
-    #if nISO>=10
-    x_Ptr9 = x_Ptr8 + nV;
-    #endif
-    #if nISO>=11
-    x_Ptr10 = x_Ptr9 + nV;
-    #endif
-    #if nISO>=12
-    x_Ptr11 = x_Ptr10 + nV;
-    #endif
-    #if nISO>=13
-    x_Ptr12 = x_Ptr11 + nV;
-    #endif
-    #if nISO>=14
-    x_Ptr13 = x_Ptr12 + nV;
-    #endif
-    #if nISO>=15
-    x_Ptr14 = x_Ptr13 + nV;
-    #endif
-    #if nISO>=16
-    x_Ptr15 = x_Ptr14 + nV;
-    #endif
-    #if nISO>=17
-    x_Ptr16 = x_Ptr15 + nV;
-    #endif
-    #if nISO>=18
-    x_Ptr17 = x_Ptr16 + nV;
-    #endif
-    #if nISO>=19
-    x_Ptr18 = x_Ptr17 + nV;
-    #endif
-    #if nISO>=20
-    x_Ptr19 = x_Ptr18 + nV;
-    #endif
-
-    while( t_v != t_vEnd )
-    {
-        Yptr    = Y    + nS * (*t_v++);
-        YptrEnd = Yptr + nS;
-
-        SFP0ptr = isoSFP0;
-        #if nISO>=2
-        SFP1ptr = isoSFP1;
-        #endif
-        #if nISO>=3
-        SFP2ptr = isoSFP2;
-        #endif
-        #if nISO>=4
-        SFP3ptr = isoSFP3;
-        #endif
-        #if nISO>=5
-        SFP4ptr = isoSFP4;
-        #endif
-        #if nISO>=6
-        SFP5ptr = isoSFP5;
-        #endif
-        #if nISO>=7
-        SFP6ptr = isoSFP6;
-        #endif
-        #if nISO>=8
-        SFP7ptr = isoSFP7;
-        #endif
-        #if nISO>=9
-        SFP8ptr = isoSFP8;
-        #endif
-        #if nISO>=10
-        SFP9ptr = isoSFP9;
-        #endif
-        #if nISO>=11
-        SFP10ptr = isoSFP10;
-        #endif
-        #if nISO>=12
-        SFP11ptr = isoSFP11;
-        #endif
-        #if nISO>=13
-        SFP12ptr = isoSFP12;
-        #endif
-        #if nISO>=14
-        SFP13ptr = isoSFP13;
-        #endif
-        #if nISO>=15
-        SFP14ptr = isoSFP14;
-        #endif
-        #if nISO>=16
-        SFP15ptr = isoSFP15;
-        #endif
-        #if nISO>=17
-        SFP16ptr = isoSFP16;
-        #endif
-        #if nISO>=18
-        SFP17ptr = isoSFP17;
-        #endif
-        #if nISO>=19
-        SFP18ptr = isoSFP18;
-        #endif
-        #if nISO>=20
-        SFP19ptr = isoSFP19;
-        #endif
-
-        Y_tmp = *Yptr;
-        x0 = (*SFP0ptr++) * Y_tmp;
-        #if nISO>=2
-        x1 = (*SFP1ptr++) * Y_tmp;
-        #endif
-        #if nISO>=3
-        x2 = (*SFP2ptr++) * Y_tmp;
-        #endif
-        #if nISO>=4
-        x3 = (*SFP3ptr++) * Y_tmp;
-        #endif
-        #if nISO>=5
-        x4 = (*SFP4ptr++) * Y_tmp;
-        #endif
-        #if nISO>=6
-        x5 = (*SFP5ptr++) * Y_tmp;
-        #endif
-        #if nISO>=7
-        x6 = (*SFP6ptr++) * Y_tmp;
-        #endif
-        #if nISO>=8
-        x7 = (*SFP7ptr++) * Y_tmp;
-        #endif
-        #if nISO>=9
-        x8 = (*SFP8ptr++) * Y_tmp;
-        #endif
-        #if nISO>=10
-        x9 = (*SFP9ptr++) * Y_tmp;
-        #endif
-        #if nISO>=11
-        x10 = (*SFP10ptr++) * Y_tmp;
-        #endif
-        #if nISO>=12
-        x11 = (*SFP11ptr++) * Y_tmp;
-        #endif
-        #if nISO>=13
-        x12 = (*SFP12ptr++) * Y_tmp;
-        #endif
-        #if nISO>=14
-        x13 = (*SFP13ptr++) * Y_tmp;
-        #endif
-        #if nISO>=15
-        x14 = (*SFP14ptr++) * Y_tmp;
-        #endif
-        #if nISO>=16
-        x15 = (*SFP15ptr++) * Y_tmp;
-        #endif
-        #if nISO>=17
-        x16 = (*SFP16ptr++) * Y_tmp;
-        #endif
-        #if nISO>=18
-        x17 = (*SFP17ptr++) * Y_tmp;
-        #endif
-        #if nISO>=19
-        x18 = (*SFP18ptr++) * Y_tmp;
-        #endif
-        #if nISO>=20
-        x19 = (*SFP19ptr++) * Y_tmp;
-        #endif
-
-        while( ++Yptr != YptrEnd )
-        {
-            Y_tmp = *Yptr;
-            x0  += (*SFP0ptr++) * Y_tmp;
-            #if nISO>=2
-            x1  += (*SFP1ptr++) * Y_tmp;
-            #endif
-            #if nISO>=3
-            x2  += (*SFP2ptr++) * Y_tmp;
-            #endif
-            #if nISO>=4
-            x3  += (*SFP3ptr++) * Y_tmp;
-            #endif
-            #if nISO>=5
-            x4  += (*SFP4ptr++) * Y_tmp;
-            #endif
-            #if nISO>=6
-            x5  += (*SFP5ptr++) * Y_tmp;
-            #endif
-            #if nISO>=7
-            x6  += (*SFP6ptr++) * Y_tmp;
-            #endif
-            #if nISO>=8
-            x7  += (*SFP7ptr++) * Y_tmp;
-            #endif
-            #if nISO>=9
-            x8  += (*SFP8ptr++) * Y_tmp;
-            #endif
-            #if nISO>=10
-            x9  += (*SFP9ptr++) * Y_tmp;
-            #endif
-            #if nISO>=11
-            x10  += (*SFP10ptr++) * Y_tmp;
-            #endif
-            #if nISO>=12
-            x11  += (*SFP11ptr++) * Y_tmp;
-            #endif
-            #if nISO>=13
-            x12  += (*SFP12ptr++) * Y_tmp;
-            #endif
-            #if nISO>=14
-            x13  += (*SFP13ptr++) * Y_tmp;
-            #endif
-            #if nISO>=15
-            x14  += (*SFP14ptr++) * Y_tmp;
-            #endif
-            #if nISO>=16
-            x15  += (*SFP15ptr++) * Y_tmp;
-            #endif
-            #if nISO>=17
-            x16  += (*SFP16ptr++) * Y_tmp;
-            #endif
-            #if nISO>=18
-            x17  += (*SFP17ptr++) * Y_tmp;
-            #endif
-            #if nISO>=19
-            x18  += (*SFP18ptr++) * Y_tmp;
-            #endif
-            #if nISO>=20
-            x19  += (*SFP19ptr++) * Y_tmp;
-            #endif
-        }
-
-        (*x_Ptr0++) += x0;
-        #if nISO>=2
-        (*x_Ptr1++) += x1;
-        #endif
-        #if nISO>=3
-        (*x_Ptr2++) += x2;
-        #endif
-        #if nISO>=4
-        (*x_Ptr3++) += x3;
-        #endif
-        #if nISO>=5
-        (*x_Ptr4++) += x4;
-        #endif
-        #if nISO>=6
-        (*x_Ptr5++) += x5;
-        #endif
-        #if nISO>=7
-        (*x_Ptr6++) += x6;
-        #endif
-        #if nISO>=8
-        (*x_Ptr7++) += x7;
-        #endif
-        #if nISO>=9
-        (*x_Ptr8++) += x8;
-        #endif
-        #if nISO>=10
-        (*x_Ptr9++) += x9;
-        #endif
-        #if nISO>=11
-        (*x_Ptr10++) += x10;
-        #endif
-        #if nISO>=12
-        (*x_Ptr11++) += x11;
-        #endif
-        #if nISO>=13
-        (*x_Ptr12++) += x12;
-        #endif
-        #if nISO>=14
-        (*x_Ptr13++) += x13;
-        #endif
-        #if nISO>=15
-        (*x_Ptr14++) += x14;
-        #endif
-        #if nISO>=16
-        (*x_Ptr15++) += x15;
-        #endif
-        #if nISO>=17
-        (*x_Ptr16++) += x16;
-        #endif
-        #if nISO>=18
-        (*x_Ptr17++) += x17;
-        #endif
-        #if nISO>=19
-        (*x_Ptr18++) += x18;
-        #endif
-        #if nISO>=20
-        (*x_Ptr19++) += x19;
-        #endif
-    }
-#endif
-
-    pthread_exit( 0 );
-}
-
-
-// =========================
-// Function called by CYTHON
-// =========================
-void COMMIT_At(
-    int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs,
-    double *_vIN, double *_vOUT,
-    uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl,
-    uint32_t *_ECv, uint16_t *_ECo,
-    uint32_t *_ISOv,
-    float *_wmrSFP, float *_wmhSFP, float *_isoSFP,
-    uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT
-)
-{
-    nF = _nF;
-    n  = _n;
-    nE = _nE;
-    nV = _nV;
-    nS = _nS;
-    ndirs = _ndirs;
-
-    x = _vOUT;
-    Y = _vIN;
-
-    ICf  = _ICf;
-    ICv  = _ICv;
-    ICo  = _ICo;
-    ICl  = _ICl;
-    ECv  = _ECv;
-    ECo  = _ECo;
-    ISOv = _ISOv;
-
-    #if nIC>=1
-    wmrSFP0 = _wmrSFP;
-    #if nIC>=2
-    wmrSFP1 = wmrSFP0 + _ndirs*_nS;
-    #if nIC>=3
-    wmrSFP2 = wmrSFP1 + _ndirs*_nS;
-    #if nIC>=4
-    wmrSFP3 = wmrSFP2 + _ndirs*_nS;
-    #if nIC>=5
-    wmrSFP4 = wmrSFP3 + _ndirs*_nS;
-    #if nIC>=6
-    wmrSFP5 = wmrSFP4 + _ndirs*_nS;
-    #if nIC>=7
-    wmrSFP6 = wmrSFP5 + _ndirs*_nS;
-    #if nIC>=8
-    wmrSFP7 = wmrSFP6 + _ndirs*_nS;
-    #if nIC>=9
-    wmrSFP8 = wmrSFP7 + _ndirs*_nS;
-    #if nIC>=10
-    wmrSFP9 = wmrSFP8 + _ndirs*_nS;
-    #if nIC>=11
-    wmrSFP10 = wmrSFP9 + _ndirs*_nS;
-    #if nIC>=12
-    wmrSFP11 = wmrSFP10 + _ndirs*_nS;
-    #if nIC>=13
-    wmrSFP12 = wmrSFP11 + _ndirs*_nS;
-    #if nIC>=14
-    wmrSFP13 = wmrSFP12 + _ndirs*_nS;
-    #if nIC>=15
-    wmrSFP14 = wmrSFP13 + _ndirs*_nS;
-    #if nIC>=16
-    wmrSFP15 = wmrSFP14 + _ndirs*_nS;
-    #if nIC>=17
-    wmrSFP16 = wmrSFP15 + _ndirs*_nS;
-    #if nIC>=18
-    wmrSFP17 = wmrSFP16 + _ndirs*_nS;
-    #if nIC>=19
-    wmrSFP18 = wmrSFP17 + _ndirs*_nS;
-    #if nIC>=20
-    wmrSFP19 = wmrSFP18 + _ndirs*_nS;
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #if nEC>=1
-    wmhSFP0 = _wmhSFP;
-    #if nEC>=2
-    wmhSFP1 = wmhSFP0 + _ndirs*_nS;
-    #if nEC>=3
-    wmhSFP2 = wmhSFP1 + _ndirs*_nS;
-    #if nEC>=4
-    wmhSFP3 = wmhSFP2 + _ndirs*_nS;
-    #if nEC>=5
-    wmhSFP4 = wmhSFP3 + _ndirs*_nS;
-    #if nEC>=6
-    wmhSFP5 = wmhSFP4 + _ndirs*_nS;
-    #if nEC>=7
-    wmhSFP6 = wmhSFP5 + _ndirs*_nS;
-    #if nEC>=8
-    wmhSFP7 = wmhSFP6 + _ndirs*_nS;
-    #if nEC>=9
-    wmhSFP8 = wmhSFP7 + _ndirs*_nS;
-    #if nEC>=10
-    wmhSFP9 = wmhSFP8 + _ndirs*_nS;
-    #if nEC>=11
-    wmhSFP10 = wmhSFP9 + _ndirs*_nS;
-    #if nEC>=12
-    wmhSFP11 = wmhSFP10 + _ndirs*_nS;
-    #if nEC>=13
-    wmhSFP12 = wmhSFP11 + _ndirs*_nS;
-    #if nEC>=14
-    wmhSFP13 = wmhSFP12 + _ndirs*_nS;
-    #if nEC>=15
-    wmhSFP14 = wmhSFP13 + _ndirs*_nS;
-    #if nEC>=16
-    wmhSFP15 = wmhSFP14 + _ndirs*_nS;
-    #if nEC>=17
-    wmhSFP16 = wmhSFP15 + _ndirs*_nS;
-    #if nEC>=18
-    wmhSFP17 = wmhSFP16 + _ndirs*_nS;
-    #if nEC>=19
-    wmhSFP18 = wmhSFP17 + _ndirs*_nS;
-    #if nEC>=20
-    wmhSFP19 = wmhSFP18 + _ndirs*_nS;
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #if nISO>=1
-    isoSFP0 = _isoSFP;
-    #if nISO>=2
-    isoSFP1 = isoSFP0 + _nS;
-    #if nISO>=3
-    isoSFP2 = isoSFP1 + _nS;
-    #if nISO>=4
-    isoSFP3 = isoSFP2 + _nS;
-    #if nISO>=5
-    isoSFP4 = isoSFP3 + _nS;
-    #if nISO>=6
-    isoSFP5 = isoSFP4 + _nS;
-    #if nISO>=7
-    isoSFP6 = isoSFP5 + _nS;
-    #if nISO>=8
-    isoSFP7 = isoSFP6 + _nS;
-    #if nISO>=9
-    isoSFP8 = isoSFP7 + _nS;
-    #if nISO>=10
-    isoSFP9 = isoSFP8 + _nS;
-    #if nISO>=11
-    isoSFP10 = isoSFP9 + _nS;
-    #if nISO>=12
-    isoSFP11 = isoSFP10 + _nS;
-    #if nISO>=13
-    isoSFP12 = isoSFP11 + _nS;
-    #if nISO>=14
-    isoSFP13 = isoSFP12 + _nS;
-    #if nISO>=15
-    isoSFP14 = isoSFP13 + _nS;
-    #if nISO>=16
-    isoSFP15 = isoSFP14 + _nS;
-    #if nISO>=17
-    isoSFP16 = isoSFP15 + _nS;
-    #if nISO>=18
-    isoSFP17 = isoSFP16 + _nS;
-    #if nISO>=19
-    isoSFP18 = isoSFP17 + _nS;
-    #if nISO>=20
-    isoSFP19 = isoSFP18 + _nS;
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-    #endif
-
-    ICthreadsT  = _ICthreadsT;
-    ECthreadsT  = _ECthreadsT;
-    ISOthreadsT = _ISOthreadsT;
-
-    // Run SEPARATE THREADS to perform the multiplication
-    pthread_t threads[nTHREADS];
-    int t;
-    for(t=0; t<nTHREADS ; t++)
-        pthread_create( &threads[t], NULL, COMMIT_At__block, (void *) (long int)t );
-    for(t=0; t<nTHREADS ; t++)
-        pthread_join( threads[t], NULL );
-    return;
-}
diff --git a/commit/solvers.py b/commit/solvers.py
index d45325c9..730f06d6 100755
--- a/commit/solvers.py
+++ b/commit/solvers.py
@@ -34,12 +34,12 @@ def init_regularisation(regularisation_params):
 
     # create array with al coeff_weights for weighted version of 'lasso'
     all_coeff_weights = np.ones(sizeIC+sizeEC+sizeISO, dtype=np.float64)
-    if regularisation_params.get('dictIC_params') is not None and "coeff_weights" in regularisation_params['dictIC_params'].keys():
-        all_coeff_weights[startIC:(startIC+sizeIC)] = regularisation_params['dictIC_params']["coeff_weights"]
-    if regularisation_params.get('dictEC_params') is not None and "coeff_weights" in regularisation_params['dictEC_params'].keys():
-        all_coeff_weights[startEC:(startEC+sizeEC)] = regularisation_params['dictEC_params']["coeff_weights"]
-    if regularisation_params.get('dictISO_params') is not None and "coeff_weights" in regularisation_params['dictISO_params'].keys():
-        all_coeff_weights[startISO:(startISO+sizeISO)] = regularisation_params['dictISO_params']["coeff_weights"]
+    if regularisation_params.get('dictIC_params') is not None and "coeff_weights_kept" in regularisation_params['dictIC_params'].keys():
+        all_coeff_weights[startIC:(startIC+sizeIC)] = regularisation_params['dictIC_params']["coeff_weights_kept"]
+    # if regularisation_params.get('dictEC_params') is not None and "coeff_weights" in regularisation_params['dictEC_params'].keys():
+    #     all_coeff_weights[startEC:(startEC+sizeEC)] = regularisation_params['dictEC_params']["coeff_weights"]
+    # if regularisation_params.get('dictISO_params') is not None and "coeff_weights" in regularisation_params['dictISO_params'].keys():
+    #     all_coeff_weights[startISO:(startISO+sizeISO)] = regularisation_params['dictISO_params']["coeff_weights"]
 
     ############################
     # INTRACELLULAR COMPARTMENT#
@@ -57,7 +57,7 @@ def init_regularisation(regularisation_params):
     elif regularisation_params['regIC'] == 'lasso':
         lambdaIC = regularisation_params['lambdaIC']
         # check if weights are provided
-        if dictIC_params is not None and "coeff_weights" in dictIC_params.keys():
+        if dictIC_params is not None and "coeff_weights_kept" in dictIC_params.keys():
             # w = dictIC_params["coeff_weights"]
             omegaIC = lambda x: lambdaIC * np.linalg.norm(all_coeff_weights[startIC:sizeIC]*x[startIC:sizeIC],1)
             if regularisation_params.get('nnIC'):
@@ -77,17 +77,17 @@ def init_regularisation(regularisation_params):
     #     proxIC  = lambda x: projection_onto_l2_ball(x, lambdaIC, startIC, sizeIC)
 
     elif regularisation_params['regIC'] == 'group_lasso':
-        if not len(dictIC_params["group_idx"]) == len(dictIC_params['group_weights']):
+        if not len(dictIC_params['group_idx_kept']) == len(dictIC_params['group_weights']):
             logger.error('Number of groups and weights do not match')
 
         lambda_group_IC = regularisation_params['lambdaIC']
 
         # convert to new data structure (needed for faster access)
-        N = np.sum([g.size for g in dictIC_params["group_idx"]])
+        N = np.sum([g.size for g in dictIC_params['group_idx_kept']])
         groupIdxIC = np.zeros( (N,), dtype=np.int32 )
-        groupSizeIC = np.zeros( (dictIC_params["group_idx"].size,), dtype=np.int32 )
+        groupSizeIC = np.zeros( (dictIC_params['group_idx_kept'].size,), dtype=np.int32 )
         pos = 0
-        for i, g in enumerate(dictIC_params["group_idx"]) :
+        for i, g in enumerate(dictIC_params['group_idx_kept']) :
             groupSizeIC[i] = g.size
             groupIdxIC[pos:(pos+g.size)] = g[:]
             pos += g.size
@@ -99,23 +99,23 @@ def init_regularisation(regularisation_params):
             proxIC = lambda x, scaling: prox_group_lasso(x,groupIdxIC,groupSizeIC,dictIC_params['group_weights'],scaling*lambda_group_IC)
   
     elif regularisation_params['regIC'] == 'sparse_group_lasso':
-        if not len(dictIC_params["group_idx"]) == len(dictIC_params['group_weights']):
+        if not len(dictIC_params['group_idx_kept']) == len(dictIC_params['group_weights']):
             logger.error('Number of groups and weights do not match')
 
         lambdaIC = regularisation_params['lambdaIC'][0]
         lambda_group_IC = regularisation_params['lambdaIC'][1]
 
         # convert to new data structure (needed for faster access)
-        N = np.sum([g.size for g in dictIC_params["group_idx"]])
+        N = np.sum([g.size for g in dictIC_params['group_idx_kept']])
         groupIdxIC  = np.zeros( (N,), dtype=np.int32 )
-        groupSizeIC = np.zeros( (dictIC_params["group_idx"].size,), dtype=np.int32 )
+        groupSizeIC = np.zeros( (dictIC_params['group_idx_kept'].size,), dtype=np.int32 )
         pos = 0
-        for i, g in enumerate(dictIC_params["group_idx"]) :
+        for i, g in enumerate(dictIC_params['group_idx_kept']) :
             groupSizeIC[i] = g.size
             groupIdxIC[pos:(pos+g.size)] = g[:]
             pos += g.size
 
-        if "coeff_weights" in dictIC_params.keys():
+        if "coeff_weights_kept" in dictIC_params.keys():
             # w = dictIC_params["coeff_weights"]
             omegaIC = lambda x: omega_w_sparse_group_lasso( x, all_coeff_weights, groupIdxIC, groupSizeIC, dictIC_params['group_weights'], lambdaIC, lambda_group_IC)
 
@@ -148,18 +148,18 @@ def init_regularisation(regularisation_params):
     elif regularisation_params['regEC'] == 'lasso':
         lambdaEC = regularisation_params.get('lambdaEC')
         # check if weights are provided
-        if dictEC_params is not None and "coeff_weights" in dictEC_params.keys():
-            omegaEC = lambda x: lambdaEC * np.linalg.norm(all_coeff_weights[startEC:sizeEC]*x[startEC:sizeEC],1)
-            if regularisation_params.get('nnEC'):
-                proxEC = lambda x, scaling: non_negativity(w_soft_thresholding(x,all_coeff_weights,scaling*lambdaEC,startEC,sizeEC),startEC,sizeEC)
-            else:
-                proxEC = lambda x, scaling: w_soft_thresholding(x,all_coeff_weights,scaling*lambdaEC,startEC,sizeEC)
+        # if dictEC_params is not None and "coeff_weights" in dictEC_params.keys():
+        #     omegaEC = lambda x: lambdaEC * np.linalg.norm(all_coeff_weights[startEC:sizeEC]*x[startEC:sizeEC],1)
+        #     if regularisation_params.get('nnEC'):
+        #         proxEC = lambda x, scaling: non_negativity(w_soft_thresholding(x,all_coeff_weights,scaling*lambdaEC,startEC,sizeEC),startEC,sizeEC)
+        #     else:
+        #         proxEC = lambda x, scaling: w_soft_thresholding(x,all_coeff_weights,scaling*lambdaEC,startEC,sizeEC)
+        # else:
+        omegaEC = lambda x: lambdaEC * np.linalg.norm(x[startEC:(startEC+sizeEC)],1)
+        if regularisation_params.get('nnEC'):
+            proxEC = lambda x, scaling: non_negativity(soft_thresholding(x,scaling*lambdaEC,startEC,sizeEC),startEC,sizeEC)
         else:
-            omegaEC = lambda x: lambdaEC * np.linalg.norm(x[startEC:(startEC+sizeEC)],1)
-            if regularisation_params.get('nnEC'):
-                proxEC = lambda x, scaling: non_negativity(soft_thresholding(x,scaling*lambdaEC,startEC,sizeEC),startEC,sizeEC)
-            else:
-                proxEC = lambda x, scaling: soft_thresholding(x,scaling*lambdaEC,startEC,sizeEC)
+            proxEC = lambda x, scaling: soft_thresholding(x,scaling*lambdaEC,startEC,sizeEC)
 
     # elif regularisation_params['regIC'] == 'smoothness':
     #     lambdaEC = regularisation_params.get('lambdaEC')
@@ -183,18 +183,18 @@ def init_regularisation(regularisation_params):
     elif regularisation_params['regISO'] == 'lasso':
         lambdaISO = regularisation_params.get('lambdaISO')
         # check if weights are provided
-        if dictISO_params is not None and "coeff_weights" in dictISO_params.keys():
-            omegaISO = lambda x: lambdaISO * np.linalg.norm(all_coeff_weights[startISO:sizeISO]*x[startISO:sizeISO],1)
-            if regularisation_params.get('nnISO'):
-                proxISO = lambda x, scaling: non_negativity(w_soft_thresholding(x,all_coeff_weights,scaling*lambdaISO,startISO,sizeISO),startISO,sizeISO)
-            else:
-                proxISO = lambda x, scaling: w_soft_thresholding(x,all_coeff_weights,scaling*lambdaISO,startISO,sizeISO)
+        # if dictISO_params is not None and "coeff_weights" in dictISO_params.keys():
+        #     omegaISO = lambda x: lambdaISO * np.linalg.norm(all_coeff_weights[startISO:sizeISO]*x[startISO:sizeISO],1)
+        #     if regularisation_params.get('nnISO'):
+        #         proxISO = lambda x, scaling: non_negativity(w_soft_thresholding(x,all_coeff_weights,scaling*lambdaISO,startISO,sizeISO),startISO,sizeISO)
+        #     else:
+        #         proxISO = lambda x, scaling: w_soft_thresholding(x,all_coeff_weights,scaling*lambdaISO,startISO,sizeISO)
+        # else:
+        omegaISO = lambda x: lambdaISO * np.linalg.norm(x[startISO:(startISO+sizeISO)],1)
+        if regularisation_params.get('nnISO'):
+            proxISO  = lambda x, scaling: non_negativity(soft_thresholding(x,scaling*lambdaISO,startISO,sizeISO),startISO,sizeISO)
         else:
-            omegaISO = lambda x: lambdaISO * np.linalg.norm(x[startISO:(startISO+sizeISO)],1)
-            if regularisation_params.get('nnISO'):
-                proxISO  = lambda x, scaling: non_negativity(soft_thresholding(x,scaling*lambdaISO,startISO,sizeISO),startISO,sizeISO)
-            else:
-                proxISO  = lambda x, scaling: soft_thresholding(x,scaling*lambdaISO,startISO,sizeISO)
+            proxISO  = lambda x, scaling: soft_thresholding(x,scaling*lambdaISO,startISO,sizeISO)
 
     # elif regularisation_params['regISO'] == 'group_lasso':
     #     lambdaISO = regularisation_params.get('lambdaISO')
@@ -341,7 +341,7 @@ def fista(y, A, At, omega, prox, sqrt_W=None, tol_fun=1e-4, tol_x=1e-6, max_iter
         abs_x   = np.linalg.norm(x - prev_x)
         rel_x   = abs_x / ( np.linalg.norm(x) + eps )
         if verbose==4 :
-            print( "  %13.7e  %13.7e  |  %13.7e  %13.7e  %13.7e  |  %13.7e  %13.7e" % ( 0.5 * res_norm**2, reg_term_x, curr_obj, abs_obj, rel_obj, abs_x, rel_x ) )
+            print( "  %13.7e  %13.7e  |  %13.7e  %13.7e  %13.7e  |  %13.7e  %13.7e" % ( 0.5 * res_norm**2, reg_term_x, curr_obj, abs_obj, rel_obj, abs_x, rel_x ), flush=True )
 
         if abs_obj < eps :
             criterion = "Absolute tolerance on the objective"
@@ -379,7 +379,7 @@ def fista(y, A, At, omega, prox, sqrt_W=None, tol_fun=1e-4, tol_x=1e-6, max_iter
         qfval = 0.5 * np.linalg.norm(res)**2
 
     if verbose==4 :
-        print( "< Stopping criterion: %s >" % criterion )
+        print( "< Stopping criterion: %s >" % criterion, flush=True )
 
     opt_details = {}
     opt_details['residual'] = 0.5*res_norm**2
diff --git a/commit/trk2dictionary/trk2dictionary.pyx b/commit/trk2dictionary/trk2dictionary.pyx
old mode 100755
new mode 100644
index ab7c8ee7..db35f4a0
--- a/commit/trk2dictionary/trk2dictionary.pyx
+++ b/commit/trk2dictionary/trk2dictionary.pyx
@@ -54,12 +54,12 @@ cpdef cat_function( infilename, outfilename ):
         for fname in infilename:
             with open( fname, 'rb' ) as inFile:
                 shutil.copyfileobj( inFile, outfile )
-                remove( fname )
+            remove( fname )
 
 cpdef compute_tdi( np.uint32_t[::1] v, np.float32_t[::1] l, int nx, int ny, int nz, int verbose):
     cdef np.float32_t [::1] tdi = np.zeros( nx*ny*nz, dtype=np.float32 )
     cdef unsigned long long i=0
-    with ProgressBar(total=v.size, disable=(verbose in [0, 1, 3]), hide_on_exit=True) as pbar:
+    with ProgressBar(total=v.size, disable=verbose<3, hide_on_exit=True) as pbar:
 
         for i in range(v.size):
             tdi[ v[i] ] += l[i]
@@ -322,14 +322,15 @@ cpdef run( filename_tractogram=None, path_out=None, filename_peaks=None, filenam
             hdr = nibabel.streamlines.load( filename_tractogram, lazy_load=True ).header
             temp_idx = np.arange(int(hdr['count']))
             log_list = []
-            subinfo = logger.subinfo(f'Clustering with threshold = {blur_clust_thr[0]}', indent_lvl=2, indent_char='-', with_progress=verbose>2)
-            with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=subinfo, log_list=log_list) as pbar:
+            ret_subinfo = logger.subinfo(f'Clustering with threshold = {blur_clust_thr[0]}', indent_lvl=2, indent_char='-', with_progress=verbose>2)
+            with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
                 idx_centroids = run_clustering(tractogram_in=filename_tractogram, tractogram_out=filename_out,
                                             temp_folder=path_temp, atlas=blur_clust_groupby, clust_thr=blur_clust_thr[0],
                                             n_threads=n_threads, keep_temp_files=True, force=True, verbose=1, log_list=log_list)
         else:
-            logger.subinfo(f'Clustering with threshold = {blur_clust_thr[0]}', indent_lvl=2, indent_char='-', with_progress=verbose>2)
-            with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=True) as pbar:
+            log_list = []
+            ret_subinfo = logger.subinfo(f'Clustering with threshold = {blur_clust_thr[0]}', indent_lvl=2, indent_char='-', with_progress=verbose>2)
+            with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
                 idx_centroids = run_clustering(tractogram_in=filename_tractogram, tractogram_out=filename_out,
                                             temp_folder=path_temp, clust_thr=blur_clust_thr[0],
                                             keep_temp_files=True, force=True, verbose=1)
@@ -543,14 +544,22 @@ cpdef run( filename_tractogram=None, path_out=None, filename_peaks=None, filenam
         logger.warning( 'DICTIONARY not generated' )
         return None
 
+    # free memory
+    free(ptrTDI)
+    
+    # NOTE: this is to ensure flushing all the output from the cpp code
+    print(end='', flush=True)
+    time.sleep(0.5)
+
     # Concatenate files together
-    logger.subinfo( 'Saving data structure', indent_char='*', indent_lvl=1, with_progress=True )
+    log_list = []
+    ret_subinfo = logger.subinfo( 'Saving data structure', indent_char='*', indent_lvl=1, with_progress=verbose>2 )
     cdef int discarded = 0
-    with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=True) as pbar:
+    with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
         for j in range(n_threads-1):
-            path_IC_f = path_temp + f'/dictionary_IC_f_{j+1}.dict'
-            kept = np.fromfile( path_temp + f'/dictionary_TRK_kept_{j}.dict', dtype=np.uint8 )
-            IC_f = np.fromfile( path_temp + f'/dictionary_IC_f_{j+1}.dict', dtype=np.uint32 )
+            path_IC_f = join(path_temp, f'dictionary_IC_f_{j+1}.dict')
+            kept = np.fromfile( join(path_temp, f'dictionary_TRK_kept_{j}.dict'), dtype=np.uint8 )
+            IC_f = np.fromfile( join(path_temp, f'dictionary_IC_f_{j+1}.dict'), dtype=np.uint32 )
             discarded += np.count_nonzero(kept==0)
             IC_f -= discarded
             IC_f_save = np.memmap( path_IC_f, dtype="uint32", mode='w+', shape=IC_f.shape )
@@ -559,52 +568,52 @@ cpdef run( filename_tractogram=None, path_out=None, filename_peaks=None, filenam
             del IC_f_save
             # np.save( path_out + f'/dictionary_IC_f_{j+1}.dict', IC_f, allow_pickle=True)
 
-        fileout = path_out + '/dictionary_TRK_kept.dict'
+        fileout = join(path_out, 'dictionary_TRK_kept.dict')
         dict_list = []
         for j in range(n_threads):
-            dict_list += [ path_temp + f'/dictionary_TRK_kept_{j}.dict' ]
+            dict_list += [ join(path_temp, f'dictionary_TRK_kept_{j}.dict') ]
         cat_function( dict_list, fileout )
 
-        fileout = path_out + '/dictionary_TRK_norm.dict'
+        fileout = join(path_out, 'dictionary_TRK_norm.dict')
         dict_list = []
         for j in range(n_threads):
-            dict_list += [ path_temp + f'/dictionary_TRK_norm_{j}.dict' ]
+            dict_list += [ join(path_temp, f'dictionary_TRK_norm_{j}.dict') ]
         cat_function( dict_list, fileout )
 
-        fileout = path_out + '/dictionary_TRK_len.dict'
+        fileout = join(path_out, 'dictionary_TRK_len.dict')
         dict_list = []
         for j in range(n_threads):
-            dict_list += [ path_temp + f'/dictionary_TRK_len_{j}.dict' ]
+            dict_list += [ join(path_temp, f'dictionary_TRK_len_{j}.dict') ]
         cat_function( dict_list, fileout )
 
-        fileout = path_out + '/dictionary_TRK_lenTot.dict'
+        fileout = join(path_out, 'dictionary_TRK_lenTot.dict')
         dict_list = []
         for j in range(n_threads):
-            dict_list += [ path_temp + f'/dictionary_TRK_lenTot_{j}.dict' ]
+            dict_list += [ join(path_temp, f'dictionary_TRK_lenTot_{j}.dict') ]
         cat_function( dict_list, fileout )
 
-        fileout = path_out + '/dictionary_IC_f.dict'
+        fileout = join(path_out, 'dictionary_IC_f.dict')
         dict_list = []
         for j in range(n_threads):
-            dict_list += [ path_temp + f'/dictionary_IC_f_{j}.dict' ]
+            dict_list += [ join(path_temp, f'dictionary_IC_f_{j}.dict') ]
         cat_function( dict_list, fileout )
 
-        fileout = path_out + '/dictionary_IC_v.dict'
+        fileout = join(path_out, 'dictionary_IC_v.dict')
         dict_list = []
         for j in range(n_threads):
-            dict_list += [ path_temp + f'/dictionary_IC_v_{j}.dict' ]
+            dict_list += [ join(path_temp, f'dictionary_IC_v_{j}.dict') ]
         cat_function( dict_list, fileout )
 
-        fileout = path_out + '/dictionary_IC_o.dict'
+        fileout = join(path_out, 'dictionary_IC_o.dict')
         dict_list = []
         for j in range(n_threads):
-            dict_list += [ path_temp + f'/dictionary_IC_o_{j}.dict' ]
+            dict_list += [ join(path_temp, f'dictionary_IC_o_{j}.dict') ]
         cat_function( dict_list, fileout )
 
-        fileout = path_out + '/dictionary_IC_len.dict'
+        fileout = join(path_out, 'dictionary_IC_len.dict')
         dict_list = []
         for j in range(n_threads):
-            dict_list += [ path_temp + f'/dictionary_IC_len_{j}.dict' ]
+            dict_list += [ join(path_temp, f'dictionary_IC_len_{j}.dict') ]
         cat_function( dict_list, fileout )
 
 
@@ -619,29 +628,31 @@ cpdef run( filename_tractogram=None, path_out=None, filename_peaks=None, filenam
             TDI_affine = np.diag( [Px, Py, Pz, 1] )
 
     # save TDI and MASK maps
-    logger.subinfo( 'Saving TDI and MASK maps', indent_char='*', indent_lvl=1 )
-    v = np.fromfile( join(path_out, 'dictionary_IC_v.dict'),   dtype=np.uint32 )
-    l = np.fromfile( join(path_out, 'dictionary_IC_len.dict'), dtype=np.float32 )
-
     cdef np.float32_t [::1] niiTDI_mem = np.zeros( Nx*Ny*Nz, dtype=np.float32 )
-    niiTDI_mem = compute_tdi( v, l, Nx, Ny, Nz, verbose )
-    niiTDI_img_save = np.reshape( niiTDI_mem, (Nx,Ny,Nz), order='F' )
+    log_list = []
+    ret_subinfo = logger.subinfo( 'Saving TDI and MASK maps', indent_char='*', indent_lvl=1, with_progress=verbose>2)
+    with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list):
+        v = np.fromfile( join(path_out, 'dictionary_IC_v.dict'),   dtype=np.uint32 )
+        l = np.fromfile( join(path_out, 'dictionary_IC_len.dict'), dtype=np.float32 )
+        
+        niiTDI_mem = compute_tdi( v, l, Nx, Ny, Nz, verbose=0 )
+        niiTDI_img_save = np.reshape( niiTDI_mem, (Nx,Ny,Nz), order='F' )
 
-    niiTDI = nibabel.Nifti1Image( niiTDI_img_save, TDI_affine )
-    niiTDI_hdr = _get_header( niiTDI )
-    niiTDI_hdr['descrip'] = f'Created with COMMIT {get_distribution("dmri-commit").version}'
-    nibabel.save( niiTDI, join(path_out,'dictionary_tdi.nii.gz') )
+        niiTDI = nibabel.Nifti1Image( niiTDI_img_save, TDI_affine )
+        niiTDI_hdr = _get_header( niiTDI )
+        niiTDI_hdr['descrip'] = f'Created with COMMIT {get_distribution("dmri-commit").version}'
+        nibabel.save( niiTDI, join(path_out,'dictionary_tdi.nii.gz') )
 
-    if filename_mask is not None :
-        niiMASK = nibabel.Nifti1Image( niiMASK_img, TDI_affine )
-    else :
-        niiMASK = nibabel.Nifti1Image( (np.asarray(niiTDI_img_save)>0).astype(np.float32), TDI_affine )
+        if filename_mask is not None :
+            niiMASK = nibabel.Nifti1Image( niiMASK_img, TDI_affine )
+        else :
+            niiMASK = nibabel.Nifti1Image( (np.asarray(niiTDI_img_save)>0).astype(np.float32), TDI_affine )
 
-    niiMASK_hdr = _get_header( niiMASK )
-    niiMASK_hdr['descrip'] = niiTDI_hdr['descrip']
-    nibabel.save( niiMASK, join(path_out,'dictionary_mask.nii.gz') )
+        niiMASK_hdr = _get_header( niiMASK )
+        niiMASK_hdr['descrip'] = niiTDI_hdr['descrip']
+        nibabel.save( niiMASK, join(path_out,'dictionary_mask.nii.gz') )
 
-    if not keep_temp:
-        shutil.rmtree(path_temp)
+        if not keep_temp:
+            shutil.rmtree(path_temp)
 
     logger.info( f'[ {format_time(time.time() - tic)} ]' )
\ No newline at end of file
diff --git a/commit/trk2dictionary/trk2dictionary_c.cpp b/commit/trk2dictionary/trk2dictionary_c.cpp
index 7b259379..be8e24ee 100644
--- a/commit/trk2dictionary/trk2dictionary_c.cpp
+++ b/commit/trk2dictionary/trk2dictionary_c.cpp
@@ -178,9 +178,9 @@ int trk2dictionary(
     // Open tractogram file and compute the offset for each thread
     // -----------------------------------------------------------------
     unsigned long long int current;
-    unsigned long long int OffsetArr[threads_count];
+    unsigned long long int *OffsetArr = new unsigned long long int[threads_count]();
     int f = 0;
-    float Buff[3];
+    float *Buff = new float[3]();
     int N;
 
     FILE* fpTractogram = fopen(str_filename,"rb");
@@ -232,8 +232,8 @@ int trk2dictionary(
     // ==========================================
     //          Parallel IC compartments
     // ==========================================
-
-    printf( "   * Exporting IC compartments:\033[0m\n" );
+    
+    std::cout << "   * Exporting IC compartments:" << std::endl;
     // unsigned int width = 25;
     // PROGRESS = new ProgressBar( (unsigned int) n_count, (unsigned int) width);
     if (verbosity > 2)
@@ -256,7 +256,7 @@ int trk2dictionary(
     if (verbosity > 2)
         PROGRESS->close();
 
-    printf( "      [ %d streamlines kept, %ld segments in total ]\n", std::accumulate(totFibers.begin(), totFibers.end(), 0), std::accumulate( totICSegments.begin(), totICSegments.end(), 0) );
+    std::cout << "      [ " << std::accumulate(totFibers.begin(), totFibers.end(), 0) << " streamlines kept, " << std::accumulate( totICSegments.begin(), totICSegments.end(), 0) << " segments in total ]" << std::endl;
     totFibers.clear();
     threads.clear();
 
@@ -264,20 +264,23 @@ int trk2dictionary(
     //          Parallel EC compartments
     // ==========================================
 
-    printf( "   * Exporting EC compartments:\033[0m\n" );
-
+    std::cout << "   * Exporting EC compartments:" << std::endl;
     int EC = ECSegments( ptrPEAKS, Np, vf_THR, ECix, ECiy, ECiz, ptrTDI, ptrHashTable, path_out, ptrPeaksAffine, threads_count );
 
-    printf("      [ %d voxels, %d segments ]\n", totECVoxels, totECSegments );
+    std::cout << "      [ " << totECVoxels << " voxels, " << totECSegments << " segments ]" << std::endl;
 
     /*=========================*/
     /*     Restricted ISO compartments     */
     /*=========================*/
-    printf( "   * Exporting ISO compartments:\033[0m\n" );
-
+    std::cout << "   * Exporting ISO compartments:" << std::endl;
     int totISO = ISOcompartments(ptrTDI, path_out, threads_count);
 
-    printf("      [ %d voxels ]\n", totISO );
+    std::cout << "      [ " << totISO << " voxels ]" << std::endl;
+
+    delete[] batch_size;
+    delete[] Pos;
+    delete[] OffsetArr;
+    delete[] Buff;
 
     return 1;
 
@@ -290,8 +293,7 @@ int ECSegments(float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int EC
     // Variables definition
     string    filename;
     string    OUTPUT_path(path_out);
-    std::size_t pos = OUTPUT_path.find("/temp");
-    OUTPUT_path = OUTPUT_path.substr (0,pos);
+    OUTPUT_path = OUTPUT_path.substr (0,OUTPUT_path.size()-5);
 
     unsigned short o;
     unsigned int v;
@@ -308,7 +310,7 @@ int ECSegments(float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int EC
         segKey         ec_seg;
         int            ix, iy, iz, id, atLeastOne;
         float          peakMax;
-        float          norms[ Np ];
+        float          *norms = new float[Np]();
         float          *ptr;
         int            ox, oy;
         int            skip = 0;
@@ -387,6 +389,7 @@ int ECSegments(float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int EC
                     }
                 }
         }
+        delete[] norms;
     }
     totECSegments = temp_totECSegments;
     totECVoxels = temp_totECVoxels;
@@ -404,8 +407,7 @@ int ISOcompartments(double** ptrTDI, char* path_out, int threads){
     // Variables definition
     string    filename;
     string    OUTPUT_path(path_out);
-    std::size_t pos = OUTPUT_path.find("/temp");
-    OUTPUT_path = OUTPUT_path.substr (0,pos);
+    OUTPUT_path = OUTPUT_path.substr (0,OUTPUT_path.size()-5);
     unsigned int totISOVoxels = 0, v=0;
 
     filename = OUTPUT_path+"/dictionary_ISO_v.dict";        FILE* pDict_ISO_v   = fopen( filename.c_str(),   "wb" );
@@ -481,7 +483,7 @@ unsigned long long int offset, int idx, unsigned int startpos, unsigned int endp
     filename = OUTPUT_path+"/dictionary_TRK_norm_" + std::to_string(idx) + ".dict";   FILE* pDict_TRK_norm = fopen(filename.c_str(),"wb");
     if ( !pDict_TRK_norm )
     {
-        printf( "\n[trk2dictionary] Unable to create output files" );
+        std::cout << "\n[trk2dictionary] Unable to create output files";
     }
     filename = OUTPUT_path+"/dictionary_IC_f_" + std::to_string(idx) + ".dict";        FILE* pDict_IC_f       = fopen(filename.c_str(),"wb");
     filename = OUTPUT_path+"/dictionary_IC_v_" + std::to_string(idx) + ".dict";        FILE* pDict_IC_v       = fopen(filename.c_str(),"wb");
diff --git a/pyproject.toml b/pyproject.toml
index 37d31d98..46c095b9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,18 +1,18 @@
 [build-system]
 requires = [
     "cython>=3.0.10",
-    "numpy>=1.24.4",
+    "numpy>=1.24.4,<2.0.0",
     "setuptools>=69.2.0"
 ]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "dmri-commit"
-version = "2.2.0"
+version = "2.3.0"
 dependencies = [
     "dmri-amico>=2.0.1",
     "dmri-dicelib>=1.1.0",
-    "numpy>=1.24.4",
+    "numpy>=1.24.4,<2.0.0",
     "scipy>=1.10.1"
 ]
 requires-python = ">=3.8"
@@ -62,3 +62,6 @@ include-package-data = false
     "*.pyx",
     "*.pyxbld"
 ]
+
+[tool.cibuildwheel]
+build-verbosity = 3
diff --git a/setup.py b/setup.py
index aef9e8a6..1079cef4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,5 @@
 import os
+import sys
 
 from setuptools import Extension, setup
 from setuptools.command.build_ext import build_ext
@@ -11,19 +12,35 @@ def get_extensions():
     # for the computation of matrix-vector multiplications
     trk2dictionary = Extension(name=f'{package_name}.trk2dictionary',
                      sources=[f'{package_name}/trk2dictionary/trk2dictionary.pyx'],
-                     libraries=['stdc++'],
-                     extra_compile_args=['-w', '-std=c++11'],
+                     libraries=[] if sys.platform == 'win32' else ['stdc++'],
+                     extra_compile_args=[] if sys.platform == 'win32' else ['-w', '-std=c++11'],
                      language='c++')
     core = Extension(name=f'{package_name}.core',
                      sources=[f'{package_name}/core.pyx'],
-                     extra_compile_args=['-w'],
+                     extra_compile_args=[] if sys.platform == 'win32' else ['-w', '-std=c++11'],
                      language='c++')
     proximals = Extension(name=f'{package_name}.proximals',
                      sources=[f'{package_name}/proximals.pyx'],
-                     extra_compile_args=['-w'],
+                     extra_compile_args=[] if sys.platform == 'win32' else ['-w', '-std=c++11'],
                      language='c++')
+    # NOTE: Windows requires the pthread-win32 static library to compile the operator extension
+    #       The library can be downloaded from https://github.com/GerHobbelt/pthread-win32
+    #       The PTHREAD_WIN_INCLUDE and PTHREAD_WIN_LIB environment variables must be set to the include and lib directories
+    if sys.platform == 'win32':
+        try:
+            pthread_win_include = os.environ['PTHREAD_WIN_INCLUDE']
+            pthread_win_lib = os.environ['PTHREAD_WIN_LIB']
+        except KeyError:
+            raise RuntimeError('PTHREAD_WIN_INCLUDE and PTHREAD_WIN_LIB must be set')
+    operator = Extension(name=f'{package_name}.operator.operator',
+                    sources=[f'{package_name}/operator/operator.pyx', f'{package_name}/operator/operator_c.c'],
+                    include_dirs=[pthread_win_include] if sys.platform == 'win32' else [],
+                    libraries=['pthread'] if sys.platform == 'win32' else [],
+                    library_dirs=[pthread_win_lib] if sys.platform == 'win32' else [],
+                    extra_compile_args=['/fp:fast', '/DHAVE_STRUCT_TIMESPEC'] if sys.platform == 'win32' else ['-w', '-O3', '-Ofast'],
+                    language='c')
                  
-    return [trk2dictionary, core, proximals]
+    return [trk2dictionary, core, proximals, operator]
 
 class CustomBuildExtCommand(build_ext):
     """ build_ext command to use when numpy headers are needed. """
@@ -36,7 +53,7 @@ def run(self):
 
         # Add everything requires for build
         self.swig_opts = None
-        self.include_dirs = [get_include()]
+        self.include_dirs.extend([get_include()])
         self.distribution.ext_modules[:] = cythonize( self.distribution.ext_modules, build_dir='build' )
 
         # if not specified via '-j N' option, set compilation using max number of cores
@@ -48,6 +65,11 @@ def run(self):
         build_ext.finalize_options(self)
         build_ext.run(self)
 
+# generate the operator_c.c file
+sys.path.insert(0, os.path.dirname(__file__))
+from setup_operator import write_operator_c_file
+write_operator_c_file()
+
 # create the 'build' directory
 if not os.path.exists('build'):
     os.makedirs('build')
diff --git a/setup_operator.py b/setup_operator.py
new file mode 100644
index 00000000..3fa86743
--- /dev/null
+++ b/setup_operator.py
@@ -0,0 +1,932 @@
+'''
+This script writes the operator_c.c file.
+The main functions are:
+    COMMIT_A__block
+    COMMIT_A
+    COMMIT_At__block
+    COMMIT_At
+    COMMIT_A__block_nolut
+    COMMIT_A_nolut
+    COMMIT_At__block_nolut
+    COMMIT_At_nolut
+'''
+
+from typing import NoReturn
+from os.path import join as path_join
+
+
+def add_imports() -> str:
+    '''Adds the imports to the operator_c.c file.'''
+    s: str = '''\
+#include <pthread.h>
+#include <stdint.h>
+
+// max number of threads
+#define MAX_THREADS 255\n\n'''
+    return s
+
+
+def add_global_variables() -> str:
+    '''Adds the global variables to the operator_c.c file.'''
+    s: str = '''\
+// global variables
+int         nF, n, nE, nV, nS, ndirs;
+double      *x, *Y;
+uint32_t    *ICthreads, *ECthreads, *ISOthreads;
+uint8_t     *ICthreadsT;
+uint32_t    *ECthreadsT, *ISOthreadsT;
+uint32_t    *ICf, *ICv, *ECv, *ISOv;
+uint16_t    *ICo, *ECo;
+float       *ICl;
+float       *wmrSFP0, *wmrSFP1, *wmrSFP2, *wmrSFP3, *wmrSFP4, *wmrSFP5, *wmrSFP6, *wmrSFP7, *wmrSFP8, *wmrSFP9, *wmrSFP10, *wmrSFP11, *wmrSFP12, *wmrSFP13, *wmrSFP14, *wmrSFP15, *wmrSFP16, *wmrSFP17, *wmrSFP18, *wmrSFP19;
+float       *wmhSFP0, *wmhSFP1, *wmhSFP2, *wmhSFP3, *wmhSFP4, *wmhSFP5, *wmhSFP6, *wmhSFP7, *wmhSFP8, *wmhSFP9, *wmhSFP10, *wmhSFP11, *wmhSFP12, *wmhSFP13, *wmhSFP14, *wmhSFP15, *wmhSFP16, *wmhSFP17, *wmhSFP18, *wmhSFP19;
+float       *isoSFP0, *isoSFP1, *isoSFP2, *isoSFP3, *isoSFP4, *isoSFP5, *isoSFP6, *isoSFP7, *isoSFP8, *isoSFP9, *isoSFP10, *isoSFP11, *isoSFP12, *isoSFP13, *isoSFP14, *isoSFP15, *isoSFP16, *isoSFP17, *isoSFP18, *isoSFP19;
+uint32_t    nIC, nEC, nISO;\n\n\n'''
+    return s
+
+
+def add_commit_a_block() -> str:
+    '''Adds the COMMIT_A__block function to the operator_c.c file.'''
+    def add_intracellular_compartments() -> str:
+        s: str = '''\
+    // intra-cellular compartments
+    if (nIC > 0)
+    {
+        t_v = ICv + ICthreads[id];
+        t_vEnd = ICv + ICthreads[id+1];
+        t_o = ICo + ICthreads[id];
+        t_l = ICl + ICthreads[id];
+        t_f = ICf + ICthreads[id];
+        switch (nIC)
+        {'''
+        s1: str = '''\
+xPtr0 = x + (*t_f);
+                    x0 = *xPtr0;'''
+        s2: str = 'if (x0 != 0'
+        s3: str = 'SFP0ptr = wmrSFP0 + offset;'
+        s4: str = 'x0 * (*SFP0ptr++)'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+                    xPtr{i} = xPtr{i - 1} + nF;
+                    x{i} = *xPtr{i};'''
+                s2 += f' || x{i} != 0'
+                s3 += f'''\
+
+                        SFP{i}ptr = wmrSFP{i} + offset;'''
+                s4 += f' + x{i} * (*SFP{i}ptr++)'
+            s += f'''\
+
+            case {i + 1}:
+                while (t_v != t_vEnd)
+                {{
+                    {s1}
+                    {s2})
+                    {{
+                        YPtr = Y + nS * (*t_v);
+                        YPtrEnd = YPtr + nS;
+                        w = (double)(*t_l);
+                        offset = nS * (*t_o);
+                        {s3}
+                        while (YPtr != YPtrEnd)
+                            (*YPtr++) += w * ({s4});
+                    }}
+                    t_f++;
+                    t_v++;
+                    t_o++;
+                    t_l++;
+                }}
+                break;'''
+
+        s += '''\
+
+        }
+    }\n\n'''
+        return s
+
+    def add_extracellular_compartments() -> str:
+        s: str = '''\
+    // extra-cellular compartments
+    if (nEC > 0)
+    {
+        t_v = ECv + ECthreads[id];
+        t_vEnd = ECv + ECthreads[id+1];
+        t_o = ECo + ECthreads[id];
+        xPtr0 = x + nIC*nF + ECthreads[id];
+        switch (nEC)
+        {'''
+        s1: str = ''
+        s2: str = 'x0 = *xPtr0++;'
+        s3: str = 'if (x0 != 0'
+        s4: str = 'SFP0ptr = wmhSFP0 + offset;'
+        s5: str = 'x0 * (*SFP0ptr++)'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+                xPtr{i} = xPtr{i - 1} + nE;'''
+                s2 += f'''\
+
+                    x{i} = *xPtr{i}++;'''
+                s3 += f' || x{i} != 0'
+                s4 += f'''\
+
+                        SFP{i}ptr = wmhSFP{i} + offset;'''
+                s5 += f' + x{i} * (*SFP{i}ptr++)'
+            s += f'''\
+
+            case {i + 1}:{s1}
+                while (t_v != t_vEnd)
+                {{
+                    {s2}
+                    {s3})
+                    {{
+                        YPtr = Y + nS * (*t_v);
+                        YPtrEnd = YPtr + nS;
+                        offset = nS * (*t_o);
+                        {s4}
+                        while (YPtr != YPtrEnd)
+                            (*YPtr++) += ({s5});
+                    }}
+                    t_v++;
+                    t_o++;
+                }}
+                break;'''
+
+        s += '''\
+
+        }
+    }\n\n'''
+        return s
+
+    def add_isotropic_compartments() -> str:
+        s: str = '''\
+    // isotropic compartments
+    if (nISO > 0)
+    {
+        t_v = ISOv + ISOthreads[id];
+        t_vEnd = ISOv + ISOthreads[id+1];
+        xPtr0 = x + nIC*nF + nEC*nE + ISOthreads[id];
+        switch (nISO)
+        {'''
+        s1: str = ''
+        s2: str = 'x0 = *xPtr0++;'
+        s3: str = 'if (x0 != 0'
+        s4: str = 'SFP0ptr = isoSFP0;'
+        s5: str = 'x0 * (*SFP0ptr++)'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+                xPtr{i} = xPtr{i - 1} + nV;'''
+                s2 += f'''\
+
+                    x{i} = *xPtr{i}++;'''
+                s3 += f' || x{i} != 0'
+                s4 += f'''\
+
+                        SFP{i}ptr = isoSFP{i};'''
+                s5 += f' + x{i} * (*SFP{i}ptr++)'
+            s += f'''\
+
+            case {i + 1}:{s1}
+                while (t_v != t_vEnd)
+                {{
+                    {s2}
+                    {s3})
+                    {{
+                        YPtr = Y + nS * (*t_v);
+                        YPtrEnd = YPtr + nS;
+                        {s4}
+                        while (YPtr != YPtrEnd)
+                            (*YPtr++) += ({s5});
+                    }}
+                    t_v++;
+                }}
+                break;'''
+
+        s += '''\
+
+        }
+    }\n\n'''
+        return s
+
+    s: str = '''\
+//
+// Compute a sub-block of the A*x MATRIX-VECTOR product
+// 
+void* COMMIT_A__block( void *ptr )
+{
+    int      id = (long)ptr;
+    int      offset;
+    double   x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w;
+    double   *xPtr0, *xPtr1, *xPtr2, *xPtr3, *xPtr4, *xPtr5, *xPtr6, *xPtr7, *xPtr8, *xPtr9, *xPtr10, *xPtr11, *xPtr12, *xPtr13, *xPtr14, *xPtr15, *xPtr16, *xPtr17, *xPtr18, *xPtr19;
+    double   *YPtr, *YPtrEnd;
+    float    *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr;
+    uint32_t *t_v, *t_vEnd, *t_f;
+    uint16_t *t_o;
+    float    *t_l;\n\n'''
+    s += add_intracellular_compartments()
+    s += add_extracellular_compartments()
+    s += add_isotropic_compartments()
+    s += '''\
+    pthread_exit( 0 );
+}\n\n'''
+    return s
+
+
+def add_commit_a() -> str:
+    '''Adds the COMMIT_A function to the operator_c.c file.'''
+    def add_intracellular_compartments() -> str:
+        s: str = '''\
+    switch (nIC)
+    {'''
+        s1: str = 'wmrSFP0 = _wmrSFP;'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+            wmrSFP{i} = wmrSFP{i - 1} + _ndirs*_nS;'''
+            s += f'''\
+
+        case {i + 1}:
+            {s1}
+            break;'''
+        s += '''\
+
+    }\n\n'''
+        return s
+
+    def add_extracellular_compartments() -> str:
+        s: str = '''\
+    switch (nEC)
+    {'''
+        s1: str = 'wmhSFP0 = _wmhSFP;'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+            wmhSFP{i} = wmhSFP{i - 1} + _ndirs*_nS;'''
+            s += f'''\
+
+        case {i + 1}:
+            {s1}
+            break;'''
+        s += '''\
+
+    }\n\n'''
+        return s
+
+    def add_isotropic_compartments() -> str:
+        s: str = '''\
+    switch (nISO)
+    {'''
+        s1: str = 'isoSFP0 = _isoSFP;'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+            isoSFP{i} = isoSFP{i - 1} + _nS;'''
+            s += f'''\
+
+        case {i + 1}:
+            {s1}
+            break;'''
+        s += '''\
+
+    }\n\n'''
+        return s
+
+    s: str = '''\
+//
+// Function called by Cython
+//
+void COMMIT_A(
+    int _nF, int _nE, int _nV, int _nS, int _ndirs,
+    double *_vIN, double *_vOUT,
+    uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl,
+    uint32_t *_ECv, uint16_t *_ECo,
+    uint32_t *_ISOv,
+    float *_wmrSFP, float *_wmhSFP, float *_isoSFP,
+    uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads,
+    uint32_t _nIC, uint32_t _nEC, uint32_t _nISO, uint32_t _nThreads
+)
+{
+    nF = _nF;
+    nE = _nE;
+    nV = _nV;
+    nS = _nS;
+    ndirs = _ndirs;
+
+    x = _vIN;
+    Y = _vOUT;
+
+    ICf  = _ICf;
+    ICv  = _ICv;
+    ICo  = _ICo;
+    ICl  = _ICl;
+    ECv  = _ECv;
+    ECo  = _ECo;
+    ISOv = _ISOv;
+
+    nIC = _nIC;
+    nEC = _nEC;
+    nISO = _nISO;\n\n'''
+    s += add_intracellular_compartments()
+    s += add_extracellular_compartments()
+    s += add_isotropic_compartments()
+    s += '''\
+
+    ICthreads  = _ICthreads;
+    ECthreads  = _ECthreads;
+    ISOthreads = _ISOthreads;
+
+    // Run SEPARATE THREADS to perform the multiplication
+    pthread_t threads[MAX_THREADS];
+    int t;
+    for(t=0; t<_nThreads ; t++)
+        pthread_create( &threads[t], NULL, COMMIT_A__block, (void *) (long int)t );
+    for(t=0; t<_nThreads ; t++)
+        pthread_join( threads[t], NULL );
+    return;
+}\n\n'''
+    return s
+
+
+def add_commit_at_block() -> str:
+    '''Adds the COMMIT_At__block function to the operator_c.c file.'''
+    def add_intracellular_compartments() -> str:
+        s: str = '''\
+    // intra-cellular compartments
+    if (nIC > 0)
+    {
+        t_v = ICv;
+        t_vEnd = ICv + n;
+        t_o = ICo;
+        t_l = ICl;
+        t_f = ICf;
+        t_t = ICthreadsT;
+        switch (nIC)
+        {'''
+        s1: str = 'SFP0ptr = wmrSFP0 + offset;'
+        s2: str = 'x0 = (*SFP0ptr++) * YTmp;'
+        s3: str = 'x0 += (*SFP0ptr++) * YTmp;'
+        s4: str = 'x[*t_f] += w * x0;'
+        s5: str = ''
+
+        for i in range(0, 20):
+            if i > 0:
+                if i > 1:
+                    s5 = f'{i}*'
+                s1 += f'''\
+
+                        SFP{i}ptr = wmrSFP{i} + offset;'''
+                s2 += f'''\
+
+                        x{i} = (*SFP{i}ptr++) * YTmp;'''
+                s3 += f'''\
+
+                        x{i} += (*SFP{i}ptr++) * YTmp;'''
+                s4 += f'''\
+
+                        x[*t_f+{s5}nF] += w * x{i};'''
+            s += f'''\
+
+            case {i + 1}:
+                while (t_v != t_vEnd)
+                {{
+                    if (*t_t == id)
+                    {{
+                        YPtr = Y + nS * (*t_v);
+                        YPtrEnd = YPtr + nS;
+                        w = (double)(*t_l);
+                        offset = nS * (*t_o);
+                        YTmp = *YPtr;
+                        {s1}
+                        {s2}
+                        while (++YPtr != YPtrEnd)
+                        {{
+                            YTmp = *YPtr;
+                            {s3}
+                        }}
+                        {s4}
+                    }}
+                    t_f++;
+                    t_v++;
+                    t_o++;
+                    t_l++;
+                    t_t++;
+                }}
+                break;'''
+
+        s += '''\
+
+        }
+    }\n\n'''
+        return s
+
+    def add_extracellular_compartments() -> str:
+        s: str = '''\
+    // extra-cellular compartments
+    if (nEC > 0)
+    {
+        t_v = ECv + ECthreadsT[id];
+        t_vEnd = ECv + ECthreadsT[id+1];
+        t_o = ECo + ECthreadsT[id];
+        xPtr0 = x + nIC*nF + ECthreadsT[id];
+        switch (nEC)
+        {'''
+        s1: str = ''
+        s2: str = 'SFP0ptr = wmhSFP0 + offset;'
+        s3: str = 'x0 = (*SFP0ptr++) * YTmp;'
+        s4: str = 'x0 += (*SFP0ptr++) * YTmp;'
+        s5: str = '(*xPtr0++) += x0;'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+                xPtr{i} = xPtr{i - 1} + nE;'''
+                s2 += f'''\
+
+                    SFP{i}ptr = wmhSFP{i} + offset;'''
+                s3 += f'''\
+
+                    x{i} = (*SFP{i}ptr++) * YTmp;'''
+                s4 += f'''\
+
+                    x{i} += (*SFP{i}ptr++) * YTmp;'''
+                s5 += f'''\
+
+                    (*xPtr{i}++) += x{i};'''
+            s += f'''\
+
+            case {i + 1}:{s1}
+                while (t_v != t_vEnd)
+                {{
+                    YPtr = Y + nS * (*t_v);
+                    YPtrEnd = YPtr + nS;
+                    offset = nS * (*t_o);
+                    YTmp = *YPtr;
+                    {s2}
+                    {s3}
+                    while (++YPtr != YPtrEnd)
+                    {{
+                        YTmp = *YPtr;
+                        {s4}
+                    }}
+                    {s5}
+                    t_v++;
+                    t_o++;
+                }}
+                break;'''
+
+        s += '''\
+
+        }
+    }\n\n'''
+        return s
+
+    def add_isotropic_compartments() -> str:
+        s: str = '''\
+    // isotropic compartments
+    if (nISO > 0)
+    {
+        t_v = ISOv + ISOthreadsT[id];
+        t_vEnd = ISOv + ISOthreadsT[id+1];
+        xPtr0 = x + nIC*nF + nEC*nE + ISOthreadsT[id];
+        switch (nISO)
+        {'''
+        s1: str = ''
+        s2: str = 'SFP0ptr = isoSFP0;'
+        s3: str = 'x0 = (*SFP0ptr++) * YTmp;'
+        s4: str = 'x0 += (*SFP0ptr++) * YTmp;'
+        s5: str = '(*xPtr0++) += x0;'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+                xPtr{i} = xPtr{i - 1} + nV;'''
+                s2 += f'''\
+
+                    SFP{i}ptr = isoSFP{i};'''
+                s3 += f'''\
+
+                    x{i} = (*SFP{i}ptr++) * YTmp;'''
+                s4 += f'''\
+
+                    x{i} += (*SFP{i}ptr++) * YTmp;'''
+                s5 += f'''\
+
+                    (*xPtr{i}++) += x{i};'''
+            s += f'''\
+
+            case {i + 1}:{s1}
+                while (t_v != t_vEnd)
+                {{
+                    YPtr = Y + nS * (*t_v);
+                    YPtrEnd = YPtr + nS;
+                    YTmp = *YPtr;
+                    {s2}
+                    {s3}
+                    while (++YPtr != YPtrEnd)
+                    {{
+                        YTmp = *YPtr;
+                        {s4}
+                    }}
+                    {s5}
+                    t_v++;
+                }}
+                break;'''
+
+        s += '''\
+
+        }
+    }\n\n'''
+        return s
+
+    s: str = '''\
+//
+// Compute a sub-block of the At*y MATRIX-VECTOR product
+//
+void* COMMIT_At__block( void *ptr )
+{
+    int      id = (long)ptr;
+    int      offset;
+    double   x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w, YTmp;
+    double   *xPtr0, *xPtr1, *xPtr2, *xPtr3, *xPtr4, *xPtr5, *xPtr6, *xPtr7, *xPtr8, *xPtr9, *xPtr10, *xPtr11, *xPtr12, *xPtr13, *xPtr14, *xPtr15, *xPtr16, *xPtr17, *xPtr18, *xPtr19;
+    double   *YPtr, *YPtrEnd;
+    float    *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr;
+    uint32_t *t_v, *t_vEnd, *t_f;
+    uint16_t *t_o;
+    float    *t_l;
+    uint8_t  *t_t;\n\n'''
+    s += add_intracellular_compartments()
+    s += add_extracellular_compartments()
+    s += add_isotropic_compartments()
+    s += '''\
+    pthread_exit( 0 );
+}\n\n'''
+    return s
+
+
+def add_commit_at() -> str:
+    '''Adds the COMMIT_At function to the operator_c.c file.'''
+    def add_intracellular_compartments() -> str:
+        s: str = '''\
+    switch (nIC)
+    {'''
+        s1: str = 'wmrSFP0 = _wmrSFP;'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+            wmrSFP{i} = wmrSFP{i - 1} + _ndirs*_nS;'''
+            s += f'''\
+
+        case {i + 1}:
+            {s1}
+            break;'''
+        s += '''\
+
+    }\n\n'''
+        return s
+
+    def add_extracellular_compartments() -> str:
+        s: str = '''\
+    switch (nEC)
+    {'''
+        s1: str = 'wmhSFP0 = _wmhSFP;'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+            wmhSFP{i} = wmhSFP{i - 1} + _ndirs*_nS;'''
+            s += f'''\
+
+        case {i + 1}:
+            {s1}
+            break;'''
+        s += '''\
+
+    }\n\n'''
+        return s
+
+    def add_isotropic_compartments() -> str:
+        s: str = '''\
+    switch (nISO)
+    {'''
+        s1: str = 'isoSFP0 = _isoSFP;'
+
+        for i in range(0, 20):
+            if i > 0:
+                s1 += f'''\
+
+            isoSFP{i} = isoSFP{i - 1} + _nS;'''
+            s += f'''\
+
+        case {i + 1}:
+            {s1}
+            break;'''
+        s += '''\
+
+    }\n\n'''
+        return s
+
+    s: str = '''\
+//
+// Function called by Cython
+//
+void COMMIT_At(
+    int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs,
+    double *_vIN, double *_vOUT,
+    uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl,
+    uint32_t *_ECv, uint16_t *_ECo,
+    uint32_t *_ISOv,
+    float *_wmrSFP, float *_wmhSFP, float *_isoSFP,
+    uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT,
+    uint32_t _nIC, uint32_t _nEC, uint32_t _nISO, uint32_t _nThreads
+)
+{
+    nF = _nF;
+    n  = _n;
+    nE = _nE;
+    nV = _nV;
+    nS = _nS;
+    ndirs = _ndirs;
+
+    x = _vOUT;
+    Y = _vIN;
+
+    ICf  = _ICf;
+    ICv  = _ICv;
+    ICo  = _ICo;
+    ICl  = _ICl;
+    ECv  = _ECv;
+    ECo  = _ECo;
+    ISOv = _ISOv;
+
+    nIC = _nIC;
+    nEC = _nEC;
+    nISO = _nISO;\n\n'''
+    s += add_intracellular_compartments()
+    s += add_extracellular_compartments()
+    s += add_isotropic_compartments()
+    s += '''\
+    ICthreadsT  = _ICthreadsT;
+    ECthreadsT  = _ECthreadsT;
+    ISOthreadsT = _ISOthreadsT;
+
+    // Run SEPARATE THREADS to perform the multiplication
+    pthread_t threads[MAX_THREADS];
+    int t;
+    for(t=0; t<_nThreads ; t++)
+        pthread_create( &threads[t], NULL, COMMIT_At__block, (void *) (long int)t );
+    for(t=0; t<_nThreads ; t++)
+        pthread_join( threads[t], NULL );
+    return;
+}\n\n'''
+    return s
+
+
+def add_commit_a_block_nolut() -> str:
+    '''Adds the COMMIT_A__block_nolut function to the operator_c.c file.'''
+    def add_intracellular_compartments() -> str:
+        s: str = '''\
+    // intra-cellular compartments
+    t_v    = ICv + ICthreads[id];
+    t_vEnd = ICv + ICthreads[id+1];
+    t_l    = ICl + ICthreads[id];
+    t_f    = ICf + ICthreads[id];
+
+    while( t_v != t_vEnd )
+    {
+        x0 = x[*t_f];
+        if ( x0 != 0 )
+            Y[*t_v] += (double)(*t_l) * x0;
+        t_f++;
+        t_v++;
+        t_l++;
+    }\n\n'''
+        return s
+
+    def add_isotropic_compartments() -> str:
+        s: str = '''\
+    // isotropic compartments
+    if (nISO > 0)
+    {
+        t_v    = ISOv + ISOthreads[id];
+        t_vEnd = ISOv + ISOthreads[id+1];
+        xPtr   = x + nF + ISOthreads[id];
+
+        while( t_v != t_vEnd )
+        {
+            x0 = *xPtr++;
+            if ( x0 != 0 )
+                Y[*t_v] += x0;
+            t_v++;
+        }
+    }\n\n'''
+        return s
+
+    s: str = '''\
+//
+// Compute a sub-block of the A*x MATRIX-VECTOR product
+//
+void* COMMIT_A__block_nolut( void *ptr )
+{
+    int      id = (long)ptr;
+    double   x0;
+    double   *xPtr;
+    uint32_t *t_v, *t_vEnd, *t_f;
+    float    *t_l;\n\n'''
+    s += add_intracellular_compartments()
+    s += add_isotropic_compartments()
+    s += '''\
+    pthread_exit( 0 );
+}\n\n'''
+    return s
+
+
+def add_commit_a_nolut() -> str:
+    '''Adds the COMMIT_A_nolut function to the operator_c.c file.'''
+    s: str = '''\
+//
+// Function called by Cython
+//
+void COMMIT_A_nolut(
+    int _nF,
+    double *_vIN, double *_vOUT,
+    uint32_t *_ICf, uint32_t *_ICv, float *_ICl,
+    uint32_t *_ISOv,
+    uint32_t* _ICthreads, uint32_t* _ISOthreads,
+    uint32_t _nISO, uint32_t _nThreads
+)
+{
+    nF = _nF;
+
+    x = _vIN;
+    Y = _vOUT;
+
+    ICf  = _ICf;
+    ICv  = _ICv;
+    ICl  = _ICl;
+    ISOv = _ISOv;
+
+    nISO = _nISO;
+
+    ICthreads  = _ICthreads;
+    ISOthreads = _ISOthreads;
+
+    // Run SEPARATE THREADS to perform the multiplication
+    pthread_t threads[MAX_THREADS];
+    int t;
+    for(t=0; t<_nThreads ; t++)
+        pthread_create( &threads[t], NULL, COMMIT_A__block_nolut, (void *) (long int)t );
+    for(t=0; t<_nThreads ; t++)
+        pthread_join( threads[t], NULL );
+    return;
+}\n\n'''
+    return s
+
+
+def add_commit_at_block_nolut() -> str:
+    '''Adds the COMMIT_At__block_nolut function to the operator_c.c file.'''
+    def add_intracellular_compartments() -> str:
+        s: str = '''\
+    // intra-cellular compartments
+    t_v    = ICv;
+    t_vEnd = ICv + n;
+    t_l    = ICl;
+    t_f    = ICf;
+    t_t    = ICthreadsT;
+
+    while( t_v != t_vEnd )
+    {
+        // in this case, I need to walk throug because the segments are ordered in "voxel order"
+        if ( *t_t == id )
+            x[*t_f] += (double)(*t_l) * Y[*t_v];
+        t_t++;
+        t_f++;
+        t_v++;
+        t_l++;
+    }\n\n'''
+        return s
+
+    def add_isotropic_compartments() -> str:
+        s: str = '''\
+    // isotropic compartments
+    if (nISO > 0)
+    {
+        t_v    = ISOv + ISOthreadsT[id];
+        t_vEnd = ISOv + ISOthreadsT[id+1];
+        xPtr   = x + nF + ISOthreadsT[id];
+
+        while( t_v != t_vEnd )
+            (*xPtr++) += Y[*t_v++];
+    }\n\n'''
+        return s
+
+    s: str = '''\
+//
+// Compute a sub-block of the At*y MATRIX-VECTOR product
+//
+void* COMMIT_At__block_nolut( void *ptr )
+{
+    int      id = (long)ptr;
+    double   *xPtr;
+    uint32_t *t_v, *t_vEnd, *t_f;
+    float    *t_l;
+    uint8_t  *t_t;\n\n'''
+    s += add_intracellular_compartments()
+    s += add_isotropic_compartments()
+    s += '''\
+
+    pthread_exit( 0 );
+}\n\n'''
+    return s
+
+
+def add_commit_at_nolut() -> str:
+    '''Adds the COMMIT_At_nolut function to the operator_c.c file.'''
+    s: str = '''\
+//
+// Function called by Cython
+//
+void COMMIT_At_nolut(
+    int _nF, int _n,
+    double *_vIN, double *_vOUT,
+    uint32_t *_ICf, uint32_t *_ICv, float *_ICl,
+    uint32_t *_ISOv,
+    uint8_t* _ICthreadsT, uint32_t* _ISOthreadsT,
+    uint32_t _nISO, uint32_t _nThreads
+)
+{
+    nF = _nF;
+    n  = _n;
+
+    x = _vOUT;
+    Y = _vIN;
+
+    ICf  = _ICf;
+    ICv  = _ICv;
+    ICl  = _ICl;
+    ISOv = _ISOv;
+
+    nISO = _nISO;
+
+    ICthreadsT  = _ICthreadsT;
+    ISOthreadsT = _ISOthreadsT;
+
+    // Run SEPARATE THREADS to perform the multiplication
+    pthread_t threads[MAX_THREADS];
+    int t;
+    for(t=0; t<_nThreads ; t++)
+        pthread_create( &threads[t], NULL, COMMIT_At__block_nolut, (void *) (long int)t );
+    for(t=0; t<_nThreads ; t++)
+        pthread_join( threads[t], NULL );
+    return;
+}\n'''
+    return s
+
+
+def add_commit() -> str:
+    '''Merge all the COMMIT functions.'''
+    s: str = ''
+    s += add_commit_a_block()
+    s += add_commit_a()
+    s += add_commit_at_block()
+    s += add_commit_at()
+    return s
+
+
+def add_commit_nolut() -> str:
+    '''Merge all the COMMIT_nolut functions.'''
+    s: str = ''
+    s += add_commit_a_block_nolut()
+    s += add_commit_a_nolut()
+    s += add_commit_at_block_nolut()
+    s += add_commit_at_nolut()
+    return s
+
+
+def write_operator_c_file() -> NoReturn:
+    '''Merge the COMMIT and COMMIT_nolut functions and write them to the operator_c.c file.'''
+    s: str = ''
+    s += add_imports()
+    s += add_global_variables()
+    s += add_commit()
+    s += add_commit_nolut()
+
+    with open(path_join('commit', 'operator', 'operator_c.c'), 'w') as f:
+        f.write(s)