diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml new file mode 100644 index 00000000..e44da58a --- /dev/null +++ b/.github/workflows/build_wheels.yml @@ -0,0 +1,189 @@ +name: Build wheels +run-name: Build wheels - ${{ github.sha }} +on: + push: + branches: + - 'master' + - 'release/**' +jobs: + build_windows_wheels: + strategy: + matrix: + py: [cp38, cp39, cp310, cp311, cp312] + arch: + - [AMD64, win_amd64, x64, x64, 64bit] + - [x86, win32, x86, Win32, 32bit] + name: ${{ matrix.py }}-${{ matrix.arch[1] }} + runs-on: windows-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Compile pthread-win32 + run: | + Import-Module 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\Microsoft.VisualStudio.DevShell.dll' + Enter-VsDevShell -VsInstallPath 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise' -DevCmdArguments '-arch=x64' -StartInPath 'C:\' + git clone https://github.com/GerHobbelt/pthread-win32.git + cd C:\pthread-win32\windows\VS2022 + msbuild .\pthread.2022.sln -t:pthread_static_lib -p:Configuration=Release,Platform=${{ matrix.arch[3] }} + cd C:\ + mkdir C:\pthread-win32_static_lib + mkdir C:\pthread-win32_static_lib\include + mkdir C:\pthread-win32_static_lib\lib + cp C:\pthread-win32\windows\VS2022\bin\Release-Unicode-${{ matrix.arch[4] }}-${{ matrix.arch[2] }}\pthread_static_lib.lib C:\pthread-win32_static_lib\lib\pthread.lib + cp C:\pthread-win32\_ptw32.h C:\pthread-win32_static_lib\include + cp C:\pthread-win32\pthread.h C:\pthread-win32_static_lib\include + cp C:\pthread-win32\sched.h C:\pthread-win32_static_lib\include + cp C:\pthread-win32\semaphore.h C:\pthread-win32_static_lib\include + + - name: Build wheel ${{ matrix.py }}-${{ matrix.arch[1] }} + uses: pypa/cibuildwheel@v2.19.1 + env: + PTHREAD_WIN_INCLUDE: C:\pthread-win32_static_lib\include + PTHREAD_WIN_LIB: C:\pthread-win32_static_lib\lib + CIBW_PLATFORM: windows + CIBW_BUILD: ${{ matrix.py }}-${{ matrix.arch[1] }} + CIBW_ARCHS_WINDOWS: ${{ matrix.arch[0] }} + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: wheels_${{ matrix.py }}_${{ matrix.arch[1] }} + path: ./wheelhouse/*.whl + if-no-files-found: error + + build_macos_wheels: + strategy: + matrix: + config: + [ + { + py: cp38, + arch: [x86_64, macosx_x86_64, 12.0, macos-12] + }, + { + py: cp39, + arch: [x86_64, macosx_x86_64, 12.0, macos-12] + }, + { + py: cp310, + arch: [x86_64, macosx_x86_64, 12.0, macos-12] + }, + { + py: cp311, + arch: [x86_64, macosx_x86_64, 12.0, macos-12] + }, + { + py: cp312, + arch: [x86_64, macosx_x86_64, 12.0, macos-12] + }, + { + py: cp38, + arch: [arm64, macosx_arm64, 12.0, macos-14] + }, + { + py: cp39, + arch: [arm64, macosx_arm64, 12.0, macos-14] + }, + { + py: cp310, + arch: [arm64, macosx_arm64, 12.0, macos-14] + }, + { + py: cp311, + arch: [arm64, macosx_arm64, 12.0, macos-14] + }, + { + py: cp312, + arch: [arm64, macosx_arm64, 12.0, macos-14] + } + ] + name: ${{ matrix.config.py }}-${{ matrix.config.arch[1] }} + runs-on: ${{ matrix.config.arch[3] }} + if: + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install pipx # NOTE: required only for arm64 + if: startsWith(matrix.config.arch[0], 'arm64') + run: | + brew install pipx + + - name: Build wheel ${{ matrix.config.py }}-${{ matrix.config.arch[1] }} + uses: pypa/cibuildwheel@v2.19.1 + env: + MACOSX_DEPLOYMENT_TARGET: ${{ matrix.config.arch[2] }} + CIBW_PLATFORM: macos + CIBW_BUILD: ${{ matrix.config.py }}-${{ matrix.config.arch[1] }} + CIBW_ARCHS_MACOS: ${{ matrix.config.arch[0] }} + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: wheels_${{ matrix.config.py }}_${{ matrix.config.arch[1] }} + path: ./wheelhouse/*.whl + if-no-files-found: error + + build_linux_wheels: + strategy: + matrix: + py: [cp38, cp39, cp310, cp311, cp312] + arch: + - [x86_64, manylinux_x86_64, amd64] + - [aarch64, manylinux_aarch64, arm64] + name: ${{ matrix.py }}-${{ matrix.arch[1] }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3.0.0 + with: + platforms: ${{ matrix.arch[2] }} + + - name: Build wheel ${{ matrix.py }}-${{ matrix.arch[1] }} + uses: pypa/cibuildwheel@v2.19.1 + env: + CIBW_PLATFORM: linux + CIBW_BUILD: ${{ matrix.py }}-${{ matrix.arch[1] }} + CIBW_ARCHS_LINUX: ${{ matrix.arch[0] }} + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: wheels_${{ matrix.py }}_${{ matrix.arch[1] }} + path: ./wheelhouse/*.whl + if-no-files-found: error + + build_source_distribution: + name: sdist + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Build source distribution + run: | + pip install -U pip + pip install -U build + python -m build --sdist + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: sdist + path: ./dist/*.tar.gz + if-no-files-found: error + + run_id: + name: Create/Update WHEELS_ARTIFACTS_RUN_ID secret + runs-on: ubuntu-latest + needs: [build_windows_wheels, build_macos_wheels, build_linux_wheels, build_source_distribution] + steps: + - uses: actions/checkout@v4 + - run: | + gh secret set WHEELS_ARTIFACTS_RUN_ID --body ${{ github.run_id }} + env: + GH_TOKEN: ${{ secrets.GH_PAT }} diff --git a/.github/workflows/publish_on_pypi.yml b/.github/workflows/publish_on_pypi.yml new file mode 100644 index 00000000..bb020956 --- /dev/null +++ b/.github/workflows/publish_on_pypi.yml @@ -0,0 +1,56 @@ +name: Publish on PyPI +run-name: Publish on PyPI - ${{ github.sha }} +on: + release: + types: [published] +jobs: + publish_on_pypi: + name: Publish on PyPI + if: github.event.release.prerelease == false + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/project/dmri-commit + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + github-token: ${{ secrets.GH_PAT }} + run-id: ${{ secrets.WHEELS_ARTIFACTS_RUN_ID }} + path: dist + merge-multiple: true + + - name: Publish on PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + skip-existing: true + verbose: true + print-hash: true + + publish_on_pypi_test: + name: Publish on PyPI Test + if: github.event.release.prerelease == true && contains(github.event.release.tag_name, 'rc') + runs-on: ubuntu-latest + environment: + name: testpypi + url: https://test.pypi.org/project/dmri-commit + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + github-token: ${{ secrets.GH_PAT }} + run-id: ${{ secrets.WHEELS_ARTIFACTS_RUN_ID }} + path: dist + merge-multiple: true + + - name: Publish on PyPI Test + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + skip-existing: true + verbose: true + print-hash: true diff --git a/CHANGELOG.md b/CHANGELOG.md index 4032f88a..0a8fb89f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,20 @@ # Change Log ### All notable changes to `COMMIT` will be documented in this file. +## `v2.3.0`
_2024-07-04_ +### ✨Added +- Added support for Windows (requires the `pthread-win32` library) +- Precompiled wheels for Windows, MacOS, and Linux are now available on PyPI + +### 🛠️Changed +- `operator.pyx` no more compiled at runtime +- Restict the `numpy` version to `<2.0.0` + +### 🐛Fixed +- Improved output when running from Jupyter notebooks + +--- +--- ## `v2.2.0`
_2024-04-12_ ### 🐛Fixed diff --git a/MANIFEST.in b/MANIFEST.in index 46c7147b..e7ff52f8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,3 +2,4 @@ recursive-include commit *.c recursive-include commit *.cpp recursive-include commit *.h recursive-include commit *pyx +include setup_operator.py diff --git a/commit/core.pyx b/commit/core.pyx old mode 100755 new mode 100644 index 3454d46c..27656767 --- a/commit/core.pyx +++ b/commit/core.pyx @@ -27,6 +27,7 @@ from dicelib.utils import format_time import commit.models import commit.solvers +from commit.operator import operator logger = setup_logger('core') @@ -78,6 +79,7 @@ cdef class Evaluation : cdef public A cdef public regularisation_params cdef public x + cdef public x_nnls cdef public CONFIG cdef public temp_data cdef public confidence_map_img @@ -103,6 +105,7 @@ cdef class Evaluation : self.regularisation_params = None # set by "set_regularisation" method self.x = None # set by "fit" method self.confidence_map_img = None # set by "fit" method + self.x_nnls = None # set by "fit" method (coefficients of IC compartment estimated without regularization) self.verbose = 3 # store all the parameters of an evaluation with COMMIT @@ -223,8 +226,9 @@ cdef class Evaluation : if self.get_config('doNormalizeSignal') : if self.scheme.b0_count > 0: - logger.subinfo('Normalizing to b0:', with_progress=True, indent_char='*', indent_lvl=1) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('Normalizing to b0:', with_progress=True, indent_char='*', indent_lvl=1) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): b0 = np.mean( self.niiDWI_img[:,:,:,self.scheme.b0_idx], axis=3 ) idx = b0 <= b0_min_signal * b0[b0>0].mean() b0[ idx ] = 1 @@ -352,8 +356,9 @@ cdef class Evaluation : tic = time.time() logger.subinfo('') logger.info( 'Loading the kernels' ) - logger.subinfo( 'Resampling LUT for subject "%s":' % self.get_config('subject'), indent_char='*', indent_lvl=1, with_progress=True ) # TODO: check why not printed - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo( 'Resampling LUT for subject "%s":' % self.get_config('subject'), indent_char='*', indent_lvl=1, with_progress=True ) # TODO: check why not printed + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): # auxiliary data structures idx_OUT, Ylm_OUT = amico.lut.aux_structures_resample( self.scheme, self.get_config('lmax') ) @@ -375,8 +380,9 @@ cdef class Evaluation : # De-mean kernels if self.get_config('doDemean') : - logger.subinfo('Demeaning signal', with_progress=True, indent_lvl=2, indent_char='-' ) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('Demeaning signal', with_progress=True, indent_lvl=2, indent_char='-' ) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): for j in xrange(self.get_config('ndirs')) : for i in xrange(nIC) : self.KERNELS['wmr'][i,j,:] -= self.KERNELS['wmr'][i,j,:].mean() @@ -387,8 +393,9 @@ cdef class Evaluation : # Normalize atoms if self.get_config('doNormalizeKernels') : - logger.subinfo('Normalizing kernels', with_progress=True, indent_lvl=2, indent_char='-' ) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('Normalizing kernels', with_progress=True, indent_lvl=2, indent_char='-' ) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): self.KERNELS['wmr_norm'] = np.zeros( nIC ) for i in xrange(nIC) : self.KERNELS['wmr_norm'][i] = np.linalg.norm( self.KERNELS['wmr'][i,0,:] ) @@ -458,8 +465,9 @@ cdef class Evaluation : # segments from the tracts # ------------------------ - logger.subinfo('Segments from the tracts:', indent_char='*', indent_lvl=1, with_progress=True ) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('Segments from the tracts:', indent_char='*', indent_lvl=1, with_progress=True ) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): self.DICTIONARY['TRK'] = {} self.DICTIONARY['TRK']['kept'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_kept.dict'), dtype=np.uint8 ) self.DICTIONARY['TRK']['norm'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_norm.dict'), dtype=np.float32 ) @@ -498,8 +506,9 @@ cdef class Evaluation : # segments from the peaks # ----------------------- - logger.subinfo('Segments from the peaks:', indent_char='*', indent_lvl=1, with_progress=True ) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('Segments from the peaks:', indent_char='*', indent_lvl=1, with_progress=True ) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): self.DICTIONARY['EC'] = {} self.DICTIONARY['EC']['v'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_v.dict'), dtype=np.uint32 ) self.DICTIONARY['EC']['o'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_o.dict'), dtype=np.uint16 ) @@ -515,8 +524,9 @@ cdef class Evaluation : # isotropic compartments # ---------------------- - logger.subinfo('Isotropic contributions:', indent_char='*', indent_lvl=1, with_progress=True ) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('Isotropic contributions:', indent_char='*', indent_lvl=1, with_progress=True ) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): self.DICTIONARY['ISO'] = {} self.DICTIONARY['ISO']['v'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_ISO_v.dict'), dtype=np.uint32 ) @@ -533,8 +543,9 @@ cdef class Evaluation : # post-processing # --------------- - logger.subinfo('Post-processing', indent_char='*', indent_lvl=1, with_progress=True ) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('Post-processing', indent_char='*', indent_lvl=1, with_progress=True ) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): # get the indices to extract the VOI as in MATLAB (in place of DICTIONARY.MASKidx) idx = self.DICTIONARY['MASK'].ravel(order='F').nonzero()[0] self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] = np.unravel_index( idx, self.DICTIONARY['MASK'].shape, order='F' ) @@ -557,7 +568,7 @@ cdef class Evaluation : ---------- n : integer Number of threads to use (default : number of CPUs in the system) - """ + """ if n is None : # Use the same number of threads used in trk2dictionary n = self.DICTIONARY['n_threads'] @@ -574,7 +585,7 @@ cdef class Evaluation : self.THREADS['n'] = n cdef : - long [:] C + long long [:] C long t, tot, i1, i2, N, c int i @@ -584,8 +595,9 @@ cdef class Evaluation : logger.subinfo('Number of threads: %d' % n , indent_char='*', indent_lvl=1 ) # Distribute load for the computation of A*x product - logger.subinfo('A operator ', indent_char='*', indent_lvl=1, with_progress=True ) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('A operator ', indent_char='*', indent_lvl=1, with_progress=True ) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): if self.DICTIONARY['IC']['n'] > 0 : self.THREADS['IC'] = np.zeros( n+1, dtype=np.uint32 ) if n > 1 : @@ -636,8 +648,9 @@ cdef class Evaluation : # Distribute load for the computation of At*y product - logger.subinfo('A\' operator', indent_char='*', indent_lvl=1, with_progress=True ) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('A\' operator', indent_char='*', indent_lvl=1, with_progress=True ) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): if self.DICTIONARY['IC']['n'] > 0 : self.THREADS['ICt'] = np.full( self.DICTIONARY['IC']['n'], n-1, dtype=np.uint8 ) if n > 1 : @@ -692,19 +705,9 @@ cdef class Evaluation : logger.info( f'[ {format_time(time.time() - tic)} ]' ) - def build_operator( self, build_dir=None ) : - """Compile/build the operator for computing the matrix-vector multiplications by A and A' + def build_operator( self ) : + """Build the operator for computing the matrix-vector multiplications by A and A' using the informations from self.DICTIONARY, self.KERNELS and self.THREADS. - NB: needs to call this function to update pointers to data structures in case - the data is changed in self.DICTIONARY, self.KERNELS or self.THREADS. - - Parameters - ---------- - build_dir : string - The folder in which to store the compiled files. - If None (default), they will end up in the .pyxbld directory in the user’s home directory. - If using this option, it is recommended to use a temporary directory, quit your python - console between each build, and delete the content of the temporary directory. """ if self.DICTIONARY is None : logger.error( 'Dictionary not loaded; call "load_dictionary()" first' ) @@ -722,49 +725,7 @@ cdef class Evaluation : logger.subinfo('') logger.info( 'Building linear operator A' ) - # need to pass these parameters at runtime for compiling the C code - from commit.operator import config - - compilation_is_needed = False - - if config.nTHREADS is None or config.nTHREADS != self.THREADS['n']: - compilation_is_needed = True - if config.nIC is None or config.nIC != self.KERNELS['wmr'].shape[0]: - compilation_is_needed = True - if config.model is None or config.model != self.model.id: - compilation_is_needed = True - if config.nEC is None or config.nEC != self.KERNELS['wmh'].shape[0]: - compilation_is_needed = True - if config.nISO is None or config.nISO != self.KERNELS['iso'].shape[0]: - compilation_is_needed = True - if config.build_dir != build_dir: - compilation_is_needed = True - - if compilation_is_needed or not 'commit.operator.operator' in sys.modules : - - if build_dir is not None: - if isdir(build_dir) and not len(listdir(build_dir)) == 0: - logger.error( '\nbuild_dir is not empty, unsafe build option.' ) - elif config.nTHREADS is not None: - logger.error( '\nThe parameter build_dir has changed, unsafe build option.' ) - else: - logger.warning( '\nUsing build_dir, always quit your python console between COMMIT Evaluation.' ) - - config.nTHREADS = self.THREADS['n'] - config.model = self.model.id - config.nIC = self.KERNELS['wmr'].shape[0] - config.nEC = self.KERNELS['wmh'].shape[0] - config.nISO = self.KERNELS['iso'].shape[0] - config.build_dir = build_dir - - sys.dont_write_bytecode = True - pyximport.install( reload_support=True, language_level=3, build_dir=build_dir, build_in_temp=True, inplace=False ) - - if 'commit.operator.operator' in sys.modules : - del sys.modules['commit.operator.operator'] - import commit.operator.operator - - self.A = commit.operator.operator.LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS ) + self.A = operator.LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, True if hasattr(self.model, 'nolut') else False ) logger.info( f'[ {format_time(time.time() - tic)} ]' ) @@ -869,7 +830,7 @@ cdef class Evaluation : This field can be specified only if regularisers[0] is 'group_lasso' or 'sparse_group_lasso'. NB: this array must have the same size as the number of groups in the IC compartment and contain only non-negative values. 'coeff_weights' - np.array(np.float64) : - weights associated to each individual element of the compartment (implemented for all compartments). + weights associated to each individual element of the compartment (for the moment implemented only for IC compartment). This field can be specified only if the chosen regulariser is 'lasso' or 'sparse_group_lasso'. NB: this array must have the same size as the number of elements in the compartment and contain only non-negative values. @@ -960,7 +921,7 @@ cdef class Evaluation : logger.error('Regularisation parameters for the IC compartment ara not exactly two') elif lambdas[0][0] < 0 or lambdas[0][1] < 0: logger.error('Regularisation parameters for the IC compartment must be greater than 0') - elif lambdas[0][0] == 0 or lambdas[0][1] == 0: + elif lambdas[0][0] == 0 and lambdas[0][1] == 0: logger.warning('Regularisation parameters for the IC compartment are both 0, the solution will be the same as the one without regularisation') regularisation['lambdaIC_perc'] = lambdas[0] else: @@ -972,7 +933,7 @@ cdef class Evaluation : # check if coeff_weights is consistent with the regularisation if regularisation['regIC'] not in ['lasso', 'sparse_group_lasso'] and dictIC_params is not None and 'coeff_weights' in dictIC_params: - logger.warning('Coefficients weights are allowed only for lasso and sparse_group_lasso regularisation') + logger.warning('Coefficients weights are allowed only for lasso and sparse_group_lasso regularisations. Ignoring "coeff_weights"') # check if coeff_weights is consistent with the compartment size if regularisation['regIC'] == 'lasso' or regularisation['regIC'] == 'sparse_group_lasso': @@ -981,7 +942,7 @@ cdef class Evaluation : logger.error('All coefficients weights must be non-negative') if dictIC_params['coeff_weights'].size != len(self.DICTIONARY['TRK']['kept']): logger.error(f'"coeff_weights" must have the same size as the number of elements in the IC compartment (got {dictIC_params["coeff_weights"].size} but {len(self.DICTIONARY["TRK"]["kept"])} expected)') - dictIC_params['coeff_weights'] = dictIC_params['coeff_weights'][self.DICTIONARY['TRK']['kept']==1] + dictIC_params['coeff_weights_kept'] = dictIC_params['coeff_weights'][self.DICTIONARY['TRK']['kept']==1] # check if group parameters are consistent with the regularisation if regularisation['regIC'] not in ['group_lasso', 'sparse_group_lasso'] and dictIC_params is not None: @@ -1058,24 +1019,25 @@ cdef class Evaluation : newweightsIC_group.append(weightsIC_group[count]) newweightsIC_group = np.array(newweightsIC_group, dtype=np.float64) - dictIC_params['group_idx'] = np.array(newICgroup_idx, dtype=np.object_) + dictIC_params['group_idx_kept'] = np.array(newICgroup_idx, dtype=np.object_) if weightsIC_group.size != newweightsIC_group.size: logger.warning(f"""\ Not all the original groups are kept. - {weightsIC_group.size - newweightsIC_group.size} groups have been removed because their streamlines didn't satify the criteria set in trk2dictionary.""") + {weightsIC_group.size - newweightsIC_group.size} groups have been removed because their streamlines didn't satisfy the criteria set in trk2dictionary.""") else: newweightsIC_group = weightsIC_group + dictIC_params['group_idx_kept'] = dictIC_params['group_idx'] # compute group weights if regularisation['regIC'] == 'group_lasso' or regularisation['regIC'] == 'sparse_group_lasso': if dictIC_params['group_weights_cardinality']: - group_size = np.array([g.size for g in dictIC_params['group_idx']], dtype=np.int32) + group_size = np.array([g.size for g in dictIC_params['group_idx_kept']], dtype=np.int32) newweightsIC_group *= np.sqrt(group_size) if dictIC_params['group_weights_adaptive']: - if self.x is None or self.regularisation_params['regIC'] is not None: + if self.x_nnls is None: #or self.regularisation_params['regIC'] is not None: logger.error('Group weights cannot be computed if the fit without regularisation has not been performed before') - x_nnls, _, _ = self.get_coeffs(get_normalized=False) - group_x_norm = np.array([np.linalg.norm(x_nnls[g])+1e-12 for g in dictIC_params['group_idx']], dtype=np.float64) + # x_nnls, _, _ = self.get_coeffs(get_normalized=False) + group_x_norm = np.array([np.linalg.norm(self.x_nnls[g])+1e-12 for g in dictIC_params['group_idx_kept']], dtype=np.float64) newweightsIC_group /= group_x_norm dictIC_params['group_weights'] = newweightsIC_group @@ -1083,21 +1045,24 @@ cdef class Evaluation : # update lambdas using lambda_max if regularisation['regIC'] == 'lasso': - if dictIC_params is not None and 'coeff_weights' in dictIC_params: - regularisation['lambdaIC_max'] = compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC'], dictIC_params['coeff_weights']) + if dictIC_params is not None and 'coeff_weights_kept' in dictIC_params: + regularisation['lambdaIC_max'] = compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC'], dictIC_params['coeff_weights_kept']) else: regularisation['lambdaIC_max'] = compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC'], np.ones(regularisation['sizeIC'], dtype=np.float64)) regularisation['lambdaIC'] = regularisation['lambdaIC_perc'] * regularisation['lambdaIC_max'] if regularisation['regIC'] == 'group_lasso': - regularisation['lambdaIC_max'] = compute_lambda_max_group(dictIC_params['group_weights'], dictIC_params['group_idx']) + regularisation['lambdaIC_max'] = compute_lambda_max_group(dictIC_params['group_weights'], dictIC_params['group_idx_kept']) regularisation['lambdaIC'] = regularisation['lambdaIC_perc'] * regularisation['lambdaIC_max'] if regularisation['regIC'] == 'sparse_group_lasso': - regularisation['lambdaIC_max'] = ( compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC']), compute_lambda_max_group(dictIC_params['group_weights'], dictIC_params['group_idx']) ) + if 'coeff_weights_kept' in dictIC_params: + regularisation['lambdaIC_max'] = ( compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC'], dictIC_params['coeff_weights_kept']), compute_lambda_max_group(dictIC_params['group_weights'], dictIC_params['group_idx_kept']) ) + else: + regularisation['lambdaIC_max'] = ( compute_lambda_max_lasso(regularisation['startIC'], regularisation['sizeIC'], np.ones(regularisation['sizeIC'], dtype=np.float64)), compute_lambda_max_group(dictIC_params['group_weights'], dictIC_params['group_idx_kept']) ) regularisation['lambdaIC'] = ( regularisation['lambdaIC_perc'][0] * regularisation['lambdaIC_max'][0], regularisation['lambdaIC_perc'][1] * regularisation['lambdaIC_max'][1] ) # print if regularisation['regIC'] is not None: - if (regularisation['regIC'] == 'lasso' or regularisation['regIC'] == 'sparse_group_lasso') and dictIC_params is not None and 'coeff_weights' in dictIC_params: + if (regularisation['regIC'] == 'lasso' or regularisation['regIC'] == 'sparse_group_lasso') and dictIC_params is not None and 'coeff_weights_kept' in dictIC_params: logger.subinfo( f'Regularisation type: {regularisation["regIC"]} (weighted version)', indent_lvl=2, indent_char='-' ) else: logger.subinfo( f'Regularisation type: {regularisation["regIC"]}', indent_lvl=2, indent_char='-' ) @@ -1108,7 +1073,7 @@ cdef class Evaluation : logger.debug( f'% lambda: {regularisation["lambdaIC_perc"]}' ) logger.debug( f'Lambda used: {regularisation["lambdaIC"]}' ) if regularisation['regIC'] == 'group_lasso' or regularisation['regIC'] == 'sparse_group_lasso': - logger.debug( f'Number of groups: {len(dictIC_params["group_idx"])}' ) + logger.debug( f'Number of groups: {len(dictIC_params["group_idx_kept"])}' ) if dictIC_params['group_weights_cardinality']==False and dictIC_params['group_weights_adaptive']==False and dictIC_params['group_weights_extra'] is None: logger.debug( 'Group weights are not considered (all ones)' ) else: @@ -1139,9 +1104,9 @@ cdef class Evaluation : regularisation['lambdaEC_perc'] = lambdas[1] else: regularisation['lambdaEC_perc'] = lambdas[1] - if dictEC_params is not None and 'coeff_weights' in dictEC_params: - if dictEC_params['coeff_weights'].size != regularisation['sizeEC']: - logger.error(f'"coeff_weights" must have the same size as the number of elements in the EC compartment (got {dictEC_params["coeff_weights"].size} but {regularisation["sizeEC"]} expected)') + # if dictEC_params is not None and 'coeff_weights' in dictEC_params: + # if dictEC_params['coeff_weights'].size != regularisation['sizeEC']: + # logger.error(f'"coeff_weights" must have the same size as the number of elements in the EC compartment (got {dictEC_params["coeff_weights"].size} but {regularisation["sizeEC"]} expected)') elif regularisation['regEC'] == 'smoothness': logger.error('Not yet implemented') elif regularisation['regEC'] == 'group_lasso': @@ -1155,18 +1120,18 @@ cdef class Evaluation : # update lambdas using lambda_max if regularisation['regEC'] == 'lasso': - if dictEC_params is not None and 'coeff_weights' in dictEC_params: - regularisation['lambdaEC_max'] = compute_lambda_max_lasso(regularisation['startEC'], regularisation['sizeEC'], dictEC_params['coeff_weights']) - else: - regularisation['lambdaEC_max'] = compute_lambda_max_lasso(regularisation['startEC'], regularisation['sizeEC'], np.ones(regularisation['sizeEC'], dtype=np.float64)) + # if dictEC_params is not None and 'coeff_weights' in dictEC_params: + # regularisation['lambdaEC_max'] = compute_lambda_max_lasso(regularisation['startEC'], regularisation['sizeEC'], dictEC_params['coeff_weights']) + # else: + regularisation['lambdaEC_max'] = compute_lambda_max_lasso(regularisation['startEC'], regularisation['sizeEC'], np.ones(regularisation['sizeEC'], dtype=np.float64)) regularisation['lambdaEC'] = regularisation['lambdaEC_perc'] * regularisation['lambdaEC_max'] # print if regularisation['regEC'] is not None: - if regularisation['regEC'] == 'lasso' and dictEC_params is not None and 'coeff_weights' in dictEC_params: - logger.subinfo( f'Regularisation type: {regularisation["regEC"]} (weighted version)', indent_lvl=2, indent_char='-' ) - else: - logger.subinfo( f'Regularisation type: {regularisation["regEC"]}', indent_lvl=2, indent_char='-' ) + # if regularisation['regEC'] == 'lasso' and dictEC_params is not None and 'coeff_weights' in dictEC_params: + # logger.subinfo( f'Regularisation type: {regularisation["regEC"]} (weighted version)', indent_lvl=2, indent_char='-' ) + # else: + logger.subinfo( f'Regularisation type: {regularisation["regEC"]}', indent_lvl=2, indent_char='-' ) logger.subinfo( f'Non-negativity constraint: {regularisation["nnEC"]}', indent_char='-', indent_lvl=2 ) @@ -1194,9 +1159,9 @@ cdef class Evaluation : regularisation['lambdaISO_perc'] = lambdas[2] else: regularisation['lambdaISO_perc'] = lambdas[2] - if dictISO_params is not None and 'coeff_weights' in dictISO_params: - if dictISO_params['coeff_weights'].size != regularisation['sizeISO']: - logger.error(f'"coeff_weights" must have the same size as the number of elements in the ISO compartment (got {dictISO_params["coeff_weights"].size} but {regularisation["sizeISO"]} expected)') + # if dictISO_params is not None and 'coeff_weights' in dictISO_params: + # if dictISO_params['coeff_weights'].size != regularisation['sizeISO']: + # logger.error(f'"coeff_weights" must have the same size as the number of elements in the ISO compartment (got {dictISO_params["coeff_weights"].size} but {regularisation["sizeISO"]} expected)') elif regularisation['regISO'] == 'smoothness': logger.error('Not yet implemented') elif regularisation['regISO'] == 'group_lasso': @@ -1210,18 +1175,18 @@ cdef class Evaluation : # update lambdas using lambda_max if regularisation['regISO'] == 'lasso': - if dictISO_params is not None and 'coeff_weights' in dictISO_params: - regularisation['lambdaISO_max'] = compute_lambda_max_lasso(regularisation['startISO'], regularisation['sizeISO'], dictISO_params['coeff_weights']) - else: - regularisation['lambdaISO_max'] = compute_lambda_max_lasso(regularisation['startISO'], regularisation['sizeISO'], np.ones(regularisation['sizeISO'], dtype=np.float64)) + # if dictISO_params is not None and 'coeff_weights' in dictISO_params: + # regularisation['lambdaISO_max'] = compute_lambda_max_lasso(regularisation['startISO'], regularisation['sizeISO'], dictISO_params['coeff_weights']) + # else: + regularisation['lambdaISO_max'] = compute_lambda_max_lasso(regularisation['startISO'], regularisation['sizeISO'], np.ones(regularisation['sizeISO'], dtype=np.float64)) regularisation['lambdaISO'] = regularisation['lambdaISO_perc'] * regularisation['lambdaISO_max'] # print if regularisation['regISO'] is not None: - if regularisation['regISO'] == 'lasso' and dictISO_params is not None and 'coeff_weights' in dictISO_params: - logger.subinfo( f'Regularisation type: {regularisation["regISO"]} (weighted version)', indent_lvl=2, indent_char='-' ) - else: - logger.subinfo( f'Regularisation type: {regularisation["regISO"]}', indent_lvl=2, indent_char='-' ) + # if regularisation['regISO'] == 'lasso' and dictISO_params is not None and 'coeff_weights' in dictISO_params: + # logger.subinfo( f'Regularisation type: {regularisation["regISO"]} (weighted version)', indent_lvl=2, indent_char='-' ) + # else: + logger.subinfo( f'Regularisation type: {regularisation["regISO"]}', indent_lvl=2, indent_char='-' ) logger.subinfo( f'Non-negativity constraint: {regularisation["nnISO"]}', indent_char='-', indent_lvl=2 ) if regularisation['regISO'] is not None: @@ -1353,12 +1318,15 @@ cdef class Evaluation : # run solver t = time.time() - with ProgressBar(disable=self.verbose!=3, hide_on_exit=True) as pb: + with ProgressBar(disable=self.verbose!=3, hide_on_exit=True): self.x, opt_details = commit.solvers.solve(self.get_y(), self.A, self.A.T, tol_fun=tol_fun, tol_x=tol_x, max_iter=max_iter, verbose=self.verbose, x0=x0, regularisation=self.regularisation_params, confidence_array=confidence_array) self.CONFIG['optimization']['fit_details'] = opt_details self.CONFIG['optimization']['fit_time'] = time.time()-t + if self.regularisation_params['regIC'] is None and self.x_nnls is None: + self.x_nnls, _, _ = self.get_coeffs(get_normalized=False) + logger.info( f'[ {format_time(self.CONFIG["optimization"]["fit_time"])} ]' ) @@ -1515,8 +1483,9 @@ cdef class Evaluation : # Map of compartment contributions logger.subinfo('Voxelwise contributions:', indent_char='*', indent_lvl=1) - logger.subinfo('Intra-axonal', indent_lvl=2, indent_char='-', with_progress=True) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('Intra-axonal', indent_lvl=2, indent_char='-', with_progress=True) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): niiIC_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) if len(self.KERNELS['wmr']) > 0 : offset = nF * self.KERNELS['wmr'].shape[0] @@ -1526,8 +1495,9 @@ cdef class Evaluation : ).astype(np.float32) niiIC_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv - logger.subinfo('Extra-axonal', indent_lvl=2, indent_char='-', with_progress=True) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('Extra-axonal', indent_lvl=2, indent_char='-', with_progress=True) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): niiEC_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) if len(self.KERNELS['wmh']) > 0 : offset = nF * self.KERNELS['wmr'].shape[0] @@ -1535,8 +1505,9 @@ cdef class Evaluation : xv = np.bincount( self.DICTIONARY['EC']['v'], weights=tmp, minlength=nV ).astype(np.float32) niiEC_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv - logger.subinfo('Isotropic ', indent_lvl=2, indent_char='-', with_progress=True) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('Isotropic ', indent_lvl=2, indent_char='-', with_progress=True) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): niiISO_img = np.zeros( self.get_config('dim'), dtype=np.float32 ) if len(self.KERNELS['iso']) > 0 : offset = nF * self.KERNELS['wmr'].shape[0] + nE * self.KERNELS['wmh'].shape[0] @@ -1560,8 +1531,9 @@ cdef class Evaluation : # Configuration and results logger.subinfo('Configuration and results:', indent_char='*', indent_lvl=1) - logger.subinfo('streamline_weights.txt', indent_lvl=2, indent_char='-', with_progress=True) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('streamline_weights.txt', indent_lvl=2, indent_char='-', with_progress=True) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): xic, _, _ = self.get_coeffs() if stat_coeffs != 'all' and xic.size > 0 : xic = np.reshape( xic, (-1,self.DICTIONARY['TRK']['kept'].size) ) @@ -1620,8 +1592,9 @@ cdef class Evaluation : # item 0: dictionary with all the configuration details # item 1: np.array obtained through the optimisation process with the normalised kernels # item 2: np.array renormalisation of coeffs in item 1 - logger.subinfo('results.pickle', indent_char='-', indent_lvl=2, with_progress=True) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo('results.pickle', indent_char='-', indent_lvl=2, with_progress=True) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): xic, xec, xiso = self.get_coeffs() x = self.x if self.get_config('doNormalizeKernels') : @@ -1631,9 +1604,10 @@ cdef class Evaluation : self.CONFIG['optimization']['regularisation'].pop('prox', None) pickle.dump( [self.CONFIG, x, self.x], fid, protocol=2 ) - if save_est_dwi : - logger.subinfo('Estimated signal:', indent_char='-', indent_lvl=2, with_progress=True) - with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=True) as pbar: + if save_est_dwi: + log_list = [] + ret_subinfo = logger.subinfo('Estimated signal:', indent_char='-', indent_lvl=2, with_progress=True) + with ProgressBar(disable=self.verbose < 3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ] = y_est nibabel.save( nibabel.Nifti1Image( self.niiDWI_img , affine ), pjoin(RESULTS_path,'fit_signal_estimated.nii.gz') ) self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ] = y_mea diff --git a/commit/models.py b/commit/models.py index 03741d84..105489bd 100755 --- a/commit/models.py +++ b/commit/models.py @@ -74,6 +74,7 @@ def __init__(self): self.name = 'Volume fractions' self.maps_name = [] self.maps_descr = [] + self.nolut = True def set(self): return diff --git a/commit/operator/config.py b/commit/operator/config.py deleted file mode 100755 index 8cbac4ed..00000000 --- a/commit/operator/config.py +++ /dev/null @@ -1,6 +0,0 @@ -nTHREADS = None -model = None -nIC = None -nEC = None -nISO = None -build_dir = None diff --git a/commit/operator/operator.pyx b/commit/operator/operator.pyx index 70f1d505..d3ffbdbc 100755 --- a/commit/operator/operator.pyx +++ b/commit/operator/operator.pyx @@ -7,13 +7,14 @@ from dicelib.ui import setup_logger # Interfaces to actual C code performing the multiplications cdef extern void COMMIT_A( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + int _nF, int _nE, int _nV, int _nS, int _ndirs, double *_v_in, double *_v_out, unsigned int *_ICf, unsigned int *_ICv, unsigned short *_ICo, float *_ICl, unsigned int *_ECv, unsigned short *_ECo, unsigned int *_ISOv, float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - unsigned int* _ICthreads, unsigned int* _ECthreads, unsigned int* _ISOthreads + unsigned int* _ICthreads, unsigned int* _ECthreads, unsigned int* _ISOthreads, + unsigned int _nIC, unsigned int _nEC, unsigned int _nISO, unsigned int _nThreads ) nogil cdef extern void COMMIT_At( @@ -23,7 +24,26 @@ cdef extern void COMMIT_At( unsigned int *_ECv, unsigned short *_ECo, unsigned int *_ISOv, float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - unsigned char *_ICthreadsT, unsigned int *_ECthreadsT, unsigned int *_ISOthreadsT + unsigned char* _ICthreadsT, unsigned int* _ECthreadsT, unsigned int* _ISOthreadsT, + unsigned int _nIC, unsigned int _nEC, unsigned int _nISO, unsigned int _nThreads +) nogil + +cdef extern void COMMIT_A_nolut( + int _nF, + double *_v_in, double *_v_out, + unsigned int *_ICf, unsigned int *_ICv, float *_ICl, + unsigned int *_ISOv, + unsigned int* _ICthreads, unsigned int* _ISOthreads, + unsigned int _nISO, unsigned int _nThreads +) nogil + +cdef extern void COMMIT_At_nolut( + int _nF, int _n, + double *_v_in, double *_v_out, + unsigned int *_ICf, unsigned int *_ICv, float *_ICl, + unsigned int *_ISOv, + unsigned char* _ICthreadsT, unsigned int* _ISOthreadsT, + unsigned int _nISO, unsigned int _nThreads ) nogil logger = setup_logger('operator') @@ -39,6 +59,7 @@ cdef class LinearOperator : cdef DICTIONARY cdef KERNELS cdef THREADS + cdef nolut cdef unsigned int* ICf cdef float* ICl @@ -61,11 +82,12 @@ cdef class LinearOperator : cdef unsigned int* ISOthreadsT - def __init__( self, DICTIONARY, KERNELS, THREADS ) : + def __init__( self, DICTIONARY, KERNELS, THREADS, nolut=False ) : """Set the pointers to the data structures used by the C code.""" self.DICTIONARY = DICTIONARY self.KERNELS = KERNELS self.THREADS = THREADS + self.nolut = nolut self.nF = DICTIONARY['IC']['nF'] # number of FIBERS self.nR = KERNELS['wmr'].shape[0] # number of FIBER RADII @@ -131,7 +153,7 @@ cdef class LinearOperator : @property def T( self ) : """Transpose of the explicit matrix.""" - C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS ) + C = LinearOperator( self.DICTIONARY, self.KERNELS, self.THREADS, self.nolut ) C.adjoint = 1 - C.adjoint return C @@ -166,27 +188,59 @@ cdef class LinearOperator : # Create output array cdef double [::1] v_out = np.zeros( self.shape[0], dtype=np.float64 ) + cdef unsigned int nthreads = self.THREADS['n'] + cdef unsigned int nIC = self.KERNELS['wmr'].shape[0] + cdef unsigned int nEC = self.KERNELS['wmh'].shape[0] + cdef unsigned int nISO = self.KERNELS['iso'].shape[0] + # Call the cython function to read the memory pointers if not self.adjoint : # DIRECT PRODUCT A*x - - with nogil : - COMMIT_A( - self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, - &v_in[0], &v_out[0], - self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv, - self.LUT_IC, self.LUT_EC, self.LUT_ISO, - self.ICthreads, self.ECthreads, self.ISOthreads - ) + if self.nolut: + with nogil: + COMMIT_A_nolut( + self.nF, + &v_in[0], &v_out[0], + self.ICf, self.ICv, self.ICl, + self.ISOv, + self.ICthreads, self.ISOthreads, + nISO, nthreads + ) + else: + with nogil: + COMMIT_A( + self.nF, self.nE, self.nV, self.nS, self.ndirs, + &v_in[0], &v_out[0], + self.ICf, self.ICv, self.ICo, self.ICl, + self.ECv, self.ECo, + self.ISOv, + self.LUT_IC, self.LUT_EC, self.LUT_ISO, + self.ICthreads, self.ECthreads, self.ISOthreads, + nIC, nEC, nISO, nthreads + ) else : # INVERSE PRODUCT A'*y - with nogil : - COMMIT_At( - self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, - &v_in[0], &v_out[0], - self.ICf, self.ICv, self.ICo, self.ICl, self.ECv, self.ECo, self.ISOv, - self.LUT_IC, self.LUT_EC, self.LUT_ISO, - self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT - ) + if self.nolut: + with nogil: + COMMIT_At_nolut( + self.nF, self.n, + &v_in[0], &v_out[0], + self.ICf, self.ICv, self.ICl, + self.ISOv, + self.ICthreadsT, self.ISOthreadsT, + nISO, nthreads + ) + else: + with nogil: + COMMIT_At( + self.nF, self.n, self.nE, self.nV, self.nS, self.ndirs, + &v_in[0], &v_out[0], + self.ICf, self.ICv, self.ICo, self.ICl, + self.ECv, self.ECo, + self.ISOv, + self.LUT_IC, self.LUT_EC, self.LUT_ISO, + self.ICthreadsT, self.ECthreadsT, self.ISOthreadsT, + nIC, nEC, nISO, nthreads + ) return v_out diff --git a/commit/operator/operator.pyxbld b/commit/operator/operator.pyxbld deleted file mode 100644 index f3967a15..00000000 --- a/commit/operator/operator.pyxbld +++ /dev/null @@ -1,39 +0,0 @@ -import numpy -from os import utime -from os.path import dirname, join -from setuptools import Extension - -# pass parameters to the compiler at runtime -# [TODO] find a way to avoid using this fake module -from commit.operator import config - - -def make_ext(modname, pyxfilename): - - if (config.nTHREADS is None or config.nTHREADS < 1 or config.nTHREADS > 255): - raise RuntimeError('config.nTHREADS must be between 1 and 255') - if (config.nIC is None or config.nIC < 0 or config.nIC > 20): - raise RuntimeError('config.nIC must be in the range [0..20]') - if (config.nEC is None or config.nEC < 0 or config.nEC > 20): - raise RuntimeError('config.nEC must be in the range [0..20]') - if (config.nISO is None or config.nISO < 0 or config.nISO > 20): - raise RuntimeError('config.nISO must be in the range [0..20]') - - # Force recompilation - if config.model == "VolumeFractions": - filename = "operator_noLUT.c" - else: - filename = "operator_withLUT.c" - path = dirname(pyxfilename) - - if config.build_dir is None: - utime( join(path,filename), None) - - return Extension(name=modname, - sources=[pyxfilename, join(path, filename)], - include_dirs=[numpy.get_include()], - define_macros=[('nTHREADS', config.nTHREADS), - ('nIC', config.nIC), - ('nEC', config.nEC), - ('nISO', config.nISO)], - extra_compile_args=['-w', '-O3', '-Ofast']) diff --git a/commit/operator/operator_noLUT.c b/commit/operator/operator_noLUT.c deleted file mode 100644 index 0046f237..00000000 --- a/commit/operator/operator_noLUT.c +++ /dev/null @@ -1,187 +0,0 @@ -#include -#include // uint32_t etc - -// number of THREADS -#ifdef nTHREADS - #if (nTHREADS<1 || nTHREADS>255) - #error "nTHREADS" must be in the range 1..255 - #endif -#else - #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=" -#endif - - -/* global variables */ -int nF, n; -double *x, *Y; -uint32_t *ICthreads, *ISOthreads; -uint8_t *ICthreadsT; -uint32_t *ISOthreadsT; -uint32_t *ICf, *ICv, *ISOv; -float *ICl; - - -// ==================================================== -// Compute a sub-block of the A*x MAtRIX-VECTOR product -// ==================================================== -void* COMMIT_A__block( void *ptr ) -{ - int id = (long)ptr; - double x0; - double *xPtr; - uint32_t *t_v, *t_vEnd, *t_f; - float *t_l; - - // intra-cellular compartments - t_v = ICv + ICthreads[id]; - t_vEnd = ICv + ICthreads[id+1]; - t_l = ICl + ICthreads[id]; - t_f = ICf + ICthreads[id]; - - while( t_v != t_vEnd ) - { - x0 = x[*t_f]; - if ( x0 != 0 ) - Y[*t_v] += (double)(*t_l) * x0; - t_f++; - t_v++; - t_l++; - } - -#if nISO>=1 - // isotropic compartments - t_v = ISOv + ISOthreads[id]; - t_vEnd = ISOv + ISOthreads[id+1]; - xPtr = x + nF + ISOthreads[id]; - - while( t_v != t_vEnd ) - { - x0 = *xPtr++; - if ( x0 != 0 ) - Y[*t_v] += x0; - t_v++; - } -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_A( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads -) -{ - nF = _nF; - n = _n; - - x = _vIN; - Y = _vOUT; - - ICf = _ICf; - ICv = _ICv; - ICl = _ICl; - ISOv = _ISOv; - - ICthreads = _ICthreads; - ISOthreads = _ISOthreads; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t=1 - // isotropic compartments - t_v = ISOv + ISOthreadsT[id]; - t_vEnd = ISOv + ISOthreadsT[id+1]; - xPtr = x + nF + ISOthreadsT[id]; - - while( t_v != t_vEnd ) - (*xPtr++) += Y[*t_v++]; -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_At( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT -) -{ - nF = _nF; - n = _n; - - x = _vOUT; - Y = _vIN; - - ICf = _ICf; - ICv = _ICv; - ICl = _ICl; - ISOv = _ISOv; - - ICthreadsT = _ICthreadsT; - ISOthreadsT = _ISOthreadsT; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t -#include // uint32_t etc - -// number of THREADS -#ifdef nTHREADS - #if (nTHREADS<1 || nTHREADS>255) - #error "nTHREADS" must be in the range 1..255 - #endif -#else - #error "nTHREADS" parameter must be passed to the compiler as "-DnTHREADS=" -#endif - - -/* global variables */ -int nF, n, nE, nV, nS, ndirs; -double *x, *Y; -uint32_t *ICthreads, *ECthreads, *ISOthreads; -uint8_t *ICthreadsT; -uint32_t *ECthreadsT, *ISOthreadsT; -uint32_t *ICf, *ICv, *ECv, *ISOv; -uint16_t *ICo, *ECo; -float *ICl; -float *wmrSFP0, *wmrSFP1, *wmrSFP2, *wmrSFP3, *wmrSFP4, *wmrSFP5, *wmrSFP6, *wmrSFP7, *wmrSFP8, *wmrSFP9, *wmrSFP10, *wmrSFP11, *wmrSFP12, *wmrSFP13, *wmrSFP14, *wmrSFP15, *wmrSFP16, *wmrSFP17, *wmrSFP18, *wmrSFP19; -float *wmhSFP0, *wmhSFP1, *wmhSFP2, *wmhSFP3, *wmhSFP4, *wmhSFP5, *wmhSFP6, *wmhSFP7, *wmhSFP8, *wmhSFP9, *wmhSFP10, *wmhSFP11, *wmhSFP12, *wmhSFP13, *wmhSFP14, *wmhSFP15, *wmhSFP16, *wmhSFP17, *wmhSFP18, *wmhSFP19; -float *isoSFP0, *isoSFP1, *isoSFP2, *isoSFP3, *isoSFP4, *isoSFP5, *isoSFP6, *isoSFP7, *isoSFP8, *isoSFP9, *isoSFP10, *isoSFP11, *isoSFP12, *isoSFP13, *isoSFP14, *isoSFP15, *isoSFP16, *isoSFP17, *isoSFP18, *isoSFP19; - - - -// ==================================================== -// Compute a sub-block of the A*x MAtRIX-VECTOR product -// ==================================================== -void* COMMIT_A__block( void *ptr ) -{ - int id = (long)ptr; - int offset; - double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w; - double *x_Ptr0, *x_Ptr1, *x_Ptr2, *x_Ptr3, *x_Ptr4, *x_Ptr5, *x_Ptr6, *x_Ptr7, *x_Ptr8, *x_Ptr9, *x_Ptr10, *x_Ptr11, *x_Ptr12, *x_Ptr13, *x_Ptr14, *x_Ptr15, *x_Ptr16, *x_Ptr17, *x_Ptr18, *x_Ptr19; - double *Yptr, *YptrEnd; - float *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr; - uint32_t *t_v, *t_vEnd, *t_f; - uint16_t *t_o; - float *t_l; - -#if nIC>=1 - // intra-cellular compartments - t_v = ICv + ICthreads[id]; - t_vEnd = ICv + ICthreads[id+1]; - t_o = ICo + ICthreads[id]; - t_l = ICl + ICthreads[id]; - t_f = ICf + ICthreads[id]; - - while( t_v != t_vEnd ) - { - x_Ptr0 = x + *t_f; - x0 = *x_Ptr0; - #if nIC>=2 - x_Ptr1 = x_Ptr0 + nF; - x1 = *x_Ptr1; - #endif - #if nIC>=3 - x_Ptr2 = x_Ptr1 + nF; - x2 = *x_Ptr2; - #endif - #if nIC>=4 - x_Ptr3 = x_Ptr2 + nF; - x3 = *x_Ptr3; - #endif - #if nIC>=5 - x_Ptr4 = x_Ptr3 + nF; - x4 = *x_Ptr4; - #endif - #if nIC>=6 - x_Ptr5 = x_Ptr4 + nF; - x5 = *x_Ptr5; - #endif - #if nIC>=7 - x_Ptr6 = x_Ptr5 + nF; - x6 = *x_Ptr6; - #endif - #if nIC>=8 - x_Ptr7 = x_Ptr6 + nF; - x7 = *x_Ptr7; - #endif - #if nIC>=9 - x_Ptr8 = x_Ptr7 + nF; - x8 = *x_Ptr8; - #endif - #if nIC>=10 - x_Ptr9 = x_Ptr8 + nF; - x9 = *x_Ptr9; - #endif - #if nIC>=11 - x_Ptr10 = x_Ptr9 + nF; - x10 = *x_Ptr10; - #endif - #if nIC>=12 - x_Ptr11 = x_Ptr10 + nF; - x11 = *x_Ptr11; - #endif - #if nIC>=13 - x_Ptr12 = x_Ptr11 + nF; - x12 = *x_Ptr12; - #endif - #if nIC>=14 - x_Ptr13 = x_Ptr12 + nF; - x13 = *x_Ptr13; - #endif - #if nIC>=15 - x_Ptr14 = x_Ptr13 + nF; - x14 = *x_Ptr14; - #endif - #if nIC>=16 - x_Ptr15 = x_Ptr14 + nF; - x15 = *x_Ptr15; - #endif - #if nIC>=17 - x_Ptr16 = x_Ptr15 + nF; - x16 = *x_Ptr16; - #endif - #if nIC>=18 - x_Ptr17 = x_Ptr16 + nF; - x17 = *x_Ptr17; - #endif - #if nIC>=19 - x_Ptr18 = x_Ptr17 + nF; - x18 = *x_Ptr18; - #endif - #if nIC>=20 - x_Ptr19 = x_Ptr18 + nF; - x19 = *x_Ptr19; - #endif - - if ( x0 != 0 - #if nIC>=2 - || x1 != 0 - #endif - #if nIC>=3 - || x2 != 0 - #endif - #if nIC>=4 - || x3 != 0 - #endif - #if nIC>=5 - || x4 != 0 - #endif - #if nIC>=6 - || x5 != 0 - #endif - #if nIC>=7 - || x6 != 0 - #endif - #if nIC>=8 - || x7 != 0 - #endif - #if nIC>=9 - || x8 != 0 - #endif - #if nIC>=10 - || x9 != 0 - #endif - #if nIC>=11 - || x10 != 0 - #endif - #if nIC>=12 - || x11 != 0 - #endif - #if nIC>=13 - || x12 != 0 - #endif - #if nIC>=14 - || x13 != 0 - #endif - #if nIC>=15 - || x14 != 0 - #endif - #if nIC>=16 - || x15 != 0 - #endif - #if nIC>=17 - || x16 != 0 - #endif - #if nIC>=18 - || x17 != 0 - #endif - #if nIC>=19 - || x18 != 0 - #endif - #if nIC>=20 - || x19 != 0 - #endif - ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - w = (double)(*t_l); - offset = nS * (*t_o); - SFP0ptr = wmrSFP0 + offset; - #if nIC>=2 - SFP1ptr = wmrSFP1 + offset; - #endif - #if nIC>=3 - SFP2ptr = wmrSFP2 + offset; - #endif - #if nIC>=4 - SFP3ptr = wmrSFP3 + offset; - #endif - #if nIC>=5 - SFP4ptr = wmrSFP4 + offset; - #endif - #if nIC>=6 - SFP5ptr = wmrSFP5 + offset; - #endif - #if nIC>=7 - SFP6ptr = wmrSFP6 + offset; - #endif - #if nIC>=8 - SFP7ptr = wmrSFP7 + offset; - #endif - #if nIC>=9 - SFP8ptr = wmrSFP8 + offset; - #endif - #if nIC>=10 - SFP9ptr = wmrSFP9 + offset; - #endif - #if nIC>=11 - SFP10ptr = wmrSFP10 + offset; - #endif - #if nIC>=12 - SFP11ptr = wmrSFP11 + offset; - #endif - #if nIC>=13 - SFP12ptr = wmrSFP12 + offset; - #endif - #if nIC>=14 - SFP13ptr = wmrSFP13 + offset; - #endif - #if nIC>=15 - SFP14ptr = wmrSFP14 + offset; - #endif - #if nIC>=16 - SFP15ptr = wmrSFP15 + offset; - #endif - #if nIC>=17 - SFP16ptr = wmrSFP16 + offset; - #endif - #if nIC>=18 - SFP17ptr = wmrSFP17 + offset; - #endif - #if nIC>=19 - SFP18ptr = wmrSFP18 + offset; - #endif - #if nIC>=20 - SFP19ptr = wmrSFP19 + offset; - #endif - - while( Yptr != YptrEnd ) - (*Yptr++) += w * ( - x0 * (*SFP0ptr++) - #if nIC>=2 - + x1 * (*SFP1ptr++) - #endif - #if nIC>=3 - + x2 * (*SFP2ptr++) - #endif - #if nIC>=4 - + x3 * (*SFP3ptr++) - #endif - #if nIC>=5 - + x4 * (*SFP4ptr++) - #endif - #if nIC>=6 - + x5 * (*SFP5ptr++) - #endif - #if nIC>=7 - + x6 * (*SFP6ptr++) - #endif - #if nIC>=8 - + x7 * (*SFP7ptr++) - #endif - #if nIC>=9 - + x8 * (*SFP8ptr++) - #endif - #if nIC>=10 - + x9 * (*SFP9ptr++) - #endif - #if nIC>=11 - + x10 * (*SFP10ptr++) - #endif - #if nIC>=12 - + x11 * (*SFP11ptr++) - #endif - #if nIC>=13 - + x12 * (*SFP12ptr++) - #endif - #if nIC>=14 - + x13 * (*SFP13ptr++) - #endif - #if nIC>=15 - + x14 * (*SFP14ptr++) - #endif - #if nIC>=16 - + x15 * (*SFP15ptr++) - #endif - #if nIC>=17 - + x16 * (*SFP16ptr++) - #endif - #if nIC>=18 - + x17 * (*SFP17ptr++) - #endif - #if nIC>=19 - + x18 * (*SFP18ptr++) - #endif - #if nIC>=20 - + x19 * (*SFP19ptr++) - #endif - ); - } - - t_f++; - t_v++; - t_o++; - t_l++; - } -#endif - -#if nEC>=1 - // extra-cellular compartments - t_v = ECv + ECthreads[id]; - t_vEnd = ECv + ECthreads[id+1]; - t_o = ECo + ECthreads[id]; - - x_Ptr0 = x + nIC*nF + ECthreads[id]; - #if nEC>=2 - x_Ptr1 = x_Ptr0 + nE; - #endif - #if nEC>=3 - x_Ptr2 = x_Ptr1 + nE; - #endif - #if nEC>=4 - x_Ptr3 = x_Ptr2 + nE; - #endif - #if nEC>=5 - x_Ptr4 = x_Ptr3 + nE; - #endif - #if nEC>=6 - x_Ptr5 = x_Ptr4 + nE; - #endif - #if nEC>=7 - x_Ptr6 = x_Ptr5 + nE; - #endif - #if nEC>=8 - x_Ptr7 = x_Ptr6 + nE; - #endif - #if nEC>=9 - x_Ptr8 = x_Ptr7 + nE; - #endif - #if nEC>=10 - x_Ptr9 = x_Ptr8 + nE; - #endif - #if nEC>=11 - x_Ptr10 = x_Ptr9 + nE; - #endif - #if nEC>=12 - x_Ptr11 = x_Ptr10 + nE; - #endif - #if nEC>=13 - x_Ptr12 = x_Ptr11 + nE; - #endif - #if nEC>=14 - x_Ptr13 = x_Ptr12 + nE; - #endif - #if nEC>=15 - x_Ptr14 = x_Ptr13 + nE; - #endif - #if nEC>=16 - x_Ptr15 = x_Ptr14 + nE; - #endif - #if nEC>=17 - x_Ptr16 = x_Ptr15 + nE; - #endif - #if nEC>=18 - x_Ptr17 = x_Ptr16 + nE; - #endif - #if nEC>=19 - x_Ptr18 = x_Ptr17 + nE; - #endif - #if nEC>=20 - x_Ptr19 = x_Ptr18 + nE; - #endif - - while( t_v != t_vEnd ) - { - x0 = *x_Ptr0++; - #if nEC>=2 - x1 = *x_Ptr1++; - #endif - #if nEC>=3 - x2 = *x_Ptr2++; - #endif - #if nEC>=4 - x3 = *x_Ptr3++; - #endif - #if nEC>=5 - x4 = *x_Ptr4++; - #endif - #if nEC>=6 - x5 = *x_Ptr5++; - #endif - #if nEC>=7 - x6 = *x_Ptr6++; - #endif - #if nEC>=8 - x7 = *x_Ptr7++; - #endif - #if nEC>=9 - x8 = *x_Ptr8++; - #endif - #if nEC>=10 - x9 = *x_Ptr9++; - #endif - #if nEC>=11 - x10 = *x_Ptr10++; - #endif - #if nEC>=12 - x11 = *x_Ptr11++; - #endif - #if nEC>=13 - x12 = *x_Ptr12++; - #endif - #if nEC>=14 - x13 = *x_Ptr13++; - #endif - #if nEC>=15 - x14 = *x_Ptr14++; - #endif - #if nEC>=16 - x15 = *x_Ptr15++; - #endif - #if nEC>=17 - x16 = *x_Ptr16++; - #endif - #if nEC>=18 - x17 = *x_Ptr17++; - #endif - #if nEC>=19 - x18 = *x_Ptr18++; - #endif - #if nEC>=20 - x19 = *x_Ptr19++; - #endif - if ( - x0 != 0 - #if nEC>=2 - || x1 != 0 - #endif - #if nEC>=3 - || x2 != 0 - #endif - #if nEC>=4 - || x3 != 0 - #endif - #if nEC>=5 - || x4 != 0 - #endif - #if nEC>=6 - || x5 != 0 - #endif - #if nEC>=7 - || x6 != 0 - #endif - #if nEC>=8 - || x7 != 0 - #endif - #if nEC>=9 - || x8 != 0 - #endif - #if nEC>=10 - || x9 != 0 - #endif - #if nEC>=11 - || x10 != 0 - #endif - #if nEC>=12 - || x11 != 0 - #endif - #if nEC>=13 - || x12 != 0 - #endif - #if nEC>=14 - || x13 != 0 - #endif - #if nEC>=15 - || x14 != 0 - #endif - #if nEC>=16 - || x15 != 0 - #endif - #if nEC>=17 - || x16 != 0 - #endif - #if nEC>=18 - || x17 != 0 - #endif - #if nEC>=19 - || x18 != 0 - #endif - #if nEC>=20 - || x19 != 0 - #endif - ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - offset = nS * (*t_o); - SFP0ptr = wmhSFP0 + offset; - #if nEC>=2 - SFP1ptr = wmhSFP1 + offset; - #endif - #if nEC>=3 - SFP2ptr = wmhSFP2 + offset; - #endif - #if nEC>=4 - SFP3ptr = wmhSFP3 + offset; - #endif - #if nEC>=5 - SFP4ptr = wmhSFP4 + offset; - #endif - #if nEC>=6 - SFP5ptr = wmhSFP5 + offset; - #endif - #if nEC>=7 - SFP6ptr = wmhSFP6 + offset; - #endif - #if nEC>=8 - SFP7ptr = wmhSFP7 + offset; - #endif - #if nEC>=9 - SFP8ptr = wmhSFP8 + offset; - #endif - #if nEC>=10 - SFP9ptr = wmhSFP9 + offset; - #endif - #if nEC>=11 - SFP10ptr = wmhSFP10 + offset; - #endif - #if nEC>=12 - SFP11ptr = wmhSFP11 + offset; - #endif - #if nEC>=13 - SFP12ptr = wmhSFP12 + offset; - #endif - #if nEC>=14 - SFP13ptr = wmhSFP13 + offset; - #endif - #if nEC>=15 - SFP14ptr = wmhSFP14 + offset; - #endif - #if nEC>=16 - SFP15ptr = wmhSFP15 + offset; - #endif - #if nEC>=17 - SFP16ptr = wmhSFP16 + offset; - #endif - #if nEC>=18 - SFP17ptr = wmhSFP17 + offset; - #endif - #if nEC>=19 - SFP18ptr = wmhSFP18 + offset; - #endif - #if nEC>=20 - SFP19ptr = wmhSFP19 + offset; - #endif - - while( Yptr != YptrEnd ) - (*Yptr++) += ( - x0 * (*SFP0ptr++) - #if nEC>=2 - + x1 * (*SFP1ptr++) - #endif - #if nEC>=3 - + x2 * (*SFP2ptr++) - #endif - #if nEC>=4 - + x3 * (*SFP3ptr++) - #endif - #if nEC>=5 - + x4 * (*SFP4ptr++) - #endif - #if nEC>=6 - + x5 * (*SFP5ptr++) - #endif - #if nEC>=7 - + x6 * (*SFP6ptr++) - #endif - #if nEC>=8 - + x7 * (*SFP7ptr++) - #endif - #if nEC>=9 - + x8 * (*SFP8ptr++) - #endif - #if nEC>=10 - + x9 * (*SFP9ptr++) - #endif - #if nEC>=11 - + x10 * (*SFP10ptr++) - #endif - #if nEC>=12 - + x11 * (*SFP11ptr++) - #endif - #if nEC>=13 - + x12 * (*SFP12ptr++) - #endif - #if nEC>=14 - + x13 * (*SFP13ptr++) - #endif - #if nEC>=15 - + x14 * (*SFP14ptr++) - #endif - #if nEC>=16 - + x15 * (*SFP15ptr++) - #endif - #if nEC>=17 - + x16 * (*SFP16ptr++) - #endif - #if nEC>=18 - + x17 * (*SFP17ptr++) - #endif - #if nEC>=19 - + x18 * (*SFP18ptr++) - #endif - #if nEC>=20 - + x19 * (*SFP19ptr++) - #endif - - ); - } - t_v++; - t_o++; - } -#endif - -#if nISO>=1 - // isotropic compartments - t_v = ISOv + ISOthreads[id]; - t_vEnd = ISOv + ISOthreads[id+1]; - - x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreads[id]; - #if nISO>=2 - x_Ptr1 = x_Ptr0 + nV; - #endif - #if nISO>=3 - x_Ptr2 = x_Ptr1 + nV; - #endif - #if nISO>=4 - x_Ptr3 = x_Ptr2 + nV; - #endif - #if nISO>=5 - x_Ptr4 = x_Ptr3 + nV; - #endif - #if nISO>=6 - x_Ptr5 = x_Ptr4 + nV; - #endif - #if nISO>=7 - x_Ptr6 = x_Ptr5 + nV; - #endif - #if nISO>=8 - x_Ptr7 = x_Ptr6 + nV; - #endif - #if nISO>=9 - x_Ptr8 = x_Ptr7 + nV; - #endif - #if nISO>=10 - x_Ptr9 = x_Ptr8 + nV; - #endif - #if nISO>=11 - x_Ptr10 = x_Ptr9 + nV; - #endif - #if nISO>=12 - x_Ptr11 = x_Ptr10 + nV; - #endif - #if nISO>=13 - x_Ptr12 = x_Ptr11 + nV; - #endif - #if nISO>=14 - x_Ptr13 = x_Ptr12 + nV; - #endif - #if nISO>=15 - x_Ptr14 = x_Ptr13 + nV; - #endif - #if nISO>=16 - x_Ptr15 = x_Ptr14 + nV; - #endif - #if nISO>=17 - x_Ptr16 = x_Ptr15 + nV; - #endif - #if nISO>=18 - x_Ptr17 = x_Ptr16 + nV; - #endif - #if nISO>=19 - x_Ptr18 = x_Ptr17 + nV; - #endif - #if nISO>=20 - x_Ptr19 = x_Ptr18 + nV; - #endif - - while( t_v != t_vEnd ) - { - x0 = *x_Ptr0++; - #if nISO>=2 - x1 = *x_Ptr1++; - #endif - #if nISO>=3 - x2 = *x_Ptr2++; - #endif - #if nISO>=4 - x3 = *x_Ptr3++; - #endif - #if nISO>=5 - x4 = *x_Ptr4++; - #endif - #if nISO>=6 - x5 = *x_Ptr5++; - #endif - #if nISO>=7 - x6 = *x_Ptr6++; - #endif - #if nISO>=8 - x7 = *x_Ptr7++; - #endif - #if nISO>=9 - x8 = *x_Ptr8++; - #endif - #if nISO>=10 - x9 = *x_Ptr9++; - #endif - #if nISO>=11 - x10 = *x_Ptr10++; - #endif - #if nISO>=12 - x11 = *x_Ptr11++; - #endif - #if nISO>=13 - x12 = *x_Ptr12++; - #endif - #if nISO>=14 - x13 = *x_Ptr13++; - #endif - #if nISO>=15 - x14 = *x_Ptr14++; - #endif - #if nISO>=16 - x15 = *x_Ptr15++; - #endif - #if nISO>=17 - x16 = *x_Ptr16++; - #endif - #if nISO>=18 - x17 = *x_Ptr17++; - #endif - #if nISO>=19 - x18 = *x_Ptr18++; - #endif - #if nISO>=20 - x19 = *x_Ptr19++; - #endif - - if ( - x0 != 0 - #if nISO>=2 - || x1 != 0 - #endif - #if nISO>=3 - || x2 != 0 - #endif - #if nISO>=4 - || x3 != 0 - #endif - #if nISO>=5 - || x4 != 0 - #endif - #if nISO>=6 - || x5 != 0 - #endif - #if nISO>=7 - || x6 != 0 - #endif - #if nISO>=8 - || x7 != 0 - #endif - #if nISO>=9 - || x8 != 0 - #endif - #if nISO>=10 - || x9 != 0 - #endif - #if nISO>=11 - || x10 != 0 - #endif - #if nISO>=12 - || x11 != 0 - #endif - #if nISO>=13 - || x12 != 0 - #endif - #if nISO>=14 - || x13 != 0 - #endif - #if nISO>=15 - || x14 != 0 - #endif - #if nISO>=16 - || x15 != 0 - #endif - #if nISO>=17 - || x16 != 0 - #endif - #if nISO>=18 - || x17 != 0 - #endif - #if nISO>=19 - || x18 != 0 - #endif - #if nISO>=20 - || x19 != 0 - #endif - ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - SFP0ptr = isoSFP0; - #if nISO>=2 - SFP1ptr = isoSFP1; - #endif - #if nISO>=3 - SFP2ptr = isoSFP2; - #endif - #if nISO>=4 - SFP3ptr = isoSFP3; - #endif - #if nISO>=5 - SFP4ptr = isoSFP4; - #endif - #if nISO>=6 - SFP5ptr = isoSFP5; - #endif - #if nISO>=7 - SFP6ptr = isoSFP6; - #endif - #if nISO>=8 - SFP7ptr = isoSFP7; - #endif - #if nISO>=9 - SFP8ptr = isoSFP8; - #endif - #if nISO>=10 - SFP9ptr = isoSFP9; - #endif - #if nISO>=11 - SFP10ptr = isoSFP10; - #endif - #if nISO>=12 - SFP11ptr = isoSFP11; - #endif - #if nISO>=13 - SFP12ptr = isoSFP12; - #endif - #if nISO>=14 - SFP13ptr = isoSFP13; - #endif - #if nISO>=15 - SFP14ptr = isoSFP14; - #endif - #if nISO>=16 - SFP15ptr = isoSFP15; - #endif - #if nISO>=17 - SFP16ptr = isoSFP16; - #endif - #if nISO>=18 - SFP17ptr = isoSFP17; - #endif - #if nISO>=19 - SFP18ptr = isoSFP18; - #endif - #if nISO>=20 - SFP19ptr = isoSFP19; - #endif - - while( Yptr != YptrEnd ) - (*Yptr++) += ( - x0 * (*SFP0ptr++) - #if nISO>=2 - + x1 * (*SFP1ptr++) - #endif - #if nISO>=3 - + x2 * (*SFP2ptr++) - #endif - #if nISO>=4 - + x3 * (*SFP3ptr++) - #endif - #if nISO>=5 - + x4 * (*SFP4ptr++) - #endif - #if nISO>=6 - + x5 * (*SFP5ptr++) - #endif - #if nISO>=7 - + x6 * (*SFP6ptr++) - #endif - #if nISO>=8 - + x7 * (*SFP7ptr++) - #endif - #if nISO>=9 - + x8 * (*SFP8ptr++) - #endif - #if nISO>=10 - + x9 * (*SFP9ptr++) - #endif - #if nISO>=11 - + x10 * (*SFP10ptr++) - #endif - #if nISO>=12 - + x11 * (*SFP11ptr++) - #endif - #if nISO>=13 - + x12 * (*SFP12ptr++) - #endif - #if nISO>=14 - + x13 * (*SFP13ptr++) - #endif - #if nISO>=15 - + x14 * (*SFP14ptr++) - #endif - #if nISO>=16 - + x15 * (*SFP15ptr++) - #endif - #if nISO>=17 - + x16 * (*SFP16ptr++) - #endif - #if nISO>=18 - + x17 * (*SFP17ptr++) - #endif - #if nISO>=19 - + x18 * (*SFP18ptr++) - #endif - #if nISO>=20 - + x19 * (*SFP19ptr++) - #endif - ); - } - t_v++; - } -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_A( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads -) -{ - nF = _nF; - n = _n; - nE = _nE; - nV = _nV; - nS = _nS; - ndirs = _ndirs; - - x = _vIN; - Y = _vOUT; - - ICf = _ICf; - ICv = _ICv; - ICo = _ICo; - ICl = _ICl; - ECv = _ECv; - ECo = _ECo; - ISOv = _ISOv; - - #if nIC>=1 - wmrSFP0 = _wmrSFP; - #if nIC>=2 - wmrSFP1 = wmrSFP0 + _ndirs*_nS; - #if nIC>=3 - wmrSFP2 = wmrSFP1 + _ndirs*_nS; - #if nIC>=4 - wmrSFP3 = wmrSFP2 + _ndirs*_nS; - #if nIC>=5 - wmrSFP4 = wmrSFP3 + _ndirs*_nS; - #if nIC>=6 - wmrSFP5 = wmrSFP4 + _ndirs*_nS; - #if nIC>=7 - wmrSFP6 = wmrSFP5 + _ndirs*_nS; - #if nIC>=8 - wmrSFP7 = wmrSFP6 + _ndirs*_nS; - #if nIC>=9 - wmrSFP8 = wmrSFP7 + _ndirs*_nS; - #if nIC>=10 - wmrSFP9 = wmrSFP8 + _ndirs*_nS; - #if nIC>=11 - wmrSFP10 = wmrSFP9 + _ndirs*_nS; - #if nIC>=12 - wmrSFP11 = wmrSFP10 + _ndirs*_nS; - #if nIC>=13 - wmrSFP12 = wmrSFP11 + _ndirs*_nS; - #if nIC>=14 - wmrSFP13 = wmrSFP12 + _ndirs*_nS; - #if nIC>=15 - wmrSFP14 = wmrSFP13 + _ndirs*_nS; - #if nIC>=16 - wmrSFP15 = wmrSFP14 + _ndirs*_nS; - #if nIC>=17 - wmrSFP16 = wmrSFP15 + _ndirs*_nS; - #if nIC>=18 - wmrSFP17 = wmrSFP16 + _ndirs*_nS; - #if nIC>=19 - wmrSFP18 = wmrSFP17 + _ndirs*_nS; - #if nIC>=20 - wmrSFP19 = wmrSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nEC>=1 - wmhSFP0 = _wmhSFP; - #if nEC>=2 - wmhSFP1 = wmhSFP0 + _ndirs*_nS; - #if nEC>=3 - wmhSFP2 = wmhSFP1 + _ndirs*_nS; - #if nEC>=4 - wmhSFP3 = wmhSFP2 + _ndirs*_nS; - #if nEC>=5 - wmhSFP4 = wmhSFP3 + _ndirs*_nS; - #if nEC>=6 - wmhSFP5 = wmhSFP4 + _ndirs*_nS; - #if nEC>=7 - wmhSFP6 = wmhSFP5 + _ndirs*_nS; - #if nEC>=8 - wmhSFP7 = wmhSFP6 + _ndirs*_nS; - #if nEC>=9 - wmhSFP8 = wmhSFP7 + _ndirs*_nS; - #if nEC>=10 - wmhSFP9 = wmhSFP8 + _ndirs*_nS; - #if nEC>=11 - wmhSFP10 = wmhSFP9 + _ndirs*_nS; - #if nEC>=12 - wmhSFP11 = wmhSFP10 + _ndirs*_nS; - #if nEC>=13 - wmhSFP12 = wmhSFP11 + _ndirs*_nS; - #if nEC>=14 - wmhSFP13 = wmhSFP12 + _ndirs*_nS; - #if nEC>=15 - wmhSFP14 = wmhSFP13 + _ndirs*_nS; - #if nEC>=16 - wmhSFP15 = wmhSFP14 + _ndirs*_nS; - #if nEC>=17 - wmhSFP16 = wmhSFP15 + _ndirs*_nS; - #if nEC>=18 - wmhSFP17 = wmhSFP16 + _ndirs*_nS; - #if nEC>=19 - wmhSFP18 = wmhSFP17 + _ndirs*_nS; - #if nEC>=20 - wmhSFP19 = wmhSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nISO>=1 - isoSFP0 = _isoSFP; - #if nISO>=2 - isoSFP1 = isoSFP0 + _nS; - #if nISO>=3 - isoSFP2 = isoSFP1 + _nS; - #if nISO>=4 - isoSFP3 = isoSFP2 + _nS; - #if nISO>=5 - isoSFP4 = isoSFP3 + _nS; - #if nISO>=6 - isoSFP5 = isoSFP4 + _nS; - #if nISO>=7 - isoSFP6 = isoSFP5 + _nS; - #if nISO>=8 - isoSFP7 = isoSFP6 + _nS; - #if nISO>=9 - isoSFP8 = isoSFP7 + _nS; - #if nISO>=10 - isoSFP9 = isoSFP8 + _nS; - #if nISO>=11 - isoSFP10 = isoSFP9 + _nS; - #if nISO>=12 - isoSFP11 = isoSFP10 + _nS; - #if nISO>=13 - isoSFP12 = isoSFP11 + _nS; - #if nISO>=14 - isoSFP13 = isoSFP12 + _nS; - #if nISO>=15 - isoSFP14 = isoSFP13 + _nS; - #if nISO>=16 - isoSFP15 = isoSFP14 + _nS; - #if nISO>=17 - isoSFP16 = isoSFP15 + _nS; - #if nISO>=18 - isoSFP17 = isoSFP16 + _nS; - #if nISO>=19 - isoSFP18 = isoSFP17 + _nS; - #if nISO>=20 - isoSFP19 = isoSFP18 + _nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - - ICthreads = _ICthreads; - ECthreads = _ECthreads; - ISOthreads = _ISOthreads; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t=1 - // intra-cellular compartments - t_v = ICv; - t_vEnd = ICv + n; - t_o = ICo; - t_l = ICl; - t_f = ICf; - t_t = ICthreadsT; - - while( t_v != t_vEnd ) - { - // in this case, I need to walk throug because the segments are ordered in "voxel order" - if ( *t_t == id ) - { - Yptr = Y + nS * (*t_v); - YptrEnd = Yptr + nS; - offset = nS * (*t_o); - - Y_tmp = *Yptr; - SFP0ptr = wmrSFP0 + offset; - x0 = (*SFP0ptr++) * Y_tmp; - #if nIC>=2 - SFP1ptr = wmrSFP1 + offset; - x1 = (*SFP1ptr++) * Y_tmp; - #endif - #if nIC>=3 - SFP2ptr = wmrSFP2 + offset; - x2 = (*SFP2ptr++) * Y_tmp; - #endif - #if nIC>=4 - SFP3ptr = wmrSFP3 + offset; - x3 = (*SFP3ptr++) * Y_tmp; - #endif - #if nIC>=5 - SFP4ptr = wmrSFP4 + offset; - x4 = (*SFP4ptr++) * Y_tmp; - #endif - #if nIC>=6 - SFP5ptr = wmrSFP5 + offset; - x5 = (*SFP5ptr++) * Y_tmp; - #endif - #if nIC>=7 - SFP6ptr = wmrSFP6 + offset; - x6 = (*SFP6ptr++) * Y_tmp; - #endif - #if nIC>=8 - SFP7ptr = wmrSFP7 + offset; - x7 = (*SFP7ptr++) * Y_tmp; - #endif - #if nIC>=9 - SFP8ptr = wmrSFP8 + offset; - x8 = (*SFP8ptr++) * Y_tmp; - #endif - #if nIC>=10 - SFP9ptr = wmrSFP9 + offset; - x9 = (*SFP9ptr++) * Y_tmp; - #endif - #if nIC>=11 - SFP10ptr = wmrSFP10 + offset; - x10 = (*SFP10ptr++) * Y_tmp; - #endif - #if nIC>=12 - SFP11ptr = wmrSFP11 + offset; - x11 = (*SFP11ptr++) * Y_tmp; - #endif - #if nIC>=13 - SFP12ptr = wmrSFP12 + offset; - x12 = (*SFP12ptr++) * Y_tmp; - #endif - #if nIC>=14 - SFP13ptr = wmrSFP13 + offset; - x13 = (*SFP13ptr++) * Y_tmp; - #endif - #if nIC>=15 - SFP14ptr = wmrSFP14 + offset; - x14 = (*SFP14ptr++) * Y_tmp; - #endif - #if nIC>=16 - SFP15ptr = wmrSFP15 + offset; - x15 = (*SFP15ptr++) * Y_tmp; - #endif - #if nIC>=17 - SFP16ptr = wmrSFP16 + offset; - x16 = (*SFP16ptr++) * Y_tmp; - #endif - #if nIC>=18 - SFP17ptr = wmrSFP17 + offset; - x17 = (*SFP17ptr++) * Y_tmp; - #endif - #if nIC>=19 - SFP18ptr = wmrSFP18 + offset; - x18 = (*SFP18ptr++) * Y_tmp; - #endif - #if nIC>=20 - SFP19ptr = wmrSFP19 + offset; - x19 = (*SFP19ptr++) * Y_tmp; - #endif - - while( ++Yptr != YptrEnd ) - { - Y_tmp = *Yptr; - x0 += (*SFP0ptr++) * Y_tmp; - #if nIC>=2 - x1 += (*SFP1ptr++) * Y_tmp; - #endif - #if nIC>=3 - x2 += (*SFP2ptr++) * Y_tmp; - #endif - #if nIC>=4 - x3 += (*SFP3ptr++) * Y_tmp; - #endif - #if nIC>=5 - x4 += (*SFP4ptr++) * Y_tmp; - #endif - #if nIC>=6 - x5 += (*SFP5ptr++) * Y_tmp; - #endif - #if nIC>=7 - x6 += (*SFP6ptr++) * Y_tmp; - #endif - #if nIC>=8 - x7 += (*SFP7ptr++) * Y_tmp; - #endif - #if nIC>=9 - x8 += (*SFP8ptr++) * Y_tmp; - #endif - #if nIC>=10 - x9 += (*SFP9ptr++) * Y_tmp; - #endif - #if nIC>=11 - x10 += (*SFP10ptr++) * Y_tmp; - #endif - #if nIC>=12 - x11 += (*SFP11ptr++) * Y_tmp; - #endif - #if nIC>=13 - x12 += (*SFP12ptr++) * Y_tmp; - #endif - #if nIC>=14 - x13 += (*SFP13ptr++) * Y_tmp; - #endif - #if nIC>=15 - x14 += (*SFP14ptr++) * Y_tmp; - #endif - #if nIC>=16 - x15 += (*SFP15ptr++) * Y_tmp; - #endif - #if nIC>=17 - x16 += (*SFP16ptr++) * Y_tmp; - #endif - #if nIC>=18 - x17 += (*SFP17ptr++) * Y_tmp; - #endif - #if nIC>=19 - x18 += (*SFP18ptr++) * Y_tmp; - #endif - #if nIC>=20 - x19 += (*SFP19ptr++) * Y_tmp; - #endif - } - - w = (double)(*t_l); - x[*t_f] += w * x0; - #if nIC>=2 - x[*t_f+nF] += w * x1; - #endif - #if nIC>=3 - x[*t_f+2*nF] += w * x2; - #endif - #if nIC>=4 - x[*t_f+3*nF] += w * x3; - #endif - #if nIC>=5 - x[*t_f+4*nF] += w * x4; - #endif - #if nIC>=6 - x[*t_f+5*nF] += w * x5; - #endif - #if nIC>=7 - x[*t_f+6*nF] += w * x6; - #endif - #if nIC>=8 - x[*t_f+7*nF] += w * x7; - #endif - #if nIC>=9 - x[*t_f+8*nF] += w * x8; - #endif - #if nIC>=10 - x[*t_f+9*nF] += w * x9; - #endif - #if nIC>=11 - x[*t_f+10*nF] += w * x10; - #endif - #if nIC>=12 - x[*t_f+11*nF] += w * x11; - #endif - #if nIC>=13 - x[*t_f+12*nF] += w * x12; - #endif - #if nIC>=14 - x[*t_f+13*nF] += w * x13; - #endif - #if nIC>=15 - x[*t_f+14*nF] += w * x14; - #endif - #if nIC>=16 - x[*t_f+15*nF] += w * x15; - #endif - #if nIC>=17 - x[*t_f+16*nF] += w * x16; - #endif - #if nIC>=18 - x[*t_f+17*nF] += w * x17; - #endif - #if nIC>=19 - x[*t_f+18*nF] += w * x18; - #endif - #if nIC>=20 - x[*t_f+19*nF] += w * x19; - #endif - } - - t_f++; - t_v++; - t_o++; - t_l++; - t_t++; - } -#endif - -#if nEC>=1 - // extra-cellular compartments - t_v = ECv + ECthreadsT[id]; - t_vEnd = ECv + ECthreadsT[id+1]; - t_o = ECo + ECthreadsT[id]; - - x_Ptr0 = x + nIC*nF + ECthreadsT[id]; - #if nEC>=2 - x_Ptr1 = x_Ptr0 + nE; - #endif - #if nEC>=3 - x_Ptr2 = x_Ptr1 + nE; - #endif - #if nEC>=4 - x_Ptr3 = x_Ptr2 + nE; - #endif - #if nEC>=5 - x_Ptr4 = x_Ptr3 + nE; - #endif - #if nEC>=6 - x_Ptr5 = x_Ptr4 + nE; - #endif - #if nEC>=7 - x_Ptr6 = x_Ptr5 + nE; - #endif - #if nEC>=8 - x_Ptr7 = x_Ptr6 + nE; - #endif - #if nEC>=9 - x_Ptr8 = x_Ptr7 + nE; - #endif - #if nEC>=10 - x_Ptr9 = x_Ptr8 + nE; - #endif - #if nEC>=11 - x_Ptr10 = x_Ptr9 + nE; - #endif - #if nEC>=12 - x_Ptr11 = x_Ptr10 + nE; - #endif - #if nEC>=13 - x_Ptr12 = x_Ptr11 + nE; - #endif - #if nEC>=14 - x_Ptr13 = x_Ptr12 + nE; - #endif - #if nEC>=15 - x_Ptr14 = x_Ptr13 + nE; - #endif - #if nEC>=16 - x_Ptr15 = x_Ptr14 + nE; - #endif - #if nEC>=17 - x_Ptr16 = x_Ptr15 + nE; - #endif - #if nEC>=18 - x_Ptr17 = x_Ptr16 + nE; - #endif - #if nEC>=19 - x_Ptr18 = x_Ptr17 + nE; - #endif - #if nEC>=20 - x_Ptr19 = x_Ptr18 + nE; - #endif - - while( t_v != t_vEnd ) - { - Yptr = Y + nS * (*t_v++); - YptrEnd = Yptr + nS; - offset = nS * (*t_o++); - - Y_tmp = *Yptr; - SFP0ptr = wmhSFP0 + offset; - x0 = (*SFP0ptr++) * Y_tmp; - #if nEC>=2 - SFP1ptr = wmhSFP1 + offset; - x1 = (*SFP1ptr++) * Y_tmp; - #endif - #if nEC>=3 - SFP2ptr = wmhSFP2 + offset; - x2 = (*SFP2ptr++) * Y_tmp; - #endif - #if nEC>=4 - SFP3ptr = wmhSFP3 + offset; - x3 = (*SFP3ptr++) * Y_tmp; - #endif - #if nEC>=5 - SFP4ptr = wmhSFP4 + offset; - x4 = (*SFP4ptr++) * Y_tmp; - #endif - #if nEC>=6 - SFP5ptr = wmhSFP5 + offset; - x5 = (*SFP5ptr++) * Y_tmp; - #endif - #if nEC>=7 - SFP6ptr = wmhSFP6 + offset; - x6 = (*SFP6ptr++) * Y_tmp; - #endif - #if nEC>=8 - SFP7ptr = wmhSFP7 + offset; - x7 = (*SFP7ptr++) * Y_tmp; - #endif - #if nEC>=9 - SFP8ptr = wmhSFP8 + offset; - x8 = (*SFP8ptr++) * Y_tmp; - #endif - #if nEC>=10 - SFP9ptr = wmhSFP9 + offset; - x9 = (*SFP9ptr++) * Y_tmp; - #endif - #if nEC>=11 - SFP10ptr = wmhSFP10 + offset; - x10 = (*SFP10ptr++) * Y_tmp; - #endif - #if nEC>=12 - SFP11ptr = wmhSFP11 + offset; - x11 = (*SFP11ptr++) * Y_tmp; - #endif - #if nEC>=13 - SFP12ptr = wmhSFP12 + offset; - x12 = (*SFP12ptr++) * Y_tmp; - #endif - #if nEC>=14 - SFP13ptr = wmhSFP13 + offset; - x13 = (*SFP13ptr++) * Y_tmp; - #endif - #if nEC>=15 - SFP14ptr = wmhSFP14 + offset; - x14 = (*SFP14ptr++) * Y_tmp; - #endif - #if nEC>=16 - SFP15ptr = wmhSFP15 + offset; - x15 = (*SFP15ptr++) * Y_tmp; - #endif - #if nEC>=17 - SFP16ptr = wmhSFP16 + offset; - x16 = (*SFP16ptr++) * Y_tmp; - #endif - #if nEC>=18 - SFP17ptr = wmhSFP17 + offset; - x17 = (*SFP17ptr++) * Y_tmp; - #endif - #if nEC>=19 - SFP18ptr = wmhSFP18 + offset; - x18 = (*SFP18ptr++) * Y_tmp; - #endif - #if nEC>=20 - SFP19ptr = wmhSFP19 + offset; - x19 = (*SFP19ptr++) * Y_tmp; - #endif - - while( ++Yptr != YptrEnd ) - { - Y_tmp = *Yptr; - x0 += (*SFP0ptr++) * Y_tmp; - #if nEC>=2 - x1 += (*SFP1ptr++) * Y_tmp; - #endif - #if nEC>=3 - x2 += (*SFP2ptr++) * Y_tmp; - #endif - #if nEC>=4 - x3 += (*SFP3ptr++) * Y_tmp; - #endif - #if nEC>=5 - x4 += (*SFP4ptr++) * Y_tmp; - #endif - #if nEC>=6 - x5 += (*SFP5ptr++) * Y_tmp; - #endif - #if nEC>=7 - x6 += (*SFP6ptr++) * Y_tmp; - #endif - #if nEC>=8 - x7 += (*SFP7ptr++) * Y_tmp; - #endif - #if nEC>=9 - x8 += (*SFP8ptr++) * Y_tmp; - #endif - #if nEC>=10 - x9 += (*SFP9ptr++) * Y_tmp; - #endif - #if nEC>=11 - x10 += (*SFP10ptr++) * Y_tmp; - #endif - #if nEC>=12 - x11 += (*SFP11ptr++) * Y_tmp; - #endif - #if nEC>=13 - x12 += (*SFP12ptr++) * Y_tmp; - #endif - #if nEC>=14 - x13 += (*SFP13ptr++) * Y_tmp; - #endif - #if nEC>=15 - x14 += (*SFP14ptr++) * Y_tmp; - #endif - #if nEC>=16 - x15 += (*SFP15ptr++) * Y_tmp; - #endif - #if nEC>=17 - x16 += (*SFP16ptr++) * Y_tmp; - #endif - #if nEC>=18 - x17 += (*SFP17ptr++) * Y_tmp; - #endif - #if nEC>=19 - x18 += (*SFP18ptr++) * Y_tmp; - #endif - #if nEC>=20 - x19 += (*SFP19ptr++) * Y_tmp; - #endif - } - (*x_Ptr0++) += x0; - #if nEC>=2 - (*x_Ptr1++) += x1; - #endif - #if nEC>=3 - (*x_Ptr2++) += x2; - #endif - #if nEC>=4 - (*x_Ptr3++) += x3; - #endif - #if nEC>=5 - (*x_Ptr4++) += x4; - #endif - #if nEC>=6 - (*x_Ptr5++) += x5; - #endif - #if nEC>=7 - (*x_Ptr6++) += x6; - #endif - #if nEC>=8 - (*x_Ptr7++) += x7; - #endif - #if nEC>=9 - (*x_Ptr8++) += x8; - #endif - #if nEC>=10 - (*x_Ptr9++) += x9; - #endif - #if nEC>=11 - (*x_Ptr10++) += x10; - #endif - #if nEC>=12 - (*x_Ptr11++) += x11; - #endif - #if nEC>=13 - (*x_Ptr12++) += x12; - #endif - #if nEC>=14 - (*x_Ptr13++) += x13; - #endif - #if nEC>=15 - (*x_Ptr14++) += x14; - #endif - #if nEC>=16 - (*x_Ptr15++) += x15; - #endif - #if nEC>=17 - (*x_Ptr16++) += x16; - #endif - #if nEC>=18 - (*x_Ptr17++) += x17; - #endif - #if nEC>=19 - (*x_Ptr18++) += x18; - #endif - #if nEC>=20 - (*x_Ptr19++) += x19; - #endif - } -#endif - -#if nISO>=1 - // isotropic compartments - t_v = ISOv + ISOthreadsT[id]; - t_vEnd = ISOv + ISOthreadsT[id+1]; - - x_Ptr0 = x + nIC*nF + nEC*nE + ISOthreadsT[id]; - #if nISO>=2 - x_Ptr1 = x_Ptr0 + nV; - #endif - #if nISO>=3 - x_Ptr2 = x_Ptr1 + nV; - #endif - #if nISO>=4 - x_Ptr3 = x_Ptr2 + nV; - #endif - #if nISO>=5 - x_Ptr4 = x_Ptr3 + nV; - #endif - #if nISO>=6 - x_Ptr5 = x_Ptr4 + nV; - #endif - #if nISO>=7 - x_Ptr6 = x_Ptr5 + nV; - #endif - #if nISO>=8 - x_Ptr7 = x_Ptr6 + nV; - #endif - #if nISO>=9 - x_Ptr8 = x_Ptr7 + nV; - #endif - #if nISO>=10 - x_Ptr9 = x_Ptr8 + nV; - #endif - #if nISO>=11 - x_Ptr10 = x_Ptr9 + nV; - #endif - #if nISO>=12 - x_Ptr11 = x_Ptr10 + nV; - #endif - #if nISO>=13 - x_Ptr12 = x_Ptr11 + nV; - #endif - #if nISO>=14 - x_Ptr13 = x_Ptr12 + nV; - #endif - #if nISO>=15 - x_Ptr14 = x_Ptr13 + nV; - #endif - #if nISO>=16 - x_Ptr15 = x_Ptr14 + nV; - #endif - #if nISO>=17 - x_Ptr16 = x_Ptr15 + nV; - #endif - #if nISO>=18 - x_Ptr17 = x_Ptr16 + nV; - #endif - #if nISO>=19 - x_Ptr18 = x_Ptr17 + nV; - #endif - #if nISO>=20 - x_Ptr19 = x_Ptr18 + nV; - #endif - - while( t_v != t_vEnd ) - { - Yptr = Y + nS * (*t_v++); - YptrEnd = Yptr + nS; - - SFP0ptr = isoSFP0; - #if nISO>=2 - SFP1ptr = isoSFP1; - #endif - #if nISO>=3 - SFP2ptr = isoSFP2; - #endif - #if nISO>=4 - SFP3ptr = isoSFP3; - #endif - #if nISO>=5 - SFP4ptr = isoSFP4; - #endif - #if nISO>=6 - SFP5ptr = isoSFP5; - #endif - #if nISO>=7 - SFP6ptr = isoSFP6; - #endif - #if nISO>=8 - SFP7ptr = isoSFP7; - #endif - #if nISO>=9 - SFP8ptr = isoSFP8; - #endif - #if nISO>=10 - SFP9ptr = isoSFP9; - #endif - #if nISO>=11 - SFP10ptr = isoSFP10; - #endif - #if nISO>=12 - SFP11ptr = isoSFP11; - #endif - #if nISO>=13 - SFP12ptr = isoSFP12; - #endif - #if nISO>=14 - SFP13ptr = isoSFP13; - #endif - #if nISO>=15 - SFP14ptr = isoSFP14; - #endif - #if nISO>=16 - SFP15ptr = isoSFP15; - #endif - #if nISO>=17 - SFP16ptr = isoSFP16; - #endif - #if nISO>=18 - SFP17ptr = isoSFP17; - #endif - #if nISO>=19 - SFP18ptr = isoSFP18; - #endif - #if nISO>=20 - SFP19ptr = isoSFP19; - #endif - - Y_tmp = *Yptr; - x0 = (*SFP0ptr++) * Y_tmp; - #if nISO>=2 - x1 = (*SFP1ptr++) * Y_tmp; - #endif - #if nISO>=3 - x2 = (*SFP2ptr++) * Y_tmp; - #endif - #if nISO>=4 - x3 = (*SFP3ptr++) * Y_tmp; - #endif - #if nISO>=5 - x4 = (*SFP4ptr++) * Y_tmp; - #endif - #if nISO>=6 - x5 = (*SFP5ptr++) * Y_tmp; - #endif - #if nISO>=7 - x6 = (*SFP6ptr++) * Y_tmp; - #endif - #if nISO>=8 - x7 = (*SFP7ptr++) * Y_tmp; - #endif - #if nISO>=9 - x8 = (*SFP8ptr++) * Y_tmp; - #endif - #if nISO>=10 - x9 = (*SFP9ptr++) * Y_tmp; - #endif - #if nISO>=11 - x10 = (*SFP10ptr++) * Y_tmp; - #endif - #if nISO>=12 - x11 = (*SFP11ptr++) * Y_tmp; - #endif - #if nISO>=13 - x12 = (*SFP12ptr++) * Y_tmp; - #endif - #if nISO>=14 - x13 = (*SFP13ptr++) * Y_tmp; - #endif - #if nISO>=15 - x14 = (*SFP14ptr++) * Y_tmp; - #endif - #if nISO>=16 - x15 = (*SFP15ptr++) * Y_tmp; - #endif - #if nISO>=17 - x16 = (*SFP16ptr++) * Y_tmp; - #endif - #if nISO>=18 - x17 = (*SFP17ptr++) * Y_tmp; - #endif - #if nISO>=19 - x18 = (*SFP18ptr++) * Y_tmp; - #endif - #if nISO>=20 - x19 = (*SFP19ptr++) * Y_tmp; - #endif - - while( ++Yptr != YptrEnd ) - { - Y_tmp = *Yptr; - x0 += (*SFP0ptr++) * Y_tmp; - #if nISO>=2 - x1 += (*SFP1ptr++) * Y_tmp; - #endif - #if nISO>=3 - x2 += (*SFP2ptr++) * Y_tmp; - #endif - #if nISO>=4 - x3 += (*SFP3ptr++) * Y_tmp; - #endif - #if nISO>=5 - x4 += (*SFP4ptr++) * Y_tmp; - #endif - #if nISO>=6 - x5 += (*SFP5ptr++) * Y_tmp; - #endif - #if nISO>=7 - x6 += (*SFP6ptr++) * Y_tmp; - #endif - #if nISO>=8 - x7 += (*SFP7ptr++) * Y_tmp; - #endif - #if nISO>=9 - x8 += (*SFP8ptr++) * Y_tmp; - #endif - #if nISO>=10 - x9 += (*SFP9ptr++) * Y_tmp; - #endif - #if nISO>=11 - x10 += (*SFP10ptr++) * Y_tmp; - #endif - #if nISO>=12 - x11 += (*SFP11ptr++) * Y_tmp; - #endif - #if nISO>=13 - x12 += (*SFP12ptr++) * Y_tmp; - #endif - #if nISO>=14 - x13 += (*SFP13ptr++) * Y_tmp; - #endif - #if nISO>=15 - x14 += (*SFP14ptr++) * Y_tmp; - #endif - #if nISO>=16 - x15 += (*SFP15ptr++) * Y_tmp; - #endif - #if nISO>=17 - x16 += (*SFP16ptr++) * Y_tmp; - #endif - #if nISO>=18 - x17 += (*SFP17ptr++) * Y_tmp; - #endif - #if nISO>=19 - x18 += (*SFP18ptr++) * Y_tmp; - #endif - #if nISO>=20 - x19 += (*SFP19ptr++) * Y_tmp; - #endif - } - - (*x_Ptr0++) += x0; - #if nISO>=2 - (*x_Ptr1++) += x1; - #endif - #if nISO>=3 - (*x_Ptr2++) += x2; - #endif - #if nISO>=4 - (*x_Ptr3++) += x3; - #endif - #if nISO>=5 - (*x_Ptr4++) += x4; - #endif - #if nISO>=6 - (*x_Ptr5++) += x5; - #endif - #if nISO>=7 - (*x_Ptr6++) += x6; - #endif - #if nISO>=8 - (*x_Ptr7++) += x7; - #endif - #if nISO>=9 - (*x_Ptr8++) += x8; - #endif - #if nISO>=10 - (*x_Ptr9++) += x9; - #endif - #if nISO>=11 - (*x_Ptr10++) += x10; - #endif - #if nISO>=12 - (*x_Ptr11++) += x11; - #endif - #if nISO>=13 - (*x_Ptr12++) += x12; - #endif - #if nISO>=14 - (*x_Ptr13++) += x13; - #endif - #if nISO>=15 - (*x_Ptr14++) += x14; - #endif - #if nISO>=16 - (*x_Ptr15++) += x15; - #endif - #if nISO>=17 - (*x_Ptr16++) += x16; - #endif - #if nISO>=18 - (*x_Ptr17++) += x17; - #endif - #if nISO>=19 - (*x_Ptr18++) += x18; - #endif - #if nISO>=20 - (*x_Ptr19++) += x19; - #endif - } -#endif - - pthread_exit( 0 ); -} - - -// ========================= -// Function called by CYTHON -// ========================= -void COMMIT_At( - int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, - double *_vIN, double *_vOUT, - uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, - uint32_t *_ECv, uint16_t *_ECo, - uint32_t *_ISOv, - float *_wmrSFP, float *_wmhSFP, float *_isoSFP, - uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT -) -{ - nF = _nF; - n = _n; - nE = _nE; - nV = _nV; - nS = _nS; - ndirs = _ndirs; - - x = _vOUT; - Y = _vIN; - - ICf = _ICf; - ICv = _ICv; - ICo = _ICo; - ICl = _ICl; - ECv = _ECv; - ECo = _ECo; - ISOv = _ISOv; - - #if nIC>=1 - wmrSFP0 = _wmrSFP; - #if nIC>=2 - wmrSFP1 = wmrSFP0 + _ndirs*_nS; - #if nIC>=3 - wmrSFP2 = wmrSFP1 + _ndirs*_nS; - #if nIC>=4 - wmrSFP3 = wmrSFP2 + _ndirs*_nS; - #if nIC>=5 - wmrSFP4 = wmrSFP3 + _ndirs*_nS; - #if nIC>=6 - wmrSFP5 = wmrSFP4 + _ndirs*_nS; - #if nIC>=7 - wmrSFP6 = wmrSFP5 + _ndirs*_nS; - #if nIC>=8 - wmrSFP7 = wmrSFP6 + _ndirs*_nS; - #if nIC>=9 - wmrSFP8 = wmrSFP7 + _ndirs*_nS; - #if nIC>=10 - wmrSFP9 = wmrSFP8 + _ndirs*_nS; - #if nIC>=11 - wmrSFP10 = wmrSFP9 + _ndirs*_nS; - #if nIC>=12 - wmrSFP11 = wmrSFP10 + _ndirs*_nS; - #if nIC>=13 - wmrSFP12 = wmrSFP11 + _ndirs*_nS; - #if nIC>=14 - wmrSFP13 = wmrSFP12 + _ndirs*_nS; - #if nIC>=15 - wmrSFP14 = wmrSFP13 + _ndirs*_nS; - #if nIC>=16 - wmrSFP15 = wmrSFP14 + _ndirs*_nS; - #if nIC>=17 - wmrSFP16 = wmrSFP15 + _ndirs*_nS; - #if nIC>=18 - wmrSFP17 = wmrSFP16 + _ndirs*_nS; - #if nIC>=19 - wmrSFP18 = wmrSFP17 + _ndirs*_nS; - #if nIC>=20 - wmrSFP19 = wmrSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nEC>=1 - wmhSFP0 = _wmhSFP; - #if nEC>=2 - wmhSFP1 = wmhSFP0 + _ndirs*_nS; - #if nEC>=3 - wmhSFP2 = wmhSFP1 + _ndirs*_nS; - #if nEC>=4 - wmhSFP3 = wmhSFP2 + _ndirs*_nS; - #if nEC>=5 - wmhSFP4 = wmhSFP3 + _ndirs*_nS; - #if nEC>=6 - wmhSFP5 = wmhSFP4 + _ndirs*_nS; - #if nEC>=7 - wmhSFP6 = wmhSFP5 + _ndirs*_nS; - #if nEC>=8 - wmhSFP7 = wmhSFP6 + _ndirs*_nS; - #if nEC>=9 - wmhSFP8 = wmhSFP7 + _ndirs*_nS; - #if nEC>=10 - wmhSFP9 = wmhSFP8 + _ndirs*_nS; - #if nEC>=11 - wmhSFP10 = wmhSFP9 + _ndirs*_nS; - #if nEC>=12 - wmhSFP11 = wmhSFP10 + _ndirs*_nS; - #if nEC>=13 - wmhSFP12 = wmhSFP11 + _ndirs*_nS; - #if nEC>=14 - wmhSFP13 = wmhSFP12 + _ndirs*_nS; - #if nEC>=15 - wmhSFP14 = wmhSFP13 + _ndirs*_nS; - #if nEC>=16 - wmhSFP15 = wmhSFP14 + _ndirs*_nS; - #if nEC>=17 - wmhSFP16 = wmhSFP15 + _ndirs*_nS; - #if nEC>=18 - wmhSFP17 = wmhSFP16 + _ndirs*_nS; - #if nEC>=19 - wmhSFP18 = wmhSFP17 + _ndirs*_nS; - #if nEC>=20 - wmhSFP19 = wmhSFP18 + _ndirs*_nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #if nISO>=1 - isoSFP0 = _isoSFP; - #if nISO>=2 - isoSFP1 = isoSFP0 + _nS; - #if nISO>=3 - isoSFP2 = isoSFP1 + _nS; - #if nISO>=4 - isoSFP3 = isoSFP2 + _nS; - #if nISO>=5 - isoSFP4 = isoSFP3 + _nS; - #if nISO>=6 - isoSFP5 = isoSFP4 + _nS; - #if nISO>=7 - isoSFP6 = isoSFP5 + _nS; - #if nISO>=8 - isoSFP7 = isoSFP6 + _nS; - #if nISO>=9 - isoSFP8 = isoSFP7 + _nS; - #if nISO>=10 - isoSFP9 = isoSFP8 + _nS; - #if nISO>=11 - isoSFP10 = isoSFP9 + _nS; - #if nISO>=12 - isoSFP11 = isoSFP10 + _nS; - #if nISO>=13 - isoSFP12 = isoSFP11 + _nS; - #if nISO>=14 - isoSFP13 = isoSFP12 + _nS; - #if nISO>=15 - isoSFP14 = isoSFP13 + _nS; - #if nISO>=16 - isoSFP15 = isoSFP14 + _nS; - #if nISO>=17 - isoSFP16 = isoSFP15 + _nS; - #if nISO>=18 - isoSFP17 = isoSFP16 + _nS; - #if nISO>=19 - isoSFP18 = isoSFP17 + _nS; - #if nISO>=20 - isoSFP19 = isoSFP18 + _nS; - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - #endif - - ICthreadsT = _ICthreadsT; - ECthreadsT = _ECthreadsT; - ISOthreadsT = _ISOthreadsT; - - // Run SEPARATE THREADS to perform the multiplication - pthread_t threads[nTHREADS]; - int t; - for(t=0; t" % criterion ) + print( "< Stopping criterion: %s >" % criterion, flush=True ) opt_details = {} opt_details['residual'] = 0.5*res_norm**2 diff --git a/commit/trk2dictionary/trk2dictionary.pyx b/commit/trk2dictionary/trk2dictionary.pyx old mode 100755 new mode 100644 index ab7c8ee7..db35f4a0 --- a/commit/trk2dictionary/trk2dictionary.pyx +++ b/commit/trk2dictionary/trk2dictionary.pyx @@ -54,12 +54,12 @@ cpdef cat_function( infilename, outfilename ): for fname in infilename: with open( fname, 'rb' ) as inFile: shutil.copyfileobj( inFile, outfile ) - remove( fname ) + remove( fname ) cpdef compute_tdi( np.uint32_t[::1] v, np.float32_t[::1] l, int nx, int ny, int nz, int verbose): cdef np.float32_t [::1] tdi = np.zeros( nx*ny*nz, dtype=np.float32 ) cdef unsigned long long i=0 - with ProgressBar(total=v.size, disable=(verbose in [0, 1, 3]), hide_on_exit=True) as pbar: + with ProgressBar(total=v.size, disable=verbose<3, hide_on_exit=True) as pbar: for i in range(v.size): tdi[ v[i] ] += l[i] @@ -322,14 +322,15 @@ cpdef run( filename_tractogram=None, path_out=None, filename_peaks=None, filenam hdr = nibabel.streamlines.load( filename_tractogram, lazy_load=True ).header temp_idx = np.arange(int(hdr['count'])) log_list = [] - subinfo = logger.subinfo(f'Clustering with threshold = {blur_clust_thr[0]}', indent_lvl=2, indent_char='-', with_progress=verbose>2) - with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=subinfo, log_list=log_list) as pbar: + ret_subinfo = logger.subinfo(f'Clustering with threshold = {blur_clust_thr[0]}', indent_lvl=2, indent_char='-', with_progress=verbose>2) + with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): idx_centroids = run_clustering(tractogram_in=filename_tractogram, tractogram_out=filename_out, temp_folder=path_temp, atlas=blur_clust_groupby, clust_thr=blur_clust_thr[0], n_threads=n_threads, keep_temp_files=True, force=True, verbose=1, log_list=log_list) else: - logger.subinfo(f'Clustering with threshold = {blur_clust_thr[0]}', indent_lvl=2, indent_char='-', with_progress=verbose>2) - with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=True) as pbar: + log_list = [] + ret_subinfo = logger.subinfo(f'Clustering with threshold = {blur_clust_thr[0]}', indent_lvl=2, indent_char='-', with_progress=verbose>2) + with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): idx_centroids = run_clustering(tractogram_in=filename_tractogram, tractogram_out=filename_out, temp_folder=path_temp, clust_thr=blur_clust_thr[0], keep_temp_files=True, force=True, verbose=1) @@ -543,14 +544,22 @@ cpdef run( filename_tractogram=None, path_out=None, filename_peaks=None, filenam logger.warning( 'DICTIONARY not generated' ) return None + # free memory + free(ptrTDI) + + # NOTE: this is to ensure flushing all the output from the cpp code + print(end='', flush=True) + time.sleep(0.5) + # Concatenate files together - logger.subinfo( 'Saving data structure', indent_char='*', indent_lvl=1, with_progress=True ) + log_list = [] + ret_subinfo = logger.subinfo( 'Saving data structure', indent_char='*', indent_lvl=1, with_progress=verbose>2 ) cdef int discarded = 0 - with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=True) as pbar: + with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): for j in range(n_threads-1): - path_IC_f = path_temp + f'/dictionary_IC_f_{j+1}.dict' - kept = np.fromfile( path_temp + f'/dictionary_TRK_kept_{j}.dict', dtype=np.uint8 ) - IC_f = np.fromfile( path_temp + f'/dictionary_IC_f_{j+1}.dict', dtype=np.uint32 ) + path_IC_f = join(path_temp, f'dictionary_IC_f_{j+1}.dict') + kept = np.fromfile( join(path_temp, f'dictionary_TRK_kept_{j}.dict'), dtype=np.uint8 ) + IC_f = np.fromfile( join(path_temp, f'dictionary_IC_f_{j+1}.dict'), dtype=np.uint32 ) discarded += np.count_nonzero(kept==0) IC_f -= discarded IC_f_save = np.memmap( path_IC_f, dtype="uint32", mode='w+', shape=IC_f.shape ) @@ -559,52 +568,52 @@ cpdef run( filename_tractogram=None, path_out=None, filename_peaks=None, filenam del IC_f_save # np.save( path_out + f'/dictionary_IC_f_{j+1}.dict', IC_f, allow_pickle=True) - fileout = path_out + '/dictionary_TRK_kept.dict' + fileout = join(path_out, 'dictionary_TRK_kept.dict') dict_list = [] for j in range(n_threads): - dict_list += [ path_temp + f'/dictionary_TRK_kept_{j}.dict' ] + dict_list += [ join(path_temp, f'dictionary_TRK_kept_{j}.dict') ] cat_function( dict_list, fileout ) - fileout = path_out + '/dictionary_TRK_norm.dict' + fileout = join(path_out, 'dictionary_TRK_norm.dict') dict_list = [] for j in range(n_threads): - dict_list += [ path_temp + f'/dictionary_TRK_norm_{j}.dict' ] + dict_list += [ join(path_temp, f'dictionary_TRK_norm_{j}.dict') ] cat_function( dict_list, fileout ) - fileout = path_out + '/dictionary_TRK_len.dict' + fileout = join(path_out, 'dictionary_TRK_len.dict') dict_list = [] for j in range(n_threads): - dict_list += [ path_temp + f'/dictionary_TRK_len_{j}.dict' ] + dict_list += [ join(path_temp, f'dictionary_TRK_len_{j}.dict') ] cat_function( dict_list, fileout ) - fileout = path_out + '/dictionary_TRK_lenTot.dict' + fileout = join(path_out, 'dictionary_TRK_lenTot.dict') dict_list = [] for j in range(n_threads): - dict_list += [ path_temp + f'/dictionary_TRK_lenTot_{j}.dict' ] + dict_list += [ join(path_temp, f'dictionary_TRK_lenTot_{j}.dict') ] cat_function( dict_list, fileout ) - fileout = path_out + '/dictionary_IC_f.dict' + fileout = join(path_out, 'dictionary_IC_f.dict') dict_list = [] for j in range(n_threads): - dict_list += [ path_temp + f'/dictionary_IC_f_{j}.dict' ] + dict_list += [ join(path_temp, f'dictionary_IC_f_{j}.dict') ] cat_function( dict_list, fileout ) - fileout = path_out + '/dictionary_IC_v.dict' + fileout = join(path_out, 'dictionary_IC_v.dict') dict_list = [] for j in range(n_threads): - dict_list += [ path_temp + f'/dictionary_IC_v_{j}.dict' ] + dict_list += [ join(path_temp, f'dictionary_IC_v_{j}.dict') ] cat_function( dict_list, fileout ) - fileout = path_out + '/dictionary_IC_o.dict' + fileout = join(path_out, 'dictionary_IC_o.dict') dict_list = [] for j in range(n_threads): - dict_list += [ path_temp + f'/dictionary_IC_o_{j}.dict' ] + dict_list += [ join(path_temp, f'dictionary_IC_o_{j}.dict') ] cat_function( dict_list, fileout ) - fileout = path_out + '/dictionary_IC_len.dict' + fileout = join(path_out, 'dictionary_IC_len.dict') dict_list = [] for j in range(n_threads): - dict_list += [ path_temp + f'/dictionary_IC_len_{j}.dict' ] + dict_list += [ join(path_temp, f'dictionary_IC_len_{j}.dict') ] cat_function( dict_list, fileout ) @@ -619,29 +628,31 @@ cpdef run( filename_tractogram=None, path_out=None, filename_peaks=None, filenam TDI_affine = np.diag( [Px, Py, Pz, 1] ) # save TDI and MASK maps - logger.subinfo( 'Saving TDI and MASK maps', indent_char='*', indent_lvl=1 ) - v = np.fromfile( join(path_out, 'dictionary_IC_v.dict'), dtype=np.uint32 ) - l = np.fromfile( join(path_out, 'dictionary_IC_len.dict'), dtype=np.float32 ) - cdef np.float32_t [::1] niiTDI_mem = np.zeros( Nx*Ny*Nz, dtype=np.float32 ) - niiTDI_mem = compute_tdi( v, l, Nx, Ny, Nz, verbose ) - niiTDI_img_save = np.reshape( niiTDI_mem, (Nx,Ny,Nz), order='F' ) + log_list = [] + ret_subinfo = logger.subinfo( 'Saving TDI and MASK maps', indent_char='*', indent_lvl=1, with_progress=verbose>2) + with ProgressBar(disable=verbose<3, hide_on_exit=True, subinfo=ret_subinfo, log_list=log_list): + v = np.fromfile( join(path_out, 'dictionary_IC_v.dict'), dtype=np.uint32 ) + l = np.fromfile( join(path_out, 'dictionary_IC_len.dict'), dtype=np.float32 ) + + niiTDI_mem = compute_tdi( v, l, Nx, Ny, Nz, verbose=0 ) + niiTDI_img_save = np.reshape( niiTDI_mem, (Nx,Ny,Nz), order='F' ) - niiTDI = nibabel.Nifti1Image( niiTDI_img_save, TDI_affine ) - niiTDI_hdr = _get_header( niiTDI ) - niiTDI_hdr['descrip'] = f'Created with COMMIT {get_distribution("dmri-commit").version}' - nibabel.save( niiTDI, join(path_out,'dictionary_tdi.nii.gz') ) + niiTDI = nibabel.Nifti1Image( niiTDI_img_save, TDI_affine ) + niiTDI_hdr = _get_header( niiTDI ) + niiTDI_hdr['descrip'] = f'Created with COMMIT {get_distribution("dmri-commit").version}' + nibabel.save( niiTDI, join(path_out,'dictionary_tdi.nii.gz') ) - if filename_mask is not None : - niiMASK = nibabel.Nifti1Image( niiMASK_img, TDI_affine ) - else : - niiMASK = nibabel.Nifti1Image( (np.asarray(niiTDI_img_save)>0).astype(np.float32), TDI_affine ) + if filename_mask is not None : + niiMASK = nibabel.Nifti1Image( niiMASK_img, TDI_affine ) + else : + niiMASK = nibabel.Nifti1Image( (np.asarray(niiTDI_img_save)>0).astype(np.float32), TDI_affine ) - niiMASK_hdr = _get_header( niiMASK ) - niiMASK_hdr['descrip'] = niiTDI_hdr['descrip'] - nibabel.save( niiMASK, join(path_out,'dictionary_mask.nii.gz') ) + niiMASK_hdr = _get_header( niiMASK ) + niiMASK_hdr['descrip'] = niiTDI_hdr['descrip'] + nibabel.save( niiMASK, join(path_out,'dictionary_mask.nii.gz') ) - if not keep_temp: - shutil.rmtree(path_temp) + if not keep_temp: + shutil.rmtree(path_temp) logger.info( f'[ {format_time(time.time() - tic)} ]' ) \ No newline at end of file diff --git a/commit/trk2dictionary/trk2dictionary_c.cpp b/commit/trk2dictionary/trk2dictionary_c.cpp index 7b259379..be8e24ee 100644 --- a/commit/trk2dictionary/trk2dictionary_c.cpp +++ b/commit/trk2dictionary/trk2dictionary_c.cpp @@ -178,9 +178,9 @@ int trk2dictionary( // Open tractogram file and compute the offset for each thread // ----------------------------------------------------------------- unsigned long long int current; - unsigned long long int OffsetArr[threads_count]; + unsigned long long int *OffsetArr = new unsigned long long int[threads_count](); int f = 0; - float Buff[3]; + float *Buff = new float[3](); int N; FILE* fpTractogram = fopen(str_filename,"rb"); @@ -232,8 +232,8 @@ int trk2dictionary( // ========================================== // Parallel IC compartments // ========================================== - - printf( " * Exporting IC compartments:\033[0m\n" ); + + std::cout << " * Exporting IC compartments:" << std::endl; // unsigned int width = 25; // PROGRESS = new ProgressBar( (unsigned int) n_count, (unsigned int) width); if (verbosity > 2) @@ -256,7 +256,7 @@ int trk2dictionary( if (verbosity > 2) PROGRESS->close(); - printf( " [ %d streamlines kept, %ld segments in total ]\n", std::accumulate(totFibers.begin(), totFibers.end(), 0), std::accumulate( totICSegments.begin(), totICSegments.end(), 0) ); + std::cout << " [ " << std::accumulate(totFibers.begin(), totFibers.end(), 0) << " streamlines kept, " << std::accumulate( totICSegments.begin(), totICSegments.end(), 0) << " segments in total ]" << std::endl; totFibers.clear(); threads.clear(); @@ -264,20 +264,23 @@ int trk2dictionary( // Parallel EC compartments // ========================================== - printf( " * Exporting EC compartments:\033[0m\n" ); - + std::cout << " * Exporting EC compartments:" << std::endl; int EC = ECSegments( ptrPEAKS, Np, vf_THR, ECix, ECiy, ECiz, ptrTDI, ptrHashTable, path_out, ptrPeaksAffine, threads_count ); - printf(" [ %d voxels, %d segments ]\n", totECVoxels, totECSegments ); + std::cout << " [ " << totECVoxels << " voxels, " << totECSegments << " segments ]" << std::endl; /*=========================*/ /* Restricted ISO compartments */ /*=========================*/ - printf( " * Exporting ISO compartments:\033[0m\n" ); - + std::cout << " * Exporting ISO compartments:" << std::endl; int totISO = ISOcompartments(ptrTDI, path_out, threads_count); - printf(" [ %d voxels ]\n", totISO ); + std::cout << " [ " << totISO << " voxels ]" << std::endl; + + delete[] batch_size; + delete[] Pos; + delete[] OffsetArr; + delete[] Buff; return 1; @@ -290,8 +293,7 @@ int ECSegments(float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int EC // Variables definition string filename; string OUTPUT_path(path_out); - std::size_t pos = OUTPUT_path.find("/temp"); - OUTPUT_path = OUTPUT_path.substr (0,pos); + OUTPUT_path = OUTPUT_path.substr (0,OUTPUT_path.size()-5); unsigned short o; unsigned int v; @@ -308,7 +310,7 @@ int ECSegments(float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int EC segKey ec_seg; int ix, iy, iz, id, atLeastOne; float peakMax; - float norms[ Np ]; + float *norms = new float[Np](); float *ptr; int ox, oy; int skip = 0; @@ -387,6 +389,7 @@ int ECSegments(float* ptrPEAKS, int Np, float vf_THR, int ECix, int ECiy, int EC } } } + delete[] norms; } totECSegments = temp_totECSegments; totECVoxels = temp_totECVoxels; @@ -404,8 +407,7 @@ int ISOcompartments(double** ptrTDI, char* path_out, int threads){ // Variables definition string filename; string OUTPUT_path(path_out); - std::size_t pos = OUTPUT_path.find("/temp"); - OUTPUT_path = OUTPUT_path.substr (0,pos); + OUTPUT_path = OUTPUT_path.substr (0,OUTPUT_path.size()-5); unsigned int totISOVoxels = 0, v=0; filename = OUTPUT_path+"/dictionary_ISO_v.dict"; FILE* pDict_ISO_v = fopen( filename.c_str(), "wb" ); @@ -481,7 +483,7 @@ unsigned long long int offset, int idx, unsigned int startpos, unsigned int endp filename = OUTPUT_path+"/dictionary_TRK_norm_" + std::to_string(idx) + ".dict"; FILE* pDict_TRK_norm = fopen(filename.c_str(),"wb"); if ( !pDict_TRK_norm ) { - printf( "\n[trk2dictionary] Unable to create output files" ); + std::cout << "\n[trk2dictionary] Unable to create output files"; } filename = OUTPUT_path+"/dictionary_IC_f_" + std::to_string(idx) + ".dict"; FILE* pDict_IC_f = fopen(filename.c_str(),"wb"); filename = OUTPUT_path+"/dictionary_IC_v_" + std::to_string(idx) + ".dict"; FILE* pDict_IC_v = fopen(filename.c_str(),"wb"); diff --git a/pyproject.toml b/pyproject.toml index 37d31d98..46c095b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,18 +1,18 @@ [build-system] requires = [ "cython>=3.0.10", - "numpy>=1.24.4", + "numpy>=1.24.4,<2.0.0", "setuptools>=69.2.0" ] build-backend = "setuptools.build_meta" [project] name = "dmri-commit" -version = "2.2.0" +version = "2.3.0" dependencies = [ "dmri-amico>=2.0.1", "dmri-dicelib>=1.1.0", - "numpy>=1.24.4", + "numpy>=1.24.4,<2.0.0", "scipy>=1.10.1" ] requires-python = ">=3.8" @@ -62,3 +62,6 @@ include-package-data = false "*.pyx", "*.pyxbld" ] + +[tool.cibuildwheel] +build-verbosity = 3 diff --git a/setup.py b/setup.py index aef9e8a6..1079cef4 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ import os +import sys from setuptools import Extension, setup from setuptools.command.build_ext import build_ext @@ -11,19 +12,35 @@ def get_extensions(): # for the computation of matrix-vector multiplications trk2dictionary = Extension(name=f'{package_name}.trk2dictionary', sources=[f'{package_name}/trk2dictionary/trk2dictionary.pyx'], - libraries=['stdc++'], - extra_compile_args=['-w', '-std=c++11'], + libraries=[] if sys.platform == 'win32' else ['stdc++'], + extra_compile_args=[] if sys.platform == 'win32' else ['-w', '-std=c++11'], language='c++') core = Extension(name=f'{package_name}.core', sources=[f'{package_name}/core.pyx'], - extra_compile_args=['-w'], + extra_compile_args=[] if sys.platform == 'win32' else ['-w', '-std=c++11'], language='c++') proximals = Extension(name=f'{package_name}.proximals', sources=[f'{package_name}/proximals.pyx'], - extra_compile_args=['-w'], + extra_compile_args=[] if sys.platform == 'win32' else ['-w', '-std=c++11'], language='c++') + # NOTE: Windows requires the pthread-win32 static library to compile the operator extension + # The library can be downloaded from https://github.com/GerHobbelt/pthread-win32 + # The PTHREAD_WIN_INCLUDE and PTHREAD_WIN_LIB environment variables must be set to the include and lib directories + if sys.platform == 'win32': + try: + pthread_win_include = os.environ['PTHREAD_WIN_INCLUDE'] + pthread_win_lib = os.environ['PTHREAD_WIN_LIB'] + except KeyError: + raise RuntimeError('PTHREAD_WIN_INCLUDE and PTHREAD_WIN_LIB must be set') + operator = Extension(name=f'{package_name}.operator.operator', + sources=[f'{package_name}/operator/operator.pyx', f'{package_name}/operator/operator_c.c'], + include_dirs=[pthread_win_include] if sys.platform == 'win32' else [], + libraries=['pthread'] if sys.platform == 'win32' else [], + library_dirs=[pthread_win_lib] if sys.platform == 'win32' else [], + extra_compile_args=['/fp:fast', '/DHAVE_STRUCT_TIMESPEC'] if sys.platform == 'win32' else ['-w', '-O3', '-Ofast'], + language='c') - return [trk2dictionary, core, proximals] + return [trk2dictionary, core, proximals, operator] class CustomBuildExtCommand(build_ext): """ build_ext command to use when numpy headers are needed. """ @@ -36,7 +53,7 @@ def run(self): # Add everything requires for build self.swig_opts = None - self.include_dirs = [get_include()] + self.include_dirs.extend([get_include()]) self.distribution.ext_modules[:] = cythonize( self.distribution.ext_modules, build_dir='build' ) # if not specified via '-j N' option, set compilation using max number of cores @@ -48,6 +65,11 @@ def run(self): build_ext.finalize_options(self) build_ext.run(self) +# generate the operator_c.c file +sys.path.insert(0, os.path.dirname(__file__)) +from setup_operator import write_operator_c_file +write_operator_c_file() + # create the 'build' directory if not os.path.exists('build'): os.makedirs('build') diff --git a/setup_operator.py b/setup_operator.py new file mode 100644 index 00000000..3fa86743 --- /dev/null +++ b/setup_operator.py @@ -0,0 +1,932 @@ +''' +This script writes the operator_c.c file. +The main functions are: + COMMIT_A__block + COMMIT_A + COMMIT_At__block + COMMIT_At + COMMIT_A__block_nolut + COMMIT_A_nolut + COMMIT_At__block_nolut + COMMIT_At_nolut +''' + +from typing import NoReturn +from os.path import join as path_join + + +def add_imports() -> str: + '''Adds the imports to the operator_c.c file.''' + s: str = '''\ +#include +#include + +// max number of threads +#define MAX_THREADS 255\n\n''' + return s + + +def add_global_variables() -> str: + '''Adds the global variables to the operator_c.c file.''' + s: str = '''\ +// global variables +int nF, n, nE, nV, nS, ndirs; +double *x, *Y; +uint32_t *ICthreads, *ECthreads, *ISOthreads; +uint8_t *ICthreadsT; +uint32_t *ECthreadsT, *ISOthreadsT; +uint32_t *ICf, *ICv, *ECv, *ISOv; +uint16_t *ICo, *ECo; +float *ICl; +float *wmrSFP0, *wmrSFP1, *wmrSFP2, *wmrSFP3, *wmrSFP4, *wmrSFP5, *wmrSFP6, *wmrSFP7, *wmrSFP8, *wmrSFP9, *wmrSFP10, *wmrSFP11, *wmrSFP12, *wmrSFP13, *wmrSFP14, *wmrSFP15, *wmrSFP16, *wmrSFP17, *wmrSFP18, *wmrSFP19; +float *wmhSFP0, *wmhSFP1, *wmhSFP2, *wmhSFP3, *wmhSFP4, *wmhSFP5, *wmhSFP6, *wmhSFP7, *wmhSFP8, *wmhSFP9, *wmhSFP10, *wmhSFP11, *wmhSFP12, *wmhSFP13, *wmhSFP14, *wmhSFP15, *wmhSFP16, *wmhSFP17, *wmhSFP18, *wmhSFP19; +float *isoSFP0, *isoSFP1, *isoSFP2, *isoSFP3, *isoSFP4, *isoSFP5, *isoSFP6, *isoSFP7, *isoSFP8, *isoSFP9, *isoSFP10, *isoSFP11, *isoSFP12, *isoSFP13, *isoSFP14, *isoSFP15, *isoSFP16, *isoSFP17, *isoSFP18, *isoSFP19; +uint32_t nIC, nEC, nISO;\n\n\n''' + return s + + +def add_commit_a_block() -> str: + '''Adds the COMMIT_A__block function to the operator_c.c file.''' + def add_intracellular_compartments() -> str: + s: str = '''\ + // intra-cellular compartments + if (nIC > 0) + { + t_v = ICv + ICthreads[id]; + t_vEnd = ICv + ICthreads[id+1]; + t_o = ICo + ICthreads[id]; + t_l = ICl + ICthreads[id]; + t_f = ICf + ICthreads[id]; + switch (nIC) + {''' + s1: str = '''\ +xPtr0 = x + (*t_f); + x0 = *xPtr0;''' + s2: str = 'if (x0 != 0' + s3: str = 'SFP0ptr = wmrSFP0 + offset;' + s4: str = 'x0 * (*SFP0ptr++)' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + xPtr{i} = xPtr{i - 1} + nF; + x{i} = *xPtr{i};''' + s2 += f' || x{i} != 0' + s3 += f'''\ + + SFP{i}ptr = wmrSFP{i} + offset;''' + s4 += f' + x{i} * (*SFP{i}ptr++)' + s += f'''\ + + case {i + 1}: + while (t_v != t_vEnd) + {{ + {s1} + {s2}) + {{ + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + {s3} + while (YPtr != YPtrEnd) + (*YPtr++) += w * ({s4}); + }} + t_f++; + t_v++; + t_o++; + t_l++; + }} + break;''' + + s += '''\ + + } + }\n\n''' + return s + + def add_extracellular_compartments() -> str: + s: str = '''\ + // extra-cellular compartments + if (nEC > 0) + { + t_v = ECv + ECthreads[id]; + t_vEnd = ECv + ECthreads[id+1]; + t_o = ECo + ECthreads[id]; + xPtr0 = x + nIC*nF + ECthreads[id]; + switch (nEC) + {''' + s1: str = '' + s2: str = 'x0 = *xPtr0++;' + s3: str = 'if (x0 != 0' + s4: str = 'SFP0ptr = wmhSFP0 + offset;' + s5: str = 'x0 * (*SFP0ptr++)' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + xPtr{i} = xPtr{i - 1} + nE;''' + s2 += f'''\ + + x{i} = *xPtr{i}++;''' + s3 += f' || x{i} != 0' + s4 += f'''\ + + SFP{i}ptr = wmhSFP{i} + offset;''' + s5 += f' + x{i} * (*SFP{i}ptr++)' + s += f'''\ + + case {i + 1}:{s1} + while (t_v != t_vEnd) + {{ + {s2} + {s3}) + {{ + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + {s4} + while (YPtr != YPtrEnd) + (*YPtr++) += ({s5}); + }} + t_v++; + t_o++; + }} + break;''' + + s += '''\ + + } + }\n\n''' + return s + + def add_isotropic_compartments() -> str: + s: str = '''\ + // isotropic compartments + if (nISO > 0) + { + t_v = ISOv + ISOthreads[id]; + t_vEnd = ISOv + ISOthreads[id+1]; + xPtr0 = x + nIC*nF + nEC*nE + ISOthreads[id]; + switch (nISO) + {''' + s1: str = '' + s2: str = 'x0 = *xPtr0++;' + s3: str = 'if (x0 != 0' + s4: str = 'SFP0ptr = isoSFP0;' + s5: str = 'x0 * (*SFP0ptr++)' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + xPtr{i} = xPtr{i - 1} + nV;''' + s2 += f'''\ + + x{i} = *xPtr{i}++;''' + s3 += f' || x{i} != 0' + s4 += f'''\ + + SFP{i}ptr = isoSFP{i};''' + s5 += f' + x{i} * (*SFP{i}ptr++)' + s += f'''\ + + case {i + 1}:{s1} + while (t_v != t_vEnd) + {{ + {s2} + {s3}) + {{ + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + {s4} + while (YPtr != YPtrEnd) + (*YPtr++) += ({s5}); + }} + t_v++; + }} + break;''' + + s += '''\ + + } + }\n\n''' + return s + + s: str = '''\ +// +// Compute a sub-block of the A*x MATRIX-VECTOR product +// +void* COMMIT_A__block( void *ptr ) +{ + int id = (long)ptr; + int offset; + double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w; + double *xPtr0, *xPtr1, *xPtr2, *xPtr3, *xPtr4, *xPtr5, *xPtr6, *xPtr7, *xPtr8, *xPtr9, *xPtr10, *xPtr11, *xPtr12, *xPtr13, *xPtr14, *xPtr15, *xPtr16, *xPtr17, *xPtr18, *xPtr19; + double *YPtr, *YPtrEnd; + float *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr; + uint32_t *t_v, *t_vEnd, *t_f; + uint16_t *t_o; + float *t_l;\n\n''' + s += add_intracellular_compartments() + s += add_extracellular_compartments() + s += add_isotropic_compartments() + s += '''\ + pthread_exit( 0 ); +}\n\n''' + return s + + +def add_commit_a() -> str: + '''Adds the COMMIT_A function to the operator_c.c file.''' + def add_intracellular_compartments() -> str: + s: str = '''\ + switch (nIC) + {''' + s1: str = 'wmrSFP0 = _wmrSFP;' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + wmrSFP{i} = wmrSFP{i - 1} + _ndirs*_nS;''' + s += f'''\ + + case {i + 1}: + {s1} + break;''' + s += '''\ + + }\n\n''' + return s + + def add_extracellular_compartments() -> str: + s: str = '''\ + switch (nEC) + {''' + s1: str = 'wmhSFP0 = _wmhSFP;' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + wmhSFP{i} = wmhSFP{i - 1} + _ndirs*_nS;''' + s += f'''\ + + case {i + 1}: + {s1} + break;''' + s += '''\ + + }\n\n''' + return s + + def add_isotropic_compartments() -> str: + s: str = '''\ + switch (nISO) + {''' + s1: str = 'isoSFP0 = _isoSFP;' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + isoSFP{i} = isoSFP{i - 1} + _nS;''' + s += f'''\ + + case {i + 1}: + {s1} + break;''' + s += '''\ + + }\n\n''' + return s + + s: str = '''\ +// +// Function called by Cython +// +void COMMIT_A( + int _nF, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint32_t* _ICthreads, uint32_t* _ECthreads, uint32_t* _ISOthreads, + uint32_t _nIC, uint32_t _nEC, uint32_t _nISO, uint32_t _nThreads +) +{ + nF = _nF; + nE = _nE; + nV = _nV; + nS = _nS; + ndirs = _ndirs; + + x = _vIN; + Y = _vOUT; + + ICf = _ICf; + ICv = _ICv; + ICo = _ICo; + ICl = _ICl; + ECv = _ECv; + ECo = _ECo; + ISOv = _ISOv; + + nIC = _nIC; + nEC = _nEC; + nISO = _nISO;\n\n''' + s += add_intracellular_compartments() + s += add_extracellular_compartments() + s += add_isotropic_compartments() + s += '''\ + + ICthreads = _ICthreads; + ECthreads = _ECthreads; + ISOthreads = _ISOthreads; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[MAX_THREADS]; + int t; + for(t=0; t<_nThreads ; t++) + pthread_create( &threads[t], NULL, COMMIT_A__block, (void *) (long int)t ); + for(t=0; t<_nThreads ; t++) + pthread_join( threads[t], NULL ); + return; +}\n\n''' + return s + + +def add_commit_at_block() -> str: + '''Adds the COMMIT_At__block function to the operator_c.c file.''' + def add_intracellular_compartments() -> str: + s: str = '''\ + // intra-cellular compartments + if (nIC > 0) + { + t_v = ICv; + t_vEnd = ICv + n; + t_o = ICo; + t_l = ICl; + t_f = ICf; + t_t = ICthreadsT; + switch (nIC) + {''' + s1: str = 'SFP0ptr = wmrSFP0 + offset;' + s2: str = 'x0 = (*SFP0ptr++) * YTmp;' + s3: str = 'x0 += (*SFP0ptr++) * YTmp;' + s4: str = 'x[*t_f] += w * x0;' + s5: str = '' + + for i in range(0, 20): + if i > 0: + if i > 1: + s5 = f'{i}*' + s1 += f'''\ + + SFP{i}ptr = wmrSFP{i} + offset;''' + s2 += f'''\ + + x{i} = (*SFP{i}ptr++) * YTmp;''' + s3 += f'''\ + + x{i} += (*SFP{i}ptr++) * YTmp;''' + s4 += f'''\ + + x[*t_f+{s5}nF] += w * x{i};''' + s += f'''\ + + case {i + 1}: + while (t_v != t_vEnd) + {{ + if (*t_t == id) + {{ + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + w = (double)(*t_l); + offset = nS * (*t_o); + YTmp = *YPtr; + {s1} + {s2} + while (++YPtr != YPtrEnd) + {{ + YTmp = *YPtr; + {s3} + }} + {s4} + }} + t_f++; + t_v++; + t_o++; + t_l++; + t_t++; + }} + break;''' + + s += '''\ + + } + }\n\n''' + return s + + def add_extracellular_compartments() -> str: + s: str = '''\ + // extra-cellular compartments + if (nEC > 0) + { + t_v = ECv + ECthreadsT[id]; + t_vEnd = ECv + ECthreadsT[id+1]; + t_o = ECo + ECthreadsT[id]; + xPtr0 = x + nIC*nF + ECthreadsT[id]; + switch (nEC) + {''' + s1: str = '' + s2: str = 'SFP0ptr = wmhSFP0 + offset;' + s3: str = 'x0 = (*SFP0ptr++) * YTmp;' + s4: str = 'x0 += (*SFP0ptr++) * YTmp;' + s5: str = '(*xPtr0++) += x0;' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + xPtr{i} = xPtr{i - 1} + nE;''' + s2 += f'''\ + + SFP{i}ptr = wmhSFP{i} + offset;''' + s3 += f'''\ + + x{i} = (*SFP{i}ptr++) * YTmp;''' + s4 += f'''\ + + x{i} += (*SFP{i}ptr++) * YTmp;''' + s5 += f'''\ + + (*xPtr{i}++) += x{i};''' + s += f'''\ + + case {i + 1}:{s1} + while (t_v != t_vEnd) + {{ + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + offset = nS * (*t_o); + YTmp = *YPtr; + {s2} + {s3} + while (++YPtr != YPtrEnd) + {{ + YTmp = *YPtr; + {s4} + }} + {s5} + t_v++; + t_o++; + }} + break;''' + + s += '''\ + + } + }\n\n''' + return s + + def add_isotropic_compartments() -> str: + s: str = '''\ + // isotropic compartments + if (nISO > 0) + { + t_v = ISOv + ISOthreadsT[id]; + t_vEnd = ISOv + ISOthreadsT[id+1]; + xPtr0 = x + nIC*nF + nEC*nE + ISOthreadsT[id]; + switch (nISO) + {''' + s1: str = '' + s2: str = 'SFP0ptr = isoSFP0;' + s3: str = 'x0 = (*SFP0ptr++) * YTmp;' + s4: str = 'x0 += (*SFP0ptr++) * YTmp;' + s5: str = '(*xPtr0++) += x0;' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + xPtr{i} = xPtr{i - 1} + nV;''' + s2 += f'''\ + + SFP{i}ptr = isoSFP{i};''' + s3 += f'''\ + + x{i} = (*SFP{i}ptr++) * YTmp;''' + s4 += f'''\ + + x{i} += (*SFP{i}ptr++) * YTmp;''' + s5 += f'''\ + + (*xPtr{i}++) += x{i};''' + s += f'''\ + + case {i + 1}:{s1} + while (t_v != t_vEnd) + {{ + YPtr = Y + nS * (*t_v); + YPtrEnd = YPtr + nS; + YTmp = *YPtr; + {s2} + {s3} + while (++YPtr != YPtrEnd) + {{ + YTmp = *YPtr; + {s4} + }} + {s5} + t_v++; + }} + break;''' + + s += '''\ + + } + }\n\n''' + return s + + s: str = '''\ +// +// Compute a sub-block of the At*y MATRIX-VECTOR product +// +void* COMMIT_At__block( void *ptr ) +{ + int id = (long)ptr; + int offset; + double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, w, YTmp; + double *xPtr0, *xPtr1, *xPtr2, *xPtr3, *xPtr4, *xPtr5, *xPtr6, *xPtr7, *xPtr8, *xPtr9, *xPtr10, *xPtr11, *xPtr12, *xPtr13, *xPtr14, *xPtr15, *xPtr16, *xPtr17, *xPtr18, *xPtr19; + double *YPtr, *YPtrEnd; + float *SFP0ptr, *SFP1ptr, *SFP2ptr, *SFP3ptr, *SFP4ptr, *SFP5ptr, *SFP6ptr, *SFP7ptr, *SFP8ptr, *SFP9ptr, *SFP10ptr, *SFP11ptr, *SFP12ptr, *SFP13ptr, *SFP14ptr, *SFP15ptr, *SFP16ptr, *SFP17ptr, *SFP18ptr, *SFP19ptr; + uint32_t *t_v, *t_vEnd, *t_f; + uint16_t *t_o; + float *t_l; + uint8_t *t_t;\n\n''' + s += add_intracellular_compartments() + s += add_extracellular_compartments() + s += add_isotropic_compartments() + s += '''\ + pthread_exit( 0 ); +}\n\n''' + return s + + +def add_commit_at() -> str: + '''Adds the COMMIT_At function to the operator_c.c file.''' + def add_intracellular_compartments() -> str: + s: str = '''\ + switch (nIC) + {''' + s1: str = 'wmrSFP0 = _wmrSFP;' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + wmrSFP{i} = wmrSFP{i - 1} + _ndirs*_nS;''' + s += f'''\ + + case {i + 1}: + {s1} + break;''' + s += '''\ + + }\n\n''' + return s + + def add_extracellular_compartments() -> str: + s: str = '''\ + switch (nEC) + {''' + s1: str = 'wmhSFP0 = _wmhSFP;' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + wmhSFP{i} = wmhSFP{i - 1} + _ndirs*_nS;''' + s += f'''\ + + case {i + 1}: + {s1} + break;''' + s += '''\ + + }\n\n''' + return s + + def add_isotropic_compartments() -> str: + s: str = '''\ + switch (nISO) + {''' + s1: str = 'isoSFP0 = _isoSFP;' + + for i in range(0, 20): + if i > 0: + s1 += f'''\ + + isoSFP{i} = isoSFP{i - 1} + _nS;''' + s += f'''\ + + case {i + 1}: + {s1} + break;''' + s += '''\ + + }\n\n''' + return s + + s: str = '''\ +// +// Function called by Cython +// +void COMMIT_At( + int _nF, int _n, int _nE, int _nV, int _nS, int _ndirs, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, uint16_t *_ICo, float *_ICl, + uint32_t *_ECv, uint16_t *_ECo, + uint32_t *_ISOv, + float *_wmrSFP, float *_wmhSFP, float *_isoSFP, + uint8_t* _ICthreadsT, uint32_t* _ECthreadsT, uint32_t* _ISOthreadsT, + uint32_t _nIC, uint32_t _nEC, uint32_t _nISO, uint32_t _nThreads +) +{ + nF = _nF; + n = _n; + nE = _nE; + nV = _nV; + nS = _nS; + ndirs = _ndirs; + + x = _vOUT; + Y = _vIN; + + ICf = _ICf; + ICv = _ICv; + ICo = _ICo; + ICl = _ICl; + ECv = _ECv; + ECo = _ECo; + ISOv = _ISOv; + + nIC = _nIC; + nEC = _nEC; + nISO = _nISO;\n\n''' + s += add_intracellular_compartments() + s += add_extracellular_compartments() + s += add_isotropic_compartments() + s += '''\ + ICthreadsT = _ICthreadsT; + ECthreadsT = _ECthreadsT; + ISOthreadsT = _ISOthreadsT; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[MAX_THREADS]; + int t; + for(t=0; t<_nThreads ; t++) + pthread_create( &threads[t], NULL, COMMIT_At__block, (void *) (long int)t ); + for(t=0; t<_nThreads ; t++) + pthread_join( threads[t], NULL ); + return; +}\n\n''' + return s + + +def add_commit_a_block_nolut() -> str: + '''Adds the COMMIT_A__block_nolut function to the operator_c.c file.''' + def add_intracellular_compartments() -> str: + s: str = '''\ + // intra-cellular compartments + t_v = ICv + ICthreads[id]; + t_vEnd = ICv + ICthreads[id+1]; + t_l = ICl + ICthreads[id]; + t_f = ICf + ICthreads[id]; + + while( t_v != t_vEnd ) + { + x0 = x[*t_f]; + if ( x0 != 0 ) + Y[*t_v] += (double)(*t_l) * x0; + t_f++; + t_v++; + t_l++; + }\n\n''' + return s + + def add_isotropic_compartments() -> str: + s: str = '''\ + // isotropic compartments + if (nISO > 0) + { + t_v = ISOv + ISOthreads[id]; + t_vEnd = ISOv + ISOthreads[id+1]; + xPtr = x + nF + ISOthreads[id]; + + while( t_v != t_vEnd ) + { + x0 = *xPtr++; + if ( x0 != 0 ) + Y[*t_v] += x0; + t_v++; + } + }\n\n''' + return s + + s: str = '''\ +// +// Compute a sub-block of the A*x MATRIX-VECTOR product +// +void* COMMIT_A__block_nolut( void *ptr ) +{ + int id = (long)ptr; + double x0; + double *xPtr; + uint32_t *t_v, *t_vEnd, *t_f; + float *t_l;\n\n''' + s += add_intracellular_compartments() + s += add_isotropic_compartments() + s += '''\ + pthread_exit( 0 ); +}\n\n''' + return s + + +def add_commit_a_nolut() -> str: + '''Adds the COMMIT_A_nolut function to the operator_c.c file.''' + s: str = '''\ +// +// Function called by Cython +// +void COMMIT_A_nolut( + int _nF, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, float *_ICl, + uint32_t *_ISOv, + uint32_t* _ICthreads, uint32_t* _ISOthreads, + uint32_t _nISO, uint32_t _nThreads +) +{ + nF = _nF; + + x = _vIN; + Y = _vOUT; + + ICf = _ICf; + ICv = _ICv; + ICl = _ICl; + ISOv = _ISOv; + + nISO = _nISO; + + ICthreads = _ICthreads; + ISOthreads = _ISOthreads; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[MAX_THREADS]; + int t; + for(t=0; t<_nThreads ; t++) + pthread_create( &threads[t], NULL, COMMIT_A__block_nolut, (void *) (long int)t ); + for(t=0; t<_nThreads ; t++) + pthread_join( threads[t], NULL ); + return; +}\n\n''' + return s + + +def add_commit_at_block_nolut() -> str: + '''Adds the COMMIT_At__block_nolut function to the operator_c.c file.''' + def add_intracellular_compartments() -> str: + s: str = '''\ + // intra-cellular compartments + t_v = ICv; + t_vEnd = ICv + n; + t_l = ICl; + t_f = ICf; + t_t = ICthreadsT; + + while( t_v != t_vEnd ) + { + // in this case, I need to walk throug because the segments are ordered in "voxel order" + if ( *t_t == id ) + x[*t_f] += (double)(*t_l) * Y[*t_v]; + t_t++; + t_f++; + t_v++; + t_l++; + }\n\n''' + return s + + def add_isotropic_compartments() -> str: + s: str = '''\ + // isotropic compartments + if (nISO > 0) + { + t_v = ISOv + ISOthreadsT[id]; + t_vEnd = ISOv + ISOthreadsT[id+1]; + xPtr = x + nF + ISOthreadsT[id]; + + while( t_v != t_vEnd ) + (*xPtr++) += Y[*t_v++]; + }\n\n''' + return s + + s: str = '''\ +// +// Compute a sub-block of the At*y MATRIX-VECTOR product +// +void* COMMIT_At__block_nolut( void *ptr ) +{ + int id = (long)ptr; + double *xPtr; + uint32_t *t_v, *t_vEnd, *t_f; + float *t_l; + uint8_t *t_t;\n\n''' + s += add_intracellular_compartments() + s += add_isotropic_compartments() + s += '''\ + + pthread_exit( 0 ); +}\n\n''' + return s + + +def add_commit_at_nolut() -> str: + '''Adds the COMMIT_At_nolut function to the operator_c.c file.''' + s: str = '''\ +// +// Function called by Cython +// +void COMMIT_At_nolut( + int _nF, int _n, + double *_vIN, double *_vOUT, + uint32_t *_ICf, uint32_t *_ICv, float *_ICl, + uint32_t *_ISOv, + uint8_t* _ICthreadsT, uint32_t* _ISOthreadsT, + uint32_t _nISO, uint32_t _nThreads +) +{ + nF = _nF; + n = _n; + + x = _vOUT; + Y = _vIN; + + ICf = _ICf; + ICv = _ICv; + ICl = _ICl; + ISOv = _ISOv; + + nISO = _nISO; + + ICthreadsT = _ICthreadsT; + ISOthreadsT = _ISOthreadsT; + + // Run SEPARATE THREADS to perform the multiplication + pthread_t threads[MAX_THREADS]; + int t; + for(t=0; t<_nThreads ; t++) + pthread_create( &threads[t], NULL, COMMIT_At__block_nolut, (void *) (long int)t ); + for(t=0; t<_nThreads ; t++) + pthread_join( threads[t], NULL ); + return; +}\n''' + return s + + +def add_commit() -> str: + '''Merge all the COMMIT functions.''' + s: str = '' + s += add_commit_a_block() + s += add_commit_a() + s += add_commit_at_block() + s += add_commit_at() + return s + + +def add_commit_nolut() -> str: + '''Merge all the COMMIT_nolut functions.''' + s: str = '' + s += add_commit_a_block_nolut() + s += add_commit_a_nolut() + s += add_commit_at_block_nolut() + s += add_commit_at_nolut() + return s + + +def write_operator_c_file() -> NoReturn: + '''Merge the COMMIT and COMMIT_nolut functions and write them to the operator_c.c file.''' + s: str = '' + s += add_imports() + s += add_global_variables() + s += add_commit() + s += add_commit_nolut() + + with open(path_join('commit', 'operator', 'operator_c.c'), 'w') as f: + f.write(s)