From 1e3b0317e6e8db62e5d205b87d563311e91b915c Mon Sep 17 00:00:00 2001 From: Qianqian Fang Date: Fri, 29 Dec 2023 14:15:52 -0500 Subject: [PATCH] [feat] initial implementation of mua/mus/g/n all float format --- src/mcx_const.h | 1 + src/mcx_core.cu | 21 ++++++- src/mcx_utils.c | 153 +++++++++++++++++++++++++++++------------------- src/mcx_utils.h | 1 + src/mcxlab.cpp | 69 +++++----------------- src/pmcx.cpp | 68 +++++---------------- 6 files changed, 144 insertions(+), 169 deletions(-) diff --git a/src/mcx_const.h b/src/mcx_const.h index 91e8690f..8b099be7 100644 --- a/src/mcx_const.h +++ b/src/mcx_const.h @@ -65,6 +65,7 @@ #define MCX_DEBUG_PROGRESS 4 /**< debug flags: 4 - print progress bar */ #define MCX_DEBUG_MOVE_ONLY 8 /**< debug flags: 8 - only save photon trajectory data, disable volume and detphoton output */ +#define MEDIA_ASGN_F2H 96 /**< media format: input: float4 {mua,mus,g,n} -> {[half: mua],[half: mus],[half: g],[half: n]} */ #define MEDIA_2LABEL_SPLIT 97 /**< media format: 64bit:{[byte: lower label][byte: upper label][byte*3: reference point][byte*3: normal vector]} */ #define MEDIA_2LABEL_MIX 98 /**< media format: {[int: label1][int: label2][float32: label1 %]} -> 32bit:{[short 0-32767 scaled label1 %],[byte: label2],[byte: label1]} */ #define MEDIA_LABEL_HALF 99 /**< media format: {[float32: mua/mus/g/n][float32: 1/2/3/4][float32: label]} -> 32bit:{[half: mua/mus/g/n][int16: [B15-B16: 0/1/2/3][B1-B14: tissue type]} */ diff --git a/src/mcx_core.cu b/src/mcx_core.cu index dcc3eb1e..5d4fa80f 100644 --- a/src/mcx_core.cu +++ b/src/mcx_core.cu @@ -580,7 +580,7 @@ __device__ void updateproperty(Medium* prop, unsigned int& mediaid, RandType t[R } else if (gcfg->mediaformat == MEDIA_MUA_FLOAT) { //< [f0]: single-prec mua every voxel; mus/g/n uses 2nd row in gcfg.prop prop->mua = fabsf(*((float*)&mediaid)); prop->n = gproperty[!(mediaid & MED_MASK) == 0].w; - } else if (gcfg->mediaformat == MEDIA_AS_F2H || gcfg->mediaformat == MEDIA_AS_HALF) { //< [h1][h0]: h1/h0: single-prec mua/mus for every voxel; g/n uses those in cfg.prop(2,:) + } else if (gcfg->mediaformat == MEDIA_AS_F2H || gcfg->mediaformat == MEDIA_AS_HALF) { //< [h1][h0]: h1/h0: half-prec mua/mus for every voxel; g/n uses those in cfg.prop(2,:) union { unsigned int i; #if ! defined(__CUDACC_VER_MAJOR__) || __CUDACC_VER_MAJOR__ >= 9 @@ -593,6 +593,21 @@ __device__ void updateproperty(Medium* prop, unsigned int& mediaid, RandType t[R prop->mua = fabsf(__half2float(val.h[0])); prop->mus = fabsf(__half2float(val.h[1])); prop->n = gproperty[!(mediaid & MED_MASK) == 0].w; + } else if (gcfg->mediaformat == MEDIA_ASGN_F2H) { //< [h3][h2][h1][h0]: h3/h2/h1/h0: half-prec n/g/mus/mua for every voxel + union { + unsigned int i[2]; +#if ! defined(__CUDACC_VER_MAJOR__) || __CUDACC_VER_MAJOR__ >= 9 + __half_raw h[4]; +#else + half h[4]; +#endif + } val; + val.i[0] = mediaid & MED_MASK; + val.i[1] = media[idx1d + gcfg->dimlen.z]; + prop->mua = fabsf(__half2float(val.h[0])); + prop->mus = fabsf(__half2float(val.h[1])); + prop->g = fabsf(__half2float(val.h[2])); + prop->n = fabsf(__half2float(val.h[3])); } else if (gcfg->mediaformat == MEDIA_2LABEL_MIX) { //< [s1][c1][c0]: s1: (volume fraction of tissue 1)*(2^16-1), c1: tissue 1 label, c0: tissue 0 label union { unsigned int i; @@ -2917,7 +2932,7 @@ void mcx_run_simulation(Config* cfg, GPUInfo* gpu) { /** * Allocate all GPU buffers to store input or output data */ - if (cfg->mediabyte != MEDIA_2LABEL_SPLIT) { + if (cfg->mediabyte != MEDIA_2LABEL_SPLIT && cfg->mediabyte != MEDIA_ASGN_F2H) { CUDA_ASSERT(cudaMalloc((void**) &gmedia, sizeof(uint) * (cfg->dim.x * cfg->dim.y * cfg->dim.z))); } else { CUDA_ASSERT(cudaMalloc((void**) &gmedia, sizeof(uint) * (2 * cfg->dim.x * cfg->dim.y * cfg->dim.z))); @@ -3086,7 +3101,7 @@ void mcx_run_simulation(Config* cfg, GPUInfo* gpu) { mcx_flush(cfg); - if (cfg->mediabyte != MEDIA_2LABEL_SPLIT) { + if (cfg->mediabyte != MEDIA_2LABEL_SPLIT && cfg->mediabyte != MEDIA_ASGN_F2H) { CUDA_ASSERT(cudaMemcpy(gmedia, media, sizeof(uint)*cfg->dim.x * cfg->dim.y * cfg->dim.z, cudaMemcpyHostToDevice)); } else { CUDA_ASSERT(cudaMemcpy(gmedia, media, sizeof(uint) * 2 * cfg->dim.x * cfg->dim.y * cfg->dim.z, cudaMemcpyHostToDevice)); diff --git a/src/mcx_utils.c b/src/mcx_utils.c index 6018d1b9..6c8195a7 100644 --- a/src/mcx_utils.c +++ b/src/mcx_utils.c @@ -210,8 +210,8 @@ const char* srctypeid[] = {"pencil", "isotropic", "cone", "gaussian", "planar", * User can specify the source type using a string */ -const unsigned int mediaformatid[] = {1, 2, 4, 97, 98, 99, 100, 101, 102, 103, 104, 0}; -const char* mediaformat[] = {"byte", "short", "integer", "svmc", "mixlabel", "labelplus", +const unsigned int mediaformatid[] = {1, 2, 4, 96, 97, 98, 99, 100, 101, 102, 103, 104, 0}; +const char* mediaformat[] = {"byte", "short", "integer", "asgn_float", "svmc", "mixlabel", "labelplus", "muamus_float", "mua_float", "muamus_half", "asgn_byte", "muamus_short", "" }; @@ -1546,7 +1546,7 @@ void mcx_preprocess(Config* cfg) { if (cfg->isrowmajor) { /*from here on, the array is always col-major*/ - if (cfg->mediabyte == MEDIA_2LABEL_SPLIT) { + if (cfg->mediabyte == MEDIA_2LABEL_SPLIT || cfg->mediabyte == MEDIA_ASGN_F2H) { mcx_convertrow2col64((size_t**) & (cfg->vol), &(cfg->dim)); } else { mcx_convertrow2col(&(cfg->vol), &(cfg->dim)); @@ -3200,18 +3200,20 @@ void mcx_loadvolume(char* filename, Config* cfg, int isbuf) { } datalen = cfg->dim.x * cfg->dim.y * cfg->dim.z; - cfg->vol = (unsigned int*)malloc(sizeof(unsigned int) * datalen * (1 + (cfg->mediabyte == MEDIA_2LABEL_SPLIT))); + cfg->vol = (unsigned int*)malloc(sizeof(unsigned int) * datalen * (1 + (cfg->mediabyte == MEDIA_2LABEL_SPLIT || cfg->mediabyte == MEDIA_ASGN_F2H))); if (!isbuf) { if (cfg->mediabyte == MEDIA_AS_F2H) { inputvol = (unsigned char*)malloc(sizeof(unsigned char) * (datalen << 3)); + } else if (cfg->mediabyte == MEDIA_ASGN_F2H) { + inputvol = (unsigned char*)malloc(sizeof(unsigned char) * (datalen << 4)); } else if (cfg->mediabyte >= 4) { inputvol = (unsigned char*)(cfg->vol); } else { inputvol = (unsigned char*)malloc(sizeof(unsigned char) * cfg->mediabyte * datalen); } - res = fread(inputvol, sizeof(unsigned char) * ((cfg->mediabyte == MEDIA_AS_F2H || cfg->mediabyte == MEDIA_2LABEL_SPLIT) ? 8 : MIN(cfg->mediabyte, 4)), datalen, fp); + res = fread(inputvol, sizeof(unsigned char) * ((cfg->mediabyte == MEDIA_ASGN_F2H) ? 16 : ((cfg->mediabyte == MEDIA_AS_F2H || cfg->mediabyte == MEDIA_2LABEL_SPLIT) ? 8 : MIN(cfg->mediabyte, 4))), datalen, fp); fclose(fp); if (res != datalen) { @@ -3253,70 +3255,28 @@ void mcx_loadvolume(char* filename, Config* cfg, int isbuf) { cfg->vol[i] = f2i.i; } - } else if (cfg->mediabyte == MEDIA_AS_F2H) { + } else if (cfg->mediabyte == MEDIA_AS_F2H || cfg->mediabyte == MEDIA_ASGN_F2H) { float* val = (float*)inputvol; - union { - float f[2]; - unsigned int i[2]; - unsigned short h[2]; - } f2h; - unsigned short tmp, m; + float f2h[2]; + int offset = (cfg->mediabyte == MEDIA_ASGN_F2H); for (i = 0; i < datalen; i++) { - f2h.f[0] = val[i << 1] * cfg->unitinmm; - f2h.f[1] = val[(i << 1) + 1] * cfg->unitinmm; + f2h[0] = val[i << (1 + offset)] * cfg->unitinmm; // mua + f2h[1] = val[(i << (1 + offset)) + 1] * cfg->unitinmm; // mus - if (f2h.f[0] != f2h.f[0] || f2h.f[1] != f2h.f[1]) { /*if one of mua/mus is nan in continuous medium, convert to 0-voxel*/ + if (f2h[0] != f2h[0] || f2h[1] != f2h[1]) { /*if one of mua/mus is nan in continuous medium, convert to 0-voxel*/ cfg->vol[i] = 0; continue; } - /** - float to half conversion - https://stackoverflow.com/questions/3026441/float32-to-float16/5587983#5587983 - https://gamedev.stackexchange.com/a/17410 (for denorms) - */ - m = ((f2h.i[0] >> 13) & 0x03ff); - tmp = (f2h.i[0] >> 23) & 0xff; /*exponent*/ - tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27); - - if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/ - unsigned short sign = (f2h.i[0] >> 16) & 0x8000; - tmp = ((f2h.i[0] >> 23) & 0xff); - m = (f2h.i[0] >> 12) & 0x07ff; - m |= 0x0800u; - f2h.h[0] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1)); + if (cfg->mediabyte == MEDIA_ASGN_F2H) { + cfg->vol[i] = mcx_float2half2(f2h); + f2h[0] = val[(i << 2) + 2]; // g + f2h[1] = val[(i << 2) + 3]; // n + cfg->vol[i + datalen] = mcx_float2half2(f2h); } else { - f2h.h[0] = (f2h.i[0] >> 31) << 5; - f2h.h[0] = (f2h.h[0] | tmp) << 10; - f2h.h[0] |= (f2h.i[0] >> 13) & 0x3ff; + cfg->vol[i] = mcx_float2half2(f2h); } - - m = ((f2h.i[1] >> 13) & 0x03ff); - tmp = (f2h.i[1] >> 23) & 0xff; /*exponent*/ - tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27); - - if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/ - unsigned short sign = (f2h.i[1] >> 16) & 0x8000; - tmp = ((f2h.i[1] >> 23) & 0xff); - m = (f2h.i[1] >> 12) & 0x07ff; - m |= 0x0800u; - f2h.h[1] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1)); - } else { - f2h.h[1] = (f2h.i[1] >> 31) << 5; - f2h.h[1] = (f2h.h[1] | tmp) << 10; - f2h.h[1] |= (f2h.i[1] >> 13) & 0x3ff; - } - - if (f2h.i[0] == 0) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16*/ - f2h.i[0] = 0x00010000; - } - - if (f2h.i[0] == SIGN_BIT) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16*/ - f2h.i[0] = 0; - } - - cfg->vol[i] = f2h.i[0]; } } else if (cfg->mediabyte == MEDIA_2LABEL_SPLIT) { memcpy(cfg->vol, inputvol, (datalen << 3)); @@ -3331,7 +3291,7 @@ void mcx_loadvolume(char* filename, Config* cfg, int isbuf) { } } - if (!isbuf && (cfg->mediabyte < 4 || cfg->mediabyte == MEDIA_AS_F2H)) { + if (!isbuf && (cfg->mediabyte < 4 || cfg->mediabyte == MEDIA_AS_F2H || cfg->mediabyte == MEDIA_ASGN_F2H)) { free(inputvol); } } @@ -4985,6 +4945,77 @@ int mcx_isbinstr(const char* str) { return 1; } +/** + * @brief Function to parse one subfield of the input structure + * + * This function reads in all necessary information from the cfg input structure. + * it can handle single scalar inputs, short vectors (3-4 elem), strings and arrays. + * + * @param[in] root: the cfg input data structure + * @param[in] item: the current element of the cfg input data structure + * @param[in] idx: the index of the current element (starting from 0) + * @param[out] cfg: the simulation configuration structure to store all input read from the parameters + */ + +int mcx_float2half2(float input[2]) { + union { + float f[2]; + unsigned int i[2]; + unsigned short h[2]; + } f2h; + unsigned short tmp, m; + + f2h.f[0] = input[0]; + f2h.f[1] = input[1]; + + /** + float to half conversion + https://stackoverflow.com/questions/3026441/float32-to-float16/5587983#5587983 + https://gamedev.stackexchange.com/a/17410 (for denorms) + */ + m = ((f2h.i[0] >> 13) & 0x03ff); + tmp = (f2h.i[0] >> 23) & 0xff; /*exponent*/ + tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27); + + if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/ + unsigned short sign = (f2h.i[0] >> 16) & 0x8000; + tmp = ((f2h.i[0] >> 23) & 0xff); + m = (f2h.i[0] >> 12) & 0x07ff; + m |= 0x0800u; + f2h.h[0] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1)); + } else { + f2h.h[0] = (f2h.i[0] >> 31) << 5; + f2h.h[0] = (f2h.h[0] | tmp) << 10; + f2h.h[0] |= (f2h.i[0] >> 13) & 0x3ff; + } + + m = ((f2h.i[1] >> 13) & 0x03ff); + tmp = (f2h.i[1] >> 23) & 0xff; /*exponent*/ + tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27); + + if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/ + unsigned short sign = (f2h.i[1] >> 16) & 0x8000; + tmp = ((f2h.i[1] >> 23) & 0xff); + m = (f2h.i[1] >> 12) & 0x07ff; + m |= 0x0800u; + f2h.h[1] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1)); + } else { + f2h.h[1] = (f2h.i[1] >> 31) << 5; + f2h.h[1] = (f2h.h[1] | tmp) << 10; + f2h.h[1] |= (f2h.i[1] >> 13) & 0x3ff; + } + + if (f2h.i[0] == 0) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16, only happens for _AS, not ASGN as n will never be 0*/ + f2h.i[0] = 0x00010000; + } + + if (f2h.i[0] == SIGN_BIT) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16, , only happens for _AS, not ASGN as n will never be 0*/ + f2h.i[0] = 0; + } + + return f2h.i[0]; +} + #ifndef MCX_CONTAINER /** @@ -5136,6 +5167,8 @@ where possible parameters include (the first value in [*|*] is the default)\n\ 1 or byte: 0-128 tissue labels\n\ 2 or short: 0-65535 (max to 4000) tissue labels\n\ 4 or integer: integer tissue labels \n\ + 96 or asgn_float: mua/mus/g/n 4xfloat format\n\ + {[f:mua][f:mus][f:g][f:n]}\n\ 97 or svmc: split-voxel MC 8-byte format\n\ {[n.z][n.y][n.x][p.z][p.y][p.x][upper][lower]}\n\ 98 or mixlabel: label1+label2+label1_percentage\n\ diff --git a/src/mcx_utils.h b/src/mcx_utils.h index 907532d9..cf6ed3a4 100644 --- a/src/mcx_utils.h +++ b/src/mcx_utils.h @@ -317,6 +317,7 @@ void mcx_loadseedjdat(char* filename, Config* cfg); void mcx_prep_polarized(Config* cfg); void mcx_replayinit(Config* cfg, float* detps, int dimdetps[2], int seedbyte); void mcx_validatecfg(Config* cfg, float* detps, int dimdetps[2], int seedbyte); +int mcx_float2half2(float input[2]); #ifdef MCX_CONTAINER #ifdef __cplusplus diff --git a/src/mcxlab.cpp b/src/mcxlab.cpp index fdcf41b9..617c2fdb 100644 --- a/src/mcxlab.cpp +++ b/src/mcxlab.cpp @@ -505,6 +505,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { return; } + /** * @brief Function to parse one subfield of the input structure * @@ -595,6 +596,8 @@ void mcx_set_field(const mxArray* root, const mxArray* item, int idx, Config* cf } else if (mxGetNumberOfDimensions(item) == 4) { // if dimension is 4D, 1st dim is the property records: mua/mus/g/n if ((mxIsUint8(item) || mxIsInt8(item)) && arraydim[0] == 4) { // if 4D byte array has a 1st dim of 4 cfg->mediabyte = MEDIA_ASGN_BYTE; + } else if (mxIsSingle(item) && arraydim[0] == 4) { // if 4D float array has a 1st dim of 4 + cfg->mediabyte = MEDIA_ASGN_F2H; } else if ((mxIsUint8(item) || mxIsInt8(item)) && arraydim[0] == 8) { cfg->mediabyte = MEDIA_2LABEL_SPLIT; } else if (mxIsSingle(item) && arraydim[0] == 3) { @@ -675,70 +678,28 @@ void mcx_set_field(const mxArray* root, const mxArray* item, int idx, Config* cf cfg->vol[i] = f2i.i; } - } else if (cfg->mediabyte == MEDIA_AS_F2H) { + } else if (cfg->mediabyte == MEDIA_AS_F2H || cfg->mediabyte == MEDIA_ASGN_F2H) { float* val = (float*)mxGetPr(item); - union { - float f[2]; - unsigned int i[2]; - unsigned short h[2]; - } f2h; - unsigned short tmp, m; + float f2h[2]; + int offset = (cfg->mediabyte == MEDIA_ASGN_F2H); for (i = 0; i < dimxyz; i++) { - f2h.f[0] = val[i << 1] * cfg->unitinmm; // mua - f2h.f[1] = val[(i << 1) + 1] * cfg->unitinmm; // mus + f2h[0] = val[i << (1 + offset)] * cfg->unitinmm; // mua + f2h[1] = val[(i << (1 + offset)) + 1] * cfg->unitinmm; // mus - if (f2h.f[0] != f2h.f[0] || f2h.f[1] != f2h.f[1]) { /*if one of mua/mus is nan in continuous medium, convert to 0-voxel*/ + if (f2h[0] != f2h[0] || f2h[1] != f2h[1]) { /*if one of mua/mus is nan in continuous medium, convert to 0-voxel*/ cfg->vol[i] = 0; continue; } - /** - float to half conversion - https://stackoverflow.com/questions/3026441/float32-to-float16/5587983#5587983 - https://gamedev.stackexchange.com/a/17410 (for denorms) - */ - m = ((f2h.i[0] >> 13) & 0x03ff); - tmp = (f2h.i[0] >> 23) & 0xff; /*exponent*/ - tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27); - - if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/ - unsigned short sign = (f2h.i[0] >> 16) & 0x8000; - tmp = ((f2h.i[0] >> 23) & 0xff); - m = (f2h.i[0] >> 12) & 0x07ff; - m |= 0x0800u; - f2h.h[0] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1)); + if (cfg->mediabyte == MEDIA_ASGN_F2H) { + cfg->vol[i] = mcx_float2half2(f2h); + f2h[0] = val[(i << 2) + 2]; // g + f2h[1] = val[(i << 2) + 3]; // n + cfg->vol[i + dimxyz] = mcx_float2half2(f2h); } else { - f2h.h[0] = (f2h.i[0] >> 31) << 5; - f2h.h[0] = (f2h.h[0] | tmp) << 10; - f2h.h[0] |= (f2h.i[0] >> 13) & 0x3ff; + cfg->vol[i] = mcx_float2half2(f2h); } - - m = ((f2h.i[1] >> 13) & 0x03ff); - tmp = (f2h.i[1] >> 23) & 0xff; /*exponent*/ - tmp = (tmp - 0x70) & ((unsigned int)((int)(0x70 - tmp) >> 4) >> 27); - - if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/ - unsigned short sign = (f2h.i[1] >> 16) & 0x8000; - tmp = ((f2h.i[1] >> 23) & 0xff); - m = (f2h.i[1] >> 12) & 0x07ff; - m |= 0x0800u; - f2h.h[1] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1)); - } else { - f2h.h[1] = (f2h.i[1] >> 31) << 5; - f2h.h[1] = (f2h.h[1] | tmp) << 10; - f2h.h[1] |= (f2h.i[1] >> 13) & 0x3ff; - } - - if (f2h.i[0] == 0) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16*/ - f2h.i[0] = 0x00010000; - } - - if (f2h.i[0] == SIGN_BIT) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16*/ - f2h.i[0] = 0; - } - - cfg->vol[i] = f2h.i[0]; } } else if (cfg->mediabyte == MEDIA_LABEL_HALF) { float* val = (float*)mxGetPr(item); diff --git a/src/pmcx.cpp b/src/pmcx.cpp index 37835cba..a5248a91 100644 --- a/src/pmcx.cpp +++ b/src/pmcx.cpp @@ -297,68 +297,32 @@ void parseVolume(const py::dict& user_cfg, Config& mcx_config) { break; } - case 2: { - mcx_config.mediabyte = MEDIA_AS_F2H; + case 2: + case 4: { + mcx_config.mediabyte = (buffer.shape.at(0) == 2 ? MEDIA_AS_F2H : MEDIA_ASGN_F2H); auto val = (float*) buffer.ptr; - union { - float f[2]; - unsigned int i[2]; - unsigned short h[2]; - } f2h; - unsigned short tmp, m; + + float f2h[2]; + int offset = (cfg->mediabyte == MEDIA_ASGN_F2H); for (i = 0; i < dim_xyz; i++) { - f2h.f[0] = val[i << 1] * mcx_config.unitinmm; // mua - f2h.f[1] = val[(i << 1) + 1] * mcx_config.unitinmm; // mus + f2h[0] = val[i << (1 + offset)] * mcx_config.unitinmm; // mua + f2h[1] = val[(i << (1 + offset)) + 1] * mcx_config->unitinmm; // mus - if (f2h.f[0] != f2h.f[0] - || f2h.f[1] != f2h.f[1]) { /*if one of mua/mus is nan in continuous medium, convert to 0-voxel*/ + if (f2h[0] != f2h[0] + || f2h[1] != f2h[1]) { /*if one of mua/mus is nan in continuous medium, convert to 0-voxel*/ mcx_config.vol[i] = 0; continue; } - /** - * float to half conversion - * https://stackoverflow.com/questions/3026441/float32-to-float16/5587983#5587983 - * https://gamedev.stackexchange.com/a/17410 (for denorms) - */ - m = ((f2h.i[0] >> 13) & 0x03ff); - tmp = (f2h.i[0] >> 23) & 0xff; /*exponent*/ - tmp = (tmp - 0x70) & ((unsigned int) ((int) (0x70 - tmp) >> 4) >> 27); - - if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/ - unsigned short sign = (f2h.i[0] >> 16) & 0x8000; - tmp = ((f2h.i[0] >> 23) & 0xff); - m = (f2h.i[0] >> 12) & 0x07ff; - m |= 0x0800u; - f2h.h[0] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1)); + if (cfg->mediabyte == MEDIA_ASGN_F2H) { + mcx_config->vol[i] = mcx_float2half2(f2h); + f2h[0] = val[(i << 2) + 2]; // g + f2h[1] = val[(i << 2) + 3]; // n + mcx_config->vol[i + dim_xyz] = mcx_float2half2(f2h); } else { - f2h.h[0] = (f2h.i[0] >> 31) << 5; - f2h.h[0] = (f2h.h[0] | tmp) << 10; - f2h.h[0] |= (f2h.i[0] >> 13) & 0x3ff; + mcx_config->vol[i] = mcx_float2half2(f2h); } - - m = ((f2h.i[1] >> 13) & 0x03ff); - tmp = (f2h.i[1] >> 23) & 0xff; /*exponent*/ - tmp = (tmp - 0x70) & ((unsigned int) ((int) (0x70 - tmp) >> 4) >> 27); - - if (m < 0x10 && tmp == 0) { /*handle denorms - between 2^-24 and 2^-14*/ - unsigned short sign = (f2h.i[1] >> 16) & 0x8000; - tmp = ((f2h.i[1] >> 23) & 0xff); - m = (f2h.i[1] >> 12) & 0x07ff; - m |= 0x0800u; - f2h.h[1] = sign | ((m >> (114 - tmp)) + ((m >> (113 - tmp)) & 1)); - } else { - f2h.h[1] = (f2h.i[1] >> 31) << 5; - f2h.h[1] = (f2h.h[1] | tmp) << 10; - f2h.h[1] |= (f2h.i[1] >> 13) & 0x3ff; - } - - if (f2h.i[0] == 0) { /*avoid being detected as a 0-label voxel, setting mus=EPS_fp16*/ - f2h.i[0] = 0x00010000; - } - - mcx_config.vol[i] = f2h.i[0]; } break;