Bruker dwi bmat/bvec/bval extraction (#10)

* added extraction of bmat,bval,bvec,dcm from bruker dcm (uses jcamp.py) * also updated branch naming for circleci
khanlab · Dec 7, 2019 · 0c2dacf · 0c2dacf
1 parent 23bc379
commit 0c2dacf
Show file tree

Hide file tree

Showing 6 changed files with 316 additions and 10 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -10,7 +10,8 @@ jobs:
           name: Building Docker container
           no_output_timeout: 1h
           command: |
-              export DOCKER_NAME=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME:latest
+              if [ "$CIRCLE_BRANCH" = "master" -o "$CIRCLE_BRANCH" = "" ]; then MY_TAG=latest; else MY_TAG=$CIRCLE_BRANCH; fi
+              export DOCKER_NAME=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME:$MY_TAG
               docker build -t $DOCKER_NAME .
               docker save $DOCKER_NAME -o /tmp/docker_image.tar
       - persist_to_workspace:
@@ -37,7 +38,8 @@ jobs:
       - run:
           name: Running basic T1w test
           command: |
-              export DOCKER_NAME=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME:latest
+              if [ "$CIRCLE_BRANCH" = "master" -o "$CIRCLE_BRANCH" = "" ]; then MY_TAG=latest; else MY_TAG=$CIRCLE_BRANCH; fi
+              export DOCKER_NAME=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME:$MY_TAG
               export TAR_URL=https://www.dropbox.com/s/9ocdstdqk4tgt4h/Doe_TestProject_18000101_loc_shim_t1w_t2w_dwi_P01_1.5234F73D.tar.gz
               sh ./.circleci/tar2bidsDocker $DOCKER_NAME $TAR_URL out 
 
@@ -56,7 +58,8 @@ jobs:
           name: Pushing build to docker hub
           command: |
             docker login -u $DOCKER_USER -p $DOCKER_PASS
-            export DOCKER_NAME=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME:latest
+            if [ "$CIRCLE_BRANCH" = "master" -o "$CIRCLE_BRANCH" = "" ]; then MY_TAG=latest; else MY_TAG=$CIRCLE_BRANCH; fi
+            export DOCKER_NAME=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME:$MY_TAG
             # tag and push here:
             echo docker push $DOCKER_NAME
             docker push $DOCKER_NAME

diff --git a/etc/bidsignore b/etc/bidsignore
@@ -33,3 +33,8 @@
 */dwi/*_ogse.*
 */*/dwi/*_ogse.*
 
+#bmat, dcm for Bruker
+*/dwi/*.dcm
+*/dwi/*.bmat
+*/*/dwi/*.dcm
+*/*/dwi/*.bmat
diff --git a/etc/correctBrukerBdata b/etc/correctBrukerBdata
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# For every DWI NIfTI in <in_bids>/sub-<subj>[/ses-<ses>]/dwi that has no .bvec
+# next to it: look up the series number in the image's json sidecar, extract
+# the matching series from the DICOM tar file into <image>.dcm, then run
+# getBrukerBdataFromDicoms.py on that file to produce the bval/bvec/bmat files.
+
+#subject id is required as well, so at least three arguments are needed
+if [ "$#" -lt 3 ]
+then
+    echo "Usage: $0 <in_bids> <in_tar_file> <subj ID> <session id (optional)>"
+    exit 1
+fi
+in_bids=$1
+in_tar=$2
+
+#assumes python script is in same folder as this wrapper -- eventually just replace this..
+exec_path=$(dirname "$0")
+
+subj=$3
+subj=${subj##sub-} #strip off sub- if it exists, to make consistent
+
+#start empty so that a 'ses' variable inherited from the environment is ignored
+ses=
+if [ "$#" -gt 3 ]
+then
+    ses=$4
+    ses=${ses##ses-} #strip off ses- if it exists, to make consistent
+fi
+
+if [ -n "$ses" ]
+then
+    out_folder=sub-$subj/ses-$ses
+else
+    out_folder=sub-$subj
+fi
+
+#find dwi nifti images that don't have corresponding bvec
+for dwi_nii in "$in_bids/$out_folder"/dwi/*_dwi.nii.gz
+do
+    #an unmatched glob stays literal, so skip anything that is not a real file
+    if [ ! -e "$dwi_nii" ] || [ -e "${dwi_nii%.nii.gz}.bvec" ]
+    then
+        continue
+    fi
+
+    #look at json of that image, to find the series number
+    dwi_json=${dwi_nii%.nii.gz}.json
+    seriesnum=$(grep -m 1 SeriesNumber "$dwi_json")
+    seriesnum=${seriesnum%,}
+    seriesnum=${seriesnum##*:\ }
+
+    #without a series number the tar pattern below would match unrelated files
+    if [ -z "$seriesnum" ]
+    then
+        echo "Could not find SeriesNumber in $dwi_json, skipping $dwi_nii" >&2
+        continue
+    fi
+
+    #get matching dcm file from tar
+    #(pattern is quoted so that tar, not the shell, expands the wildcards)
+    out_dicom=${dwi_nii%.nii.gz}.dcm
+    tar -xf "$in_tar" --wildcards "*/${seriesnum}/*" --to-stdout > "$out_dicom"
+    #now get bmat, bvec, bvals from dcm
+    python "$exec_path/getBrukerBdataFromDicoms.py" "$out_dicom"
+
+done
+
+
+
diff --git a/etc/getBrukerBdataFromDicoms.py b/etc/getBrukerBdataFromDicoms.py
@@ -0,0 +1,60 @@
+import sys
+import pprint
+import json
+import pydicom
+import jcamp
+#from pybruker import jcamp
+import numpy as np
+
+
+#take in <path_to_dicom> as input
+#write out <path_to_dicom>.{bvec,bval,bmat} as output
+
+
+def _write_rows(path, rows, fmt):
+    """Write each row on its own line, formatting every item with fmt (e.g. '%f ')."""
+    with open(path, 'w') as fp:
+        for row in rows:
+            fp.write(''.join(fmt % item for item in row) + "\n")
+
+
+if len(sys.argv) < 2:
+    sys.exit("Usage: %s <path_to_dicom>" % sys.argv[0])
+
+# Strip off '.dcm' (only when the name really ends with it, so that a file
+# with a different or missing extension does not lose part of its name)
+fname = sys.argv[1]
+fname_noExt = fname[:-4] if fname.lower().endswith('.dcm') else fname
+
+# read dicom header
+# (dcmread is the supported reader; read_file is only its deprecated alias)
+H = pydicom.dcmread(fname, stop_before_pixels=True)
+
+#read bruker headers, stored as JCAMP text in private tags (0177,1100) and (0177,1101)
+method=jcamp.jcamp_parse(
+          H[0x0177,0x1100].value.decode('utf-8').splitlines()
+          )
+visu_pars=jcamp.jcamp_parse(
+          H[0x0177,0x1101].value.decode('utf-8').splitlines()
+          )
+
+#with open(fname_noExt+'_method.json', 'w') as fp:
+#    json.dump(method,  fp, indent=4)
+
+#with open(fname_noExt+'_visu_pars.json', 'w') as fp:
+#    json.dump(visu_pars,  fp, indent=4)
+
+# Bvalue information for Budde sequence
+#if visu_pars["$VisuAcqSequenceName"]["value"].find('rFOV_DWEpiWave') > -1:
+
+bval = method["$PVM_DwEffBval"]["value"]
+# flat list of direction components -> one column per group of 3 values
+bvec = np.reshape(method["$PVM_DwDir"]["value"], (-1, 3)).T
+# flat list of b-matrix entries -> one column per group of 9 values
+bmat = np.reshape(method["$PVM_DwBMat"]["value"], (-1, 9)).T
+
+# bval is a single line; bvec and bmat have one line per row of the transposed array
+_write_rows(fname_noExt+'.bval', [bval], "%f ")
+_write_rows(fname_noExt+'.bvec', bvec, "%f ")
+_write_rows(fname_noExt+'.bmat', bmat, "%s ")
+
+
diff --git a/etc/jcamp.py b/etc/jcamp.py
@@ -0,0 +1,168 @@
+from io import StringIO
+import numpy as np
+
+
+def reformat(data):
+    """
+    Convert a single JCAMP value string into the matching Python object
+
+    Bytes input is decoded as UTF-8 first. An optionally signed decimal string becomes an int, any
+    other string accepted by float() becomes a float, a string enclosed in parentheses becomes a list
+    of converted comma separated items, 'Yes' / 'No' become True / False, and anything else is
+    returned unchanged.
+
+    :param data: str or bytes holding one value
+    :return: int, float, bool, list or the (decoded) input string
+    """
+    try:
+        data = data.decode('utf-8')
+    except AttributeError:
+        # Was not a bytes so it did not have decode attribute
+        pass
+
+    # Allow one leading sign so that negative integers stay integers instead of becoming floats
+    digits = data[1:] if data[:1] in ('+', '-') else data
+    if digits.isdecimal():
+        return int(data)
+
+    try:
+        return float(data)
+    except ValueError:
+        # not a float
+        pass
+
+    if data.startswith('(') and data.endswith(')'):
+        # Struct: convert every comma separated member on its own
+        return [reformat(x.strip()) for x in data.lstrip('(').rstrip(')').split(',')]
+    if data == 'Yes':
+        return True
+    if data == 'No':
+        return False
+
+    return data
+
+
+def jcamp_parse(data):
+    """
+    Parse the lines of a JCAMP-DX document into a dictionary
+
+    Lines starting with '$$' are comments and are skipped. A line starting with '##' begins a new
+    variable named by the text between '##' and the first '='; following lines are appended to that
+    variable until the next '##' line. Reading stops at the '##END=' variable.
+
+    Each variable is stored as:
+      * the result of reformat() when its value fits on one line (after joining a multi-line struct)
+      * {'dim': [...], 'value': [...]} when a parenthesised dimension line is followed by array data
+      * {'maximum_length': n, 'value': str} when a single dimension is followed by a '<...>' string
+      * the raw list of lines when there are several lines that do not start with a struct
+
+    :param data: iterable of str lines (a list of lines or an open text file)
+    :return: dict mapping variable name to parsed value
+    """
+    jcamp_dict = dict()
+    variable = None
+
+    for line in data:
+        check = line.lstrip()
+
+        # Skip comment lines
+        if check.startswith('$$'):
+            continue
+
+        if check.startswith('##'):
+            # Line starts a new variable. Drop only the two marker characters: strip('##') treats
+            # its argument as a character set and would also eat a '#' at the end of the value.
+            lhs, _, rhs = check[2:].partition('=')
+
+            # Stop reading when end is found
+            if lhs.strip().lower() == 'end':
+                break
+
+            variable = [rhs.rstrip('\n')]
+            jcamp_dict[lhs] = variable
+        elif variable:
+            # Line is a data line to append to the previous variable
+            variable.append(line.rstrip('\n'))
+
+    # All variables have now been read
+
+    # Parse the contents of the variables (only values of existing keys are replaced, so
+    # iterating over the dictionary while assigning is safe)
+    for key, value in jcamp_dict.items():
+
+        # If the variable starts with ( then combine all the lines until the first one that ends with )
+        # TODO:
+        # Edge case when variable is a struct of structs. A contained struct might end a line and therefore exit
+        # prematurely before the containing struct is complete. Fixing requires parsing the entire contents because
+        # the struct could contain strings with parenthesis. This requires a char by char parser which is considerably
+        # more difficult for an extremely unlikely occurrence.
+        has_struct = value[0].lstrip().startswith('(')
+        if has_struct:
+            header = ''
+            # Position of the line closing the struct; the last line when none closes it.
+            # Tracked by position because looking the line up by content can hit an earlier duplicate.
+            last = len(value) - 1
+            for idx, chunk in enumerate(value):
+                header += chunk
+                if chunk.rstrip().endswith(')'):
+                    last = idx
+                    break
+
+            # Combine all the rest of the lines into one long data string
+            rest = ''.join(value[last + 1:])
+            value = [header, rest] if rest else [header]
+
+        if len(value) == 1:
+            # Only one line in data so just parse the data
+            jcamp_dict[key] = reformat(value[0])
+        elif has_struct:
+            # Data has multiple lines, so the first line is the dimensions of the array of data
+            # Read the dimensions (surrounding whitespace is accepted, like in the struct check above)
+            dim = [int(x.strip()) for x in value[0].strip().lstrip('(').rstrip(')').split(',')]
+
+            payload = value[1]
+            if payload.lstrip().startswith('('):
+                # Second line starts with ( so there is an array of structs
+                payload = [[reformat(y.strip()) for y in x.lstrip('(').rstrip(')').split(',')]
+                           for x in payload.split(') (')]
+            elif len(dim) == 1 and payload.lstrip().startswith('<'):
+                # Second line start with < so it is a string and the dimensions are the maximum length
+                jcamp_dict[key] = {'maximum_length': dim[0], 'value': payload}
+                continue
+            else:
+                # Second line is an array of data. Split on whitespace and reformat each entry.
+                payload = [reformat(x) for x in payload.split()]
+
+            jcamp_dict[key] = {'dim': dim, 'value': payload}
+
+    return jcamp_dict
+
+
+def jcamp_read(data):
+    """
+    Read JCAMP data from a filename, a str / bytes document or a list of lines
+
+    Anything that is not bytes is first tried as a filename. If it cannot be opened, or if data is
+    bytes, data is treated as the document itself: bytes are decoded as UTF-8, a str is split into
+    lines, and any other object is handed to the parser as an iterable of lines.
+
+    :param data: filename, document text (str or bytes) or iterable of lines
+    :return: dict produced by jcamp_parse
+    """
+    if not isinstance(data, bytes):
+        # Try to use data as a filename. Only the open() call is guarded: an error raised while
+        # parsing a file that did open must reach the caller instead of being mistaken for
+        # "not a filename" and silently parsing the filename as content.
+        try:
+            fh = open(data, 'r')
+        except (OSError, TypeError, ValueError):
+            # Was not a valid filename, so continue trying other options
+            pass
+        else:
+            with fh:
+                return jcamp_parse(fh)
+
+    # It was not a filename, so it should be a string
+    # Make sure that data is str and not bytes
+    try:
+        data = data.decode('utf-8')
+    except AttributeError:
+        # Was not a bytes so it did not have decode attribute
+        pass
+
+    # Convert to IO object which permits iterating through string on linebreaks
+    try:
+        handle = StringIO(data)
+    except TypeError:
+        # Was not a str object so assume it is a list of lines
+        handle = data
+
+    # Parse the data
+    return jcamp_parse(handle)
+
+
+class JCAMPData(dict):
+    """
+    Dictionary of JCAMP variables filled from a filename, document text or list of lines
+    """
+
+    def __init__(self, data, *args, **kwargs):
+        """
+        :param data: passed to jcamp_read; the parsed variables are merged into this dictionary
+        :param args: forwarded to dict
+        :param kwargs: forwarded to dict
+        """
+        super(JCAMPData, self).__init__(*args, **kwargs)
+        self.update(jcamp_read(data))
+
+    def __str__(self):
+        """Return one 'key = value' line per variable."""
+        return '\n'.join('{key} = {value}'.format(key=key, value=value) for key, value in self.items())
+
+    def format(self, variable):
+        """
+        Return a variable in its most convenient form
+
+        :param variable: name of the variable to look up
+        :return: a numpy array reshaped to 'dim' for array entries, a str for entries carrying
+                 'maximum_length', and the stored value unchanged for everything else
+        :raises KeyError: if the variable does not exist
+        """
+        data = self[variable]
+        if not isinstance(data, dict):
+            # Scalars, strings and lists have no 'dim' / 'maximum_length' to interpret. Testing
+            # "'dim' in data" on them would raise for numbers and do a substring search on strings.
+            return data
+
+        if 'dim' in data:
+            a = np.empty((len(data['value']), 1))
+            for counter, value in enumerate(data['value']):
+                a[counter] = value
+            data = np.reshape(a, data['dim'])
+        elif 'maximum_length' in data:
+            data = str(data['value'])
+
+        return data