Skip to content

Commit

Permalink
Merge pull request #293 from madgik/dev/313_converter_improvements
Browse files Browse the repository at this point in the history
Dev/313 converter improvements
  • Loading branch information
ThanKarab authored Nov 3, 2020
2 parents dddc160 + cb778ee commit 9b2b936
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 11 deletions.
18 changes: 13 additions & 5 deletions Exareme-Docker/files/root/exareme/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ if [[ -z ${ENVIRONMENT_TYPE} ]]; then
echo "ENVIRONMENT_TYPE is unset. Check docker-compose file."
exit
fi
# Fail fast if the CONVERT_CSVS flag was not passed in via the environment
# (it is expected to be set to TRUE/FALSE in the docker-compose file).
if [[ -z ${CONVERT_CSVS} ]]; then
echo "CONVERT_CSVS is unset. Check docker-compose file."
# Exit non-zero: a bare `exit` here would propagate the status of the
# preceding (successful) echo and report success despite the bad config.
exit 1
fi

timestamp() {
date +%F' '%T
Expand Down Expand Up @@ -78,15 +82,19 @@ getMasterIPFromConsul() {

# Convert CSVs to DB
convertCSVsToDB() {
# Both Master and Worker should transform the csvs to sqlite db files
NODE_TYPE=${1}

# Skip conversion if the flag is false
if [[ ${CONVERT_CSVS} == "FALSE" ]]; then
echo "$(timestamp) CSV convertion turned off. "
return 0
fi

# Removing all previous .db files from the DOCKER_DATA_FOLDER
echo "$(timestamp) Deleting previous db files. "
rm -rf ${DOCKER_DATA_FOLDER}/**/*.db

echo "$(timestamp) Parsing the csv files in " ${DOCKER_DATA_FOLDER} " to db files. "
python ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER} -t ${NODE_TYPE}
python3 ./convert-csv-dataset-to-db.py -f ${DOCKER_DATA_FOLDER}
#Get the status code from previous command
py_script=$?
#If status code != 0 an error has occurred
Expand Down Expand Up @@ -228,7 +236,7 @@ if [[ "${FEDERATION_ROLE}" == "master" ]]; then
periodicExaremeNodesHealthCheck &

# Prepare datasets from CSVs to SQLite db files
convertCSVsToDB "master"
convertCSVsToDB

else ##### Running bootstrap on a worker node #####

Expand All @@ -252,7 +260,7 @@ else ##### Running bootstrap on a worker node #####
periodicExaremeNodesHealthCheck &

# Prepare datasets from CSVs to SQLite db files
convertCSVsToDB "worker"
convertCSVsToDB

fi

Expand Down
17 changes: 11 additions & 6 deletions Exareme-Docker/files/root/exareme/convert-csv-dataset-to-db.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# This metadata dictionary contains only code and sqltype so that processing will be faster
# It also includes the subjectcode
def createMetadataDictionary(CDEsMetadataPath):
CDEsMetadata = open(CDEsMetadataPath)
CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8")
metadataJSON = json.load(CDEsMetadata)

metadataDictionary = {}
Expand All @@ -44,7 +44,7 @@ def addGroupVariablesToDictionary(groupMetadata, metadataDictionary):

# This metadata list is used to create the metadata table. It contains all the known information for each variable.
def createMetadataList(CDEsMetadataPath):
CDEsMetadata = open(CDEsMetadataPath)
CDEsMetadata = open(CDEsMetadataPath, "r", encoding="utf-8")
metadataJSON = json.load(CDEsMetadata)

metadataList = []
Expand Down Expand Up @@ -80,7 +80,7 @@ def addGroupVariablesToList(groupMetadata, metadataList):
if 'enumerations' in variable:
enumerations = []
for enumeration in variable['enumerations']:
enumerations.append(unicode(enumeration['code']))
enumerations.append(str(enumeration['code']))
variableDictionary['enumerations'] = ','.join(enumerations)
else:
variableDictionary['enumerations'] = None
Expand Down Expand Up @@ -178,7 +178,7 @@ def createDataTable(metadataDictionary, cur):

def addCSVInTheDataTable(csvFilePath, metadataDictionary, cur):
# Open the csv
csvFile = open(csvFilePath, 'r')
csvFile = open(csvFilePath, "r", encoding="utf-8")
csvReader = csv.reader(csvFile)

# Create the csv INSERT statement
Expand Down Expand Up @@ -267,14 +267,19 @@ def main():
parser = ArgumentParser()
parser.add_argument('-f', '--pathologiesFolderPath', required=True,
help='The folder with the pathologies data.')
parser.add_argument('-t', '--nodeType', required=True,
help='Is this a master or a worker node?'
parser.add_argument('-p', '--pathologies', required=False,
help='Specific pathologies to parse. (Example: "dementia,tbi"'
)
args = parser.parse_args()
pathologiesFolderPath = os.path.abspath(args.pathologiesFolderPath)

# Get all pathologies
pathologiesList = next(os.walk(pathologiesFolderPath))[1]

if args.pathologies != None:
pathologiesToConvert = args.pathologies.split(",")
pathologiesList = list(set(pathologiesList) & set(pathologiesToConvert))
print ("Converting csvs for pathologies: " + ",".join(pathologiesList))

# Create the datasets db for each pathology
for pathologyName in pathologiesList:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ services:
- NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
- ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
- LOG_LEVEL=INFO # INFO / DEBUG
- CONVERT_CSVS=TRUE # TRUE / FALSE
depends_on:
- exareme-keystore
deploy:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ services:
- NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
- ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
- LOG_LEVEL=INFO # INFO / DEBUG
- CONVERT_CSVS=TRUE # TRUE / FALSE
deploy:
placement:
constraints:
Expand Down
1 change: 1 addition & 0 deletions Local-Deployment/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ services:
- NODE_COMMUNICATION_TIMEOUT=30000 # (MILIS) NODE COMMUNICATION WILL DROP IF TIMEOUT IS PASSED
- ENVIRONMENT_TYPE=PROD # TEST / DEV / PROD
- LOG_LEVEL=INFO # INFO / DEBUG
- CONVERT_CSVS=TRUE # TRUE / FALSE
depends_on:
- exareme_keystore
ports:
Expand Down

0 comments on commit 9b2b936

Please sign in to comment.