-
Notifications
You must be signed in to change notification settings - Fork 0
/
step4_actscoreDiff.py
42 lines (32 loc) · 1.62 KB
/
step4_actscoreDiff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
__author__ = "Junhee Yoon"
__version__ = "1.0.0"
__maintainer__ = "Junhee Yoon"
__email__ = "[email protected]"
"""
Description: Mimics the notebook code for pipeline work; please see step1 in Jun's notebook archive.
"""
import os
import pandas as pd
import numpy as np
import argparse
from libraries.metaHandler import metaExt
from sys import argv
from libraries.statFunction import StatHandler
# Command-line interface: one optional flag that selects the result type.
parser = argparse.ArgumentParser(prog='step4_actscoreDiff.py')
parser.add_argument(
    '-t',
    '--type',
    dest='resultType',
    default='RR,CIS',
    help='Result type, ex: long, healthy, "RR,CIS" ',
)
# Parsed at module level; the chosen value is read later as args.resultType.
args = parser.parse_args()
# Copy of OpenKbcMSToolkit.py
if __name__ == "__main__":
    # Script body: load the step3 table, split its samples into two groups,
    # run the rank-sum comparison and write the selected rows to the step4
    # CSV. Every location is taken from environment variables; a missing
    # variable raises KeyError.

    # os.path.join(path, '') appends a trailing separator only when one is
    # missing, so the concatenations below land inside the shared directory
    # whether or not 'efspoint' was exported with a trailing slash.
    SharedFilePath = os.path.join(os.environ['efspoint'], '')  # Main data path here, goes to EFS volume
    metaName = os.environ['metafile']  # e.g. EPIC_HCvB_metadata_baseline_updated-share.csv
    # Not used below; still read so an unset variable fails here as before.
    msigFile = os.environ['msigDBPATH']  # e.g. msigdb.v7.4.entrez.gmt
    step1Input = os.environ['startFile']  # e.g. counts_vst_CD4.csv

    # Input and output names are derived from the step1 file name by swapping
    # the '.csv' marker for the step-specific suffix.
    baseName = os.path.basename(step1Input)
    inputFile = SharedFilePath + baseName.replace('.csv', '.step3.csv')  # step3 input
    outputFile = SharedFilePath + baseName.replace('.csv', '.step4.csv')  # step4 output

    # Table is transposed after loading, then rows containing NaN are dropped.
    df = pd.read_csv(inputFile, engine='c', index_col=0).T.dropna()  # Activation Score
    meta_data = pd.read_csv(SharedFilePath + metaName)  # Meta data

    # The helper returns two values, unpacked as the long- and short-duration
    # sample groups (presumably sample identifiers; verify in metaExt).
    longDD_samples, shortDD_samples = metaExt._LoadDiseaseDuration(df, meta_data, args.resultType)
    ranksumSig = StatHandler.calculate_ranksum(df, shortDD_samples, longDD_samples)  # get ranksum result

    # Keep only the rows of df whose labels appear in the result's "Names" column.
    selectedNames = ranksumSig["Names"].values.tolist()
    df.loc[selectedNames].to_csv(outputFile)  # Writing