Skip to content

Commit

Permalink
Added one input tabular file input for PS report
Browse files Browse the repository at this point in the history
  • Loading branch information
Maux82 committed Oct 23, 2018
1 parent 4011359 commit 6d4b55e
Showing 1 changed file with 34 additions and 3 deletions.
37 changes: 34 additions & 3 deletions moff_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import ast
import configparser
import gc
import json
import logging.config
import multiprocessing
Expand Down Expand Up @@ -178,6 +179,38 @@
os.makedirs(args.loc_out)
log.critical("created output folder %r", args.loc_out)

config = configparser.RawConfigParser()
config.read(os.path.join(os.path.dirname(
os.path.realpath(sys.argv[0])), 'moff_setting.properties'))

# just for Galaxy input is possible to use one big input file and a list of raw file.
# the big file must have the result of each raw file and the columns 'Spectrum File' should be availabe
# This option work only with PS report using only --tsv_list and --raw_list
if ( args.tsv_list is not None) and ( args.raw_list is not None) and (len(args.tsv_list)==1) :
data_temp= pd.read_csv(args.tsv_list[0],sep="\t")
if moff.check_ps_input_data(data_temp.columns.tolist(), ast.literal_eval(config.get('moFF', 'ps_default_export_v1'))) == 1:
# split the data input file only if inave more than ONE raw file and tha input file contain identification for more the ONE run
if len(data_temp['Spectrum File'].unique())> 1 and len(args.raw_list) > 1:

output_list_loc=[]
for file in data_temp['Spectrum File'].unique():
data_temp[data_temp['Spectrum File']== file].to_csv(os.path.join(os.path.split(args.tsv_list[0])[0],file.split('.')[0]+ '.txt')
, sep='\t' , index=False )
output_list_loc.append(os.path.join(os.path.split(args.tsv_list[0])[0],file.split('.')[0]+ '.txt') )

if len(args.raw_list) != len(output_list_loc):
exit('-- Number of raw file is different to the number of input sources detectd in your one input file --')
#sort them to be sure about the association between input - raw file
args.raw_list= sorted(args.raw_list)
args.tsv_list= sorted(output_list_loc)
#clean dataset thta I don use anymore
del data_temp
gc.collect()



##---

# fixed variable number of split and also number of CPU presence in the macine
# change this variable with repset to the machine setting of the user

Expand Down Expand Up @@ -258,9 +291,7 @@
loc_raw = args.raw_repo if not None else raw_list
loc_output = args.loc_out

config = configparser.RawConfigParser()
config.read(os.path.join(os.path.dirname(
os.path.realpath(sys.argv[0])), 'moff_setting.properties'))

df = pd.read_csv(file_name, sep="\t")
# add same safety checks len > 1
# Flag for pride pipeline, or to set from second to minute as input rt time scale
Expand Down

0 comments on commit 6d4b55e

Please sign in to comment.