From 6d4b55ee8ec71456900a6a9fc3540baf0aa9d745 Mon Sep 17 00:00:00 2001
From: Maux82 <a.argentini@gmail.com>
Date: Tue, 23 Oct 2018 15:44:44 +0200
Subject: [PATCH] Added one input tabular file input for PS report

---
 moff_all.py | 37 ++++++++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/moff_all.py b/moff_all.py
index 0640c59..4737490 100755
--- a/moff_all.py
+++ b/moff_all.py
@@ -3,6 +3,7 @@
 import argparse
 import ast
 import configparser
+import gc
 import json
 import logging.config
 import multiprocessing
@@ -178,6 +179,38 @@
             os.makedirs(args.loc_out)
             log.critical("created output folder  %r", args.loc_out)
 
+    config = configparser.RawConfigParser()
+    config.read(os.path.join(os.path.dirname(
+        os.path.realpath(sys.argv[0])), 'moff_setting.properties'))
+
+    # just for Galaxy input is possible to use one big input file and a list of raw file.
+    # the big file must have the result of each raw file and the columns 'Spectrum File' should be availabe
+    # This option work only with PS report using only --tsv_list and --raw_list
+    if  ( args.tsv_list is not None) and  ( args.raw_list is not None) and (len(args.tsv_list)==1)  :
+        data_temp= pd.read_csv(args.tsv_list[0],sep="\t")
+        if moff.check_ps_input_data(data_temp.columns.tolist(), ast.literal_eval(config.get('moFF', 'ps_default_export_v1'))) == 1:
+            # split the data input file only if inave more than ONE raw file and tha input file contain identification for more the ONE run
+            if  len(data_temp['Spectrum File'].unique())> 1 and len(args.raw_list) > 1:
+
+                output_list_loc=[]
+                for file in data_temp['Spectrum File'].unique():
+                    data_temp[data_temp['Spectrum File']== file].to_csv(os.path.join(os.path.split(args.tsv_list[0])[0],file.split('.')[0]+ '.txt')
+                                                                        , sep='\t' , index=False )
+                    output_list_loc.append(os.path.join(os.path.split(args.tsv_list[0])[0],file.split('.')[0]+ '.txt') )
+
+                if len(args.raw_list) != len(output_list_loc):
+                    exit('-- Number of raw file is different to the number of input sources detectd in your one input file --')
+                #sort them to be sure about the association between input - raw file
+                args.raw_list= sorted(args.raw_list)
+                args.tsv_list= sorted(output_list_loc)
+                #clean dataset thta I don use anymore
+                del data_temp
+                gc.collect()
+
+
+
+    ##---
+
     # fixed variable number of split and also number of CPU presence in the macine
     # change this variable  with repset to the machine setting of the user
 
@@ -258,9 +291,7 @@
         loc_raw = args.raw_repo if not None else raw_list
         loc_output = args.loc_out
 
-        config = configparser.RawConfigParser()
-        config.read(os.path.join(os.path.dirname(
-            os.path.realpath(sys.argv[0])), 'moff_setting.properties'))
+
         df = pd.read_csv(file_name, sep="\t")
         # add same safety checks len > 1
         # Flag for pride pipeline, or to set from second to minute as input rt time scale