From 732ac4d861e5e0c7396174827a87df2539624277 Mon Sep 17 00:00:00 2001
From: Robrecht Cannoodt <rcannood@gmail.com>
Date: Tue, 30 Jul 2024 01:10:35 +0200
Subject: [PATCH] output separate files

---
 .../config.vsh.yaml                           | 347 +++++++++++++++++-
 .../bd_rhapsody_sequence_analysis/script.py   | 140 +++----
 .../bd_rhapsody_sequence_analysis/test.sh     |  34 +-
 3 files changed, 431 insertions(+), 90 deletions(-)

diff --git a/src/bd_rhapsody/bd_rhapsody_sequence_analysis/config.vsh.yaml b/src/bd_rhapsody/bd_rhapsody_sequence_analysis/config.vsh.yaml
index 91146286..fd17476a 100644
--- a/src/bd_rhapsody/bd_rhapsody_sequence_analysis/config.vsh.yaml
+++ b/src/bd_rhapsody/bd_rhapsody_sequence_analysis/config.vsh.yaml
@@ -116,20 +116,317 @@ argument_groups:
         info:
           config_key: Supplemental_Reference
   - name: Outputs
+    description: Outputs for all pipeline runs
+    # based on https://bd-rhapsody-bioinfo-docs.genomics.bd.com/outputs/top_outputs.html
     arguments:
-      # TODO: split up into different outputs
-      # - ${run_name}_Bioproduct_Stats.csv
-      # - ${run_name}_Metrics_Summary.csv
-      # - ${run_name}_RSEC_MolsPerCell_Unfiltered_MEX.zip
-      # - ${run_name}_Logs/
-      # TODO: determine additional outputs for ABC, ATAC, VDJ, SMK
-      - name: "--output"
+      - name: "--output_dir"
         type: file
         direction: output
         alternatives: [-o]
-        description: "Output folder. Output still needs to be processed further."
+        description: "The unprocessed output directory containing all the outputs from the pipeline."
         required: true
         example: output_dir/
+      - name: "--output_seurat"
+        type: file
+        direction: output
+        description: "Single-cell analysis tool inputs. Seurat (.rds) input file containing RSEC molecules data table and all cell annotation metadata."
+        example: output_seurat.rds
+        required: false
+        info:
+          template: "[sample_name]_Seurat.rds"
+      - name: "--output_mudata"
+        type: file
+        direction: output
+        description: "Single-cell analysis tool inputs. Scanpy / Muon input file containing RSEC molecules data table and all cell annotation metadata."
+        example: output_mudata.h5mu
+        required: false
+        info:
+          template: "[sample_name].h5mu"
+      - name: "--metrics_summary"
+        type: file
+        direction: output
+        description: "Metrics Summary. Report containing sequencing, molecules, and cell metrics."
+        example: metrics_summary.csv
+        required: false
+        info:
+          template: "[sample_name]_Metrics_Summary.csv"
+      - name: "--pipeline_report"
+        type: file
+        direction: output
+        description: "Pipeline Report. Summary report containing the results from the sequencing analysis pipeline run."
+        example: pipeline_report.html
+        required: false
+        info:
+          template: "[sample_name]_Pipeline_Report.html"
+      - name: "--rsec_mols_per_cell"
+        type: file
+        direction: output
+        description: "Molecules per bioproduct per cell bassed on RSEC"
+        example: RSEC_MolsPerCell_MEX.zip 
+        required: false
+        info:
+          template: "[sample_name]_RSEC_MolsPerCell_MEX.zip"
+      - name: "--dbec_mols_per_cell"
+        type: file
+        direction: output
+        description: "Molecules per bioproduct per cell bassed on DBEC. DBEC data table is only output if the experiment includes targeted mRNA or AbSeq bioproducts."
+        example: DBEC_MolsPerCell_MEX.zip 
+        required: false
+        info:
+          template: "[sample_name]_DBEC_MolsPerCell_MEX.zip"
+      - name: "--rsec_mols_per_cell_unfiltered"
+        type: file
+        direction: output
+        description: "Unfiltered tables containing all cell labels with ≥10 reads."
+        example: RSEC_MolsPerCell_Unfiltered_MEX.zip 
+        required: false
+        info:
+          template: "[sample_name]_RSEC_MolsPerCell_Unfiltered_MEX.zip"
+      - name: "--bam"
+        type: file
+        direction: output
+        description: "Alignment file of R2 with associated R1 annotations for Bioproduct."
+        example: BioProduct.bam
+        required: false
+        info:
+          template: "[sample_name]_Bioproduct.bam"
+      - name: "--bam_inddex"
+        type: file
+        direction: output
+        description: "Index file for the alignment file."
+        example: BioProduct.bam.bai
+        required: false
+        info:
+          template: "[sample_name]_Bioproduct.bam.bai"
+      - name: "--bioproduct_stats"
+        type: file
+        direction: output
+        description: "Bioproduct Stats. Metrics from RSEC and DBEC Unique Molecular Identifier adjustment algorithms on a per-bioproduct basis."
+        example: Bioproduct_Stats.csv
+        required: false
+        info:
+          template: "[sample_name]_Bioproduct_Stats.csv"
+      - name: "--dimred_tsne"
+        type: file
+        direction: output
+        description: "t-SNE dimensionality reduction coordinates per cell index"
+        example: tSNE_coordinates.csv
+        required: false
+        info:
+          template: "[sample_name]_(assay)_tSNE_coordinates.csv"
+      - name: "--dimred_umap"
+        type: file
+        direction: output
+        description: "UMAP dimensionality reduction coordinates per cell index"
+        example: UMAP_coordinates.csv
+        required: false
+        info:
+          template: "[sample_name]_(assay)_UMAP_coordinates.csv"
+      - name: "--immune_cell_classification"
+        type: file
+        direction: output
+        description: "Immune Cell Classification. Cell type classification based on the expression of immune cell markers."
+        example: Immune_Cell_Classification.csv
+        required: false
+        info:
+          template: "[sample_name]_(assay)_cell_type_experimental.csv"
+  - name: Multiplex outputs
+    description: Outputs when multiplex option is selected
+    arguments:
+      - name: "--sample_tag_metrics"
+        type: file
+        direction: output
+        description: "Sample Tag Metrics. Metrics from the sample determination algorithm."
+        example: Sample_Tag_Metrics.csv
+        required: false
+        info:
+          template: "[sample_name]_Sample_Tag_Metrics.csv"
+      - name: "--sample_tag_calls"
+        type: file
+        direction: output
+        description: "Sample Tag Calls. Assigned Sample Tag for each putative cell"
+        example: Sample_Tag_Calls.csv
+        required: false
+        info:
+          template: "[sample_name]_Sample_Tag_Calls.csv"
+      - name: "--sample_tag_counts"
+        type: file
+        direction: output
+        description: "Sample Tag Counts. Separate data tables and metric summary for cells assigned to each sample tag. Note: For putative cells that could not be assigned a specific Sample Tag, a Multiplet_and_Undetermined.zip file is also output."
+        example: Sample_Tag1.zip
+        required: false
+        multiple: true
+        info:
+          template: "[sample_name]_Sample_Tag[number].zip"
+      - name: "--sample_tag_counts_unassigned"
+        type: file
+        direction: output
+        description: "Sample Tag Counts Unassigned. Data table and metric summary for cells that could not be assigned a specific Sample Tag."
+        example: Multiplet_and_Undetermined.zip
+        required: false
+        info:
+          template: "[sample_name]_Multiplet_and_Undetermined.zip"
+  - name: VDJ Outputs
+    description: Outputs when VDJ option selected
+    arguments:
+      - name: "--vdj_metrics"
+        type: file
+        direction: output
+        description: "VDJ Metrics. Overall metrics from the VDJ analysis."
+        example: VDJ_Metrics.csv
+        required: false
+        info:
+          template: "[sample_name]_VDJ_Metrics.csv"
+      - name: "--vdj_per_cell"
+        type: file
+        direction: output
+        description: "VDJ Per Cell. Cell specific read and molecule counts, VDJ gene segments, CDR3 sequences, paired chains, and cell type."
+        example: VDJ_perCell.csv
+        required: false
+        info:
+          template: "[sample_name]_VDJ_perCell.csv"
+      - name: "--vdj_per_cell_uncorrected"
+        type: file
+        direction: output
+        description: "VDJ Per Cell Uncorrected. Cell specific read and molecule counts, VDJ gene segments, CDR3 sequences, paired chains, and cell type."
+        example: VDJ_perCell_uncorrected.csv
+        required: false
+        info:
+          template: "[sample_name]_VDJ_perCell_uncorrected.csv"
+      - name: "--vdj_dominant_contigs"
+        type: file
+        direction: output
+        description: "VDJ Dominant Contigs. Dominant contig for each cell label chain type combination (putative cells only)."
+        example: VDJ_Dominant_Contigs_AIRR.csv
+        required: false
+        info:
+          template: "[sample_name]_VDJ_Dominant_Contigs_AIRR.csv"
+      - name: "--vdj_unfiltered_contigs"
+        type: file
+        direction: output
+        description: "VDJ Unfiltered Contigs. All contigs that were assembled and annotated successfully (all cells)."
+        example: VDJ_Unfiltered_Contigs_AIRR.csv
+        required: false
+        info:
+          template: "[sample_name]_VDJ_Unfiltered_Contigs_AIRR.csv"
+  - name: "ATAC-Seq outputs"
+    description: Outputs when ATAC-Seq option selected
+    arguments:
+      - name: "--atac_metrics"
+        type: file
+        direction: output
+        description: "ATAC Metrics. Overall metrics from the ATAC-Seq analysis."
+        example: ATAC_Metrics.csv
+        required: false
+        info:
+          template: "[sample_name]_ATAC_Metrics.csv"
+      - name: "--atac_metrics_json"
+        type: file
+        direction: output
+        description: "ATAC Metrics JSON. Overall metrics from the ATAC-Seq analysis in JSON format."
+        example: ATAC_Metrics.json
+        required: false
+        info:
+          template: "[sample_name]_ATAC_Metrics.json"
+      - name: "--atac_fragments"
+        type: file
+        direction: output
+        description: "ATAC Fragments. Chromosomal location, cell index, and read support for each fragment detected"
+        example: ATAC_Fragments.bed.gz
+        required: false
+        info:
+          template: "[sample_name]_ATAC_Fragments.bed.gz"
+      - name: "--atac_fragments_index"
+        type: file
+        direction: output
+        description: "Index of ATAC Fragments."
+        example: ATAC_Fragments.bed.gz.tbi
+        required: false
+        info:
+          template: "[sample_name]_ATAC_Fragments.bed.gz.tbi"
+      - name: "--atac_transposase_sites"
+        type: file
+        direction: output
+        description: "ATAC Transposase Sites. Chromosomal location, cell index, and read support for each transposase site detected"
+        example: ATAC_Transposase_Sites.bed.gz
+        required: false
+        info:
+          template: "[sample_name]_ATAC_Transposase_Sites.bed.gz"
+      - name: "--atac_transposase_sites_index"
+        type: file
+        direction: output
+        description: "Index of ATAC Transposase Sites."
+        example: ATAC_Transposase_Sites.bed.gz.tbi
+        required: false
+        info:
+          template: "[sample_name]_ATAC_Transposase_Sites.bed.gz.tbi"
+      - name: "--atac_peaks"
+        type: file
+        direction: output
+        description: "ATAC Peaks. Peak regions of transposase activity"
+        example: ATAC_Peaks.bed.gz
+        required: false
+        info:
+          template: "[sample_name]_ATAC_Peaks.bed.gz"
+      - name: "--atac_peaks_index"
+        type: file
+        direction: output
+        description: "Index of ATAC Peaks."
+        example: ATAC_Peaks.bed.gz.tbi
+        required: false
+        info:
+          template: "[sample_name]_ATAC_Peaks.bed.gz.tbi"
+      - name: "--atac_peak_annotation"
+        type: file
+        direction: output
+        description: "ATAC Peak Annotation. Estimated annotation of peak-to-gene connections"
+        example: peak_annotation.tsv.gz
+        required: false
+        info:
+          template: "[sample_name]_peak_annotation.tsv.gz"
+      - name: "--atac_cell_by_peak"
+        type: file
+        direction: output
+        description: "ATAC Cell by Peak. Peak regions of transposase activity per cell"
+        example: ATAC_Cell_by_Peak_MEX.zip
+        required: false
+        info:
+          template: "[sample_name]_ATAC_Cell_by_Peak_MEX.zip"
+      - name: "--atac_cell_by_peak_unfiltered"
+        type: file
+        direction: output
+        description: "ATAC Cell by Peak Unfiltered. Unfiltered file containing all cell labels with >=1 transposase sites in peaks."
+        example: ATAC_Cell_by_Peak_Unfiltered_MEX.zip
+        required: false
+        info:
+          template: "[sample_name]_ATAC_Cell_by_Peak_Unfiltered_MEX.zip"
+      - name: "--atac_bam"
+        type: file
+        direction: output
+        description: "ATAC BAM. Alignment file for R1 and R2 with associated I2 annotations for ATAC-Seq. Only output if the BAM generation flag is set to true."
+        example: ATAC.bam
+        required: false
+        info:
+          template: "[sample_name]_ATAC.bam"
+      - name: "--atac_bam_index"
+        type: file
+        direction: output
+        description: "Index of ATAC BAM."
+        example: ATAC.bam.bai
+        required: false
+        info:
+          template: "[sample_name]_ATAC.bam.bai"
+  - name: AbSeq Cell Calling outputs
+    description: Outputs when Cell Calling Abseq is selected
+    arguments:
+      - name: "--protein_aggregates_experimental"
+        type: file
+        direction: output
+        description: "Protein Aggregates Experimental"
+        example: Protein_Aggregates_Experimental.csv
+        required: false
+        info:
+          template: "[sample_name]_Protein_Aggregates_Experimental.csv"
   - name: Putative Cell Calling Settings
     arguments:
       - name: "--cell_calling_data"
@@ -298,9 +595,37 @@ argument_groups:
       - name: "--timestamps"
         type: boolean_true
         description: "Add timestamps to the errors, warnings, and notifications."
-      - name: "--dryrun"
-        type: boolean_true
-        description: "If true, the output directory will only contain the CWL input files, but the pipeline itself will not be executed."
+  - name: Undocumented arguments
+    arguments:
+      - name: --abseq_umi
+        type: integer
+        multiple: false
+        info:
+          config_key: AbSeq_UMI
+      - name: --target_analysis
+        type: boolean
+        multiple: false
+        info:
+          config_key: Target_analysis
+      - name: --vdj_jgene_evalue
+        type: double
+        description: |
+          e-value threshold for J gene. The e-value threshold for J gene call by IgBlast/PyIR, default is set as 0.001
+        multiple: false
+        info:
+          config_key: VDJ_JGene_Evalue
+      - name: --vdj_vgene_evalue
+        type: double
+        description: |
+          e-value threshold for V gene. The e-value threshold for V gene call by IgBlast/PyIR, default is set as 0.001
+        multiple: false
+        info:
+          config_key: VDJ_VGene_Evalue
+      - name: --write_filtered_reads
+        type: boolean
+        multiple: false
+        info:
+          config_key: Write_Filtered_Reads
 resources:
   - type: python_script
     path: script.py
diff --git a/src/bd_rhapsody/bd_rhapsody_sequence_analysis/script.py b/src/bd_rhapsody/bd_rhapsody_sequence_analysis/script.py
index 71eb4bc5..416cabb0 100644
--- a/src/bd_rhapsody/bd_rhapsody_sequence_analysis/script.py
+++ b/src/bd_rhapsody/bd_rhapsody_sequence_analysis/script.py
@@ -4,6 +4,8 @@
 import tempfile
 from typing import Any
 import yaml
+import shutil
+import glob
 
 ## VIASH START
 par = {
@@ -60,19 +62,20 @@ def read_config(path: str) -> dict[str, Any]:
     
     return config
 
-def logger(msg: str):
-    print(msg, flush=True)
-
 def strip_margin(text: str) -> str:
     return re.sub('(\n?)[ \t]*\|', '\\1', text)
 
-def process_params(par: dict[str, Any], config) -> str:
+def process_params(par: dict[str, Any], config, temp_dir: str) -> str:
     # check input parameters
     assert par["reads"] or par["reads_atac"], "Pass at least one set of inputs to --reads or --reads_atac."
 
+    # output to temp dir if output_dir was not passed
+    if not par["output_dir"]:
+        par["output_dir"] = os.path.join(temp_dir, "output")
+
     # checking sample prefix
     if par["run_name"] and re.match("[^A-Za-z0-9]", par["run_name"]):
-        logger("--run_name should only consist of letters, numbers or hyphens. Replacing all '[^A-Za-z0-9]' with '-'.")
+        print("--run_name should only consist of letters, numbers or hyphens. Replacing all '[^A-Za-z0-9]' with '-'.", flush=True)
         par["run_name"] = re.sub("[^A-Za-z0-9\\-]", "-", par["run_name"])
 
     # make paths absolute
@@ -121,13 +124,20 @@ def generate_config(par: dict[str, Any], config) -> str:
     ## Write config to file
     return ''.join(content_list)
 
-def generate_cwl_file(par: dict[str, Any], meta: dict[str, Any]) -> str:
+def generate_config_file(par: dict[str, Any], config: dict[str, Any], temp_dir: str) -> str:
+    config_file = os.path.join(temp_dir, "config.yml")
+    config_content = generate_config(par, config)
+    with open(config_file, "w") as f:
+        f.write(config_content)
+    return config_file
+
+def generate_cwl_file(meta: dict[str, Any], dir: str) -> str:
     # create cwl file (if need be)
     orig_cwl_file=os.path.join(meta["resources_dir"], "rhapsody_pipeline_2.2.1_nodocker.cwl")
 
     # Inject computational requirements into pipeline
     if meta["memory_mb"] or meta["cpus"]:
-        cwl_file = os.path.join(par["output"], "pipeline.cwl")
+        cwl_file = os.path.join(dir, "pipeline.cwl")
 
         # Read in the file
         with open(orig_cwl_file, 'r') as file :
@@ -148,23 +158,43 @@ def generate_cwl_file(par: dict[str, Any], meta: dict[str, Any]) -> str:
 
     return cwl_file
 
-def main(par: dict[str, Any], meta: dict[str, Any]):
+def copy_outputs(par: dict[str, Any], config: dict[str, Any]):
+    for arg in config["arguments"]:
+        par_value = par[arg["clean_name"]]
+        if par_value and arg["type"] == "file" and par["direction"] == "output":
+            # example template: '[sample_name]_(assay)_cell_type_experimental.csv'
+            template = arg.get("info", {}).get("template")
+            if template:
+                template_glob = template\
+                    .replace("[sample_name]", par["run_name"])\
+                    .replace("(assay)", "*")\
+                    .replace("[number]", "*")
+                files = glob.glob(os.path.join(par["output_dir"], template_glob))
+                if len(files) == 0 and arg["required"]:
+                    raise ValueError(f"Expected output file '{template_glob}' not found.")
+                elif len(files) > 1 and not arg["multiple"]:
+                    raise ValueError(f"Expected single output file '{template_glob}', but found multiple.")
+                
+                if not arg["multiple"]:
+                    shutil.copy(files[0], par_value)
+                else:
+                    # replace '*' in par_value with index
+                    for i, file in enumerate(files):
+                        shutil.copy(file, par_value.replace("*", str(i)))
+
+
+def main(par: dict[str, Any], meta: dict[str, Any], temp_dir: str):
     config = read_config(meta["config"])
-        
+    
     # Preprocess params
-    par = process_params(par, config)
-
-    # Create output dir if not exists
-    if not os.path.exists(par["output"]):
-        os.makedirs(par["output"])
+    par = process_params(par, config, temp_dir)
 
     ## Process parameters
     cmd = [
         "cwl-runner",
         "--no-container",
         "--preserve-entire-environment",
-        "--outdir",
-        par["output"]
+        "--outdir", par["output_dir"],
     ]
 
     if par["parallel"]:
@@ -174,56 +204,32 @@ def main(par: dict[str, Any], meta: dict[str, Any]):
         cmd.append("--timestamps")
 
     # Create cwl file (if need be)
-    cwl_file = generate_cwl_file(par, meta)
-
-    ## Run pipeline
-    if not par["dryrun"]:
-        with tempfile.TemporaryDirectory(prefix="cwl-bd_rhapsody_wta-", dir=meta["temp_dir"]) as temp_dir:
-            # Create params file
-            config_file = os.path.join(temp_dir, "config.yml")
-            config_content = generate_config(par, config)
-            with open(config_file, "w") as f:
-                f.write(config_content)
-            
-            # add cwl and params file to the cmd
-            cmd.extend([cwl_file, config_file])
-            
-            # keep environment variables but set TMPDIR to temp_dir
-            env = dict(os.environ)
-            env["TMPDIR"] = temp_dir
-
-            logger("> " + ' '.join(cmd))
-            _ = subprocess.check_call(
-                cmd,
-                cwd=os.path.dirname(config_file),
-                env=env
-            )
-
-    # maybe this won't be necessary anymore
-    # # extracting feature ids from references
-    # # extract info from reference files (while they still exist)
-    # feature_df = extract_feature_types(par)
-    # feature_types_file = os.path.join(par["output"], "feature_types.tsv")
-    # feature_df.to_csv(feature_types_file, sep="\t", index=False)
-
-    # if not par["dryrun"]:
-    #     # look for counts file
-    #     if not par["run_name"]:
-    #         par["run_name"] = "sample"
-    #     counts_filename = par["run_name"] + "_RSEC_MolsPerCell.csv"
-        
-    #     if par["sample_tags_version"]:
-    #         counts_filename = "Combined_" + counts_filename
-    #     counts_file = os.path.join(par["output"], counts_filename)
-        
-    #     if not os.path.exists(counts_file):
-    #         raise ValueError(f"Could not find output counts file '{counts_filename}'")
-
-    #     # look for metrics file
-    #     metrics_filename = par["run_name"] + "_Metrics_Summary.csv"
-    #     metrics_file = os.path.join(par["output"], metrics_filename)
-    #     if not os.path.exists(metrics_file):
-    #         raise ValueError(f"Could not find output metrics file '{metrics_filename}'")
+    cwl_file = generate_cwl_file(meta, temp_dir)
+    cmd.append(cwl_file)
+
+    # Create params file
+    config_file = generate_config_file(par, config, temp_dir)
+    cmd.append(config_file)
+    
+    # keep environment variables but set TMPDIR to temp_dir
+    env = dict(os.environ)
+    env["TMPDIR"] = temp_dir
+
+    # Create output dir if not exists
+    if not os.path.exists(par["output_dir"]):
+        os.makedirs(par["output_dir"])
+
+    # Run command
+    print("> " + ' '.join(cmd), flush=True)
+    _ = subprocess.check_call(
+        cmd,
+        cwd=os.path.dirname(config_file),
+        env=env
+    )
+
+    # Copy outputs
+    copy_outputs(par, config)
 
 if __name__ == "__main__":
-    main(par, meta)
+    with tempfile.TemporaryDirectory(prefix="cwl-bd_rhapsody-", dir=meta["temp_dir"]) as temp_dir:
+        main(par, meta, temp_dir)
diff --git a/src/bd_rhapsody/bd_rhapsody_sequence_analysis/test.sh b/src/bd_rhapsody/bd_rhapsody_sequence_analysis/test.sh
index 4cfc95a6..c23fda86 100644
--- a/src/bd_rhapsody/bd_rhapsody_sequence_analysis/test.sh
+++ b/src/bd_rhapsody/bd_rhapsody_sequence_analysis/test.sh
@@ -166,26 +166,36 @@ EOF
 
 #########################################################################################
 
+
+  # --reads ABCreads_R1.fq.gz \
+  # --reads ABCreads_R2.fq.gz \
+  # --abseq_reference bdabseq_smallpanel.fasta \
+
+  # --reads SMKreads_R1.fq.gz \
+  # --reads SMKreads_R2.fq.gz \
+  # --tag_names 1-Jurkat \
+  # --tag_names 2-Ramos \
+  # --tag_names 3-THP1 \
+
+
+  # --cell_calling_data mRNA \
+  # --target_analysis true \
+  # --write_filtered_reads true
+  # --expected_cell_count 2 \
+
 echo ">> Run $meta_name"
 "$meta_executable" \
   --reads WTAreads_R1.fq.gz \
   --reads WTAreads_R2.fq.gz \
-  --reads ABCreads_R1.fq.gz \
-  --reads ABCreads_R2.fq.gz \
-  --reads SMKreads_R1.fq.gz \
-  --reads SMKreads_R2.fq.gz \
   --reference_archive "$REFERENCE_FILE" \
-  --abseq_reference bdabseq_smallpanel.fasta \
-  --output output \
+  --output_dir output \
   ${meta_cpus:+---cpus $meta_cpus} \
   ${meta_memory_mb:+---memory ${meta_memory_mb}MB} \
-  --cell_calling_data mRNA \
+  --reads ABCreads_R1.fq.gz \
+  --reads ABCreads_R2.fq.gz \
+  --abseq_reference bdabseq_smallpanel.fasta \
   --exact_cell_count 2 \
-  --expected_cell_count 2 \
-  --exclude_intronic_reads false \
-  --tag_names 1-Jurkat \
-  --tag_names 2-Ramos \
-  --tag_names 3-THP1
+  --exclude_intronic_reads false
 
 # echo ">> Check if output exists"
 # assert_file_exists "output.bam"