Fix issue #57

metagenopolis · Oct 31, 2024 · 3a783c6 · 3a783c6
1 parent b1df6e0
commit 3a783c6
Show file tree

Hide file tree

Showing 16 changed files with 100 additions and 64 deletions.
diff --git a/meteor/counter.py b/meteor/counter.py
@@ -121,12 +121,17 @@ def get_aligned_nucleotides(self, element) -> Iterator[int]:
         """
         yield from (item[1] for item in element.cigartuples if item[0] < 3)
 
-    def set_counter_config(self, counted_reads):
-        """Define the count of reads"""
+    def set_counter_config(self, counted_reads: float, count_file: Path) -> dict:
+        """Build the counting information to save in the json
+        :param counted_reads: (float) Number of reads counted
+        :param count_file: (Path) Count file
+        :return: (Dict) dictionary data
+        """
         return {
             "counting": {
-                "counted_reads": counted_reads,
+                "counted_reads": int(round(counted_reads, 0)),
                 "identity_threshold": round(self.identity_threshold, 2),
+                "count_file": count_file.name,
             }
         }
 
@@ -412,6 +417,7 @@ def launch_counting(
         count_file: Path,
         ref_json: dict,
         census_json: dict,
+        Stage1Json: Path,
     ):
         """Function that count reads from a cram file, using the given methods in count:
         "total" or "shared" or "unique".
@@ -493,13 +499,9 @@ def launch_counting(
                     catch_stdout=False,
                 )
                 total_read_count = self.write_table(cramfile_sorted, count_file)
-            config = self.set_counter_config(total_read_count)
-            Stage1Json = (
-                self.meteor.mapping_dir
-                / f"{census_json['sample_info']['sample_name']}_census_stage_1.json"
-            )
-
-            self.save_config(census_json.update(config), Stage1Json)
+            config = self.set_counter_config(total_read_count, count_file)
+            census_json.update(config)
+            self.save_config(census_json, Stage1Json)
             if self.keep_filtered_alignments:
                 cramfile_strain_unsorted = Path(mkstemp(dir=self.meteor.tmp_dir)[1])
                 self.save_cram_strain(
@@ -556,7 +558,6 @@ def execute(self) -> None:
 
             #  mapping of each sample against reference
             for library in census_json_files:
-                print(library)
                 census_json = self.read_json(library)
                 sample_info = census_json["sample_info"]
                 stage1_dir = self.meteor.mapping_dir / sample_info["sample_name"]
@@ -596,8 +597,19 @@ def execute(self) -> None:
                     / f"{sample_info['sample_name']}.tsv.xz"
                 )
                 start = perf_counter()
+                Stage1Json = (
+                    self.meteor.mapping_dir
+                    / sample_info["sample_name"]
+                    / f"{sample_info['sample_name']}_census_stage_1.json"
+                )
+                census_json = self.read_json(Stage1Json)
                 self.launch_counting(
-                    raw_cram_file, cram_file, count_file, ref_json, census_json
+                    raw_cram_file,
+                    cram_file,
+                    count_file,
+                    ref_json,
+                    census_json,
+                    Stage1Json,
                 )
                 logging.info("Completed counting in %f seconds", perf_counter() - start)
                 if not self.keep_all_alignments:

diff --git a/meteor/fastqimporter.py b/meteor/fastqimporter.py
@@ -163,7 +163,7 @@ def execute(self) -> None:
             samples_names.add(sample_name)
             sample_dir = self.meteor.fastq_dir / sample_name
             sample_dir.mkdir(exist_ok=True, parents=True)
-            sym_fastq = Path(sample_dir / fastq_file.name)
+            sym_fastq = Path(sample_dir / fastq_file.name).resolve()
             if not sym_fastq.is_symlink():
                 sym_fastq.symlink_to(fastq_file.resolve())
             # Create a configuration

diff --git a/meteor/mapper.py b/meteor/mapper.py
@@ -62,10 +62,10 @@ def set_mapping_config(
         :param cram_file: A path to the raw cram file
         :return: (Dict) A dict object with the census 1 config
         """
+        del self.census["census"]["sample_info"]["full_sample_name"]
         config = {
             "meteor_version": self.meteor.version,
             "sample_info": self.census["census"]["sample_info"],
-            "sample_file": self.census["census"]["sample_file"],
             "mapping": {
                 "mapping_tool": "bowtie2",
                 "mapping_tool_version": bowtie_version,
@@ -81,10 +81,8 @@ def set_mapping_config(
                 "overall_alignment_rate": round(
                     (mapping_data[2] + mapping_data[3]) / mapping_data[0] * 100, 2
                 ),
-                "fastq_files": ",".join(self.fastq_list),
-            },
-            "mapping_file": {
-                "bowtie_file": cram_file.name,
+                "fastq_files": self.fastq_list,
+                "mapping_file": cram_file.name,
             },
         }
         return config
@@ -176,7 +174,6 @@ def execute(self) -> None:
                 mapping_log = findall(r"([0-9]+)\s+\(", mapping_result)
                 assert len(mapping_log) == 4
                 mapping_data = [int(i) for i in mapping_log]
-                print(mapping_data)
             except AssertionError:
                 logging.error("Could not access the mapping result from bowtie2")
                 sys.exit(1)

diff --git a/meteor/merging.py b/meteor/merging.py
@@ -263,9 +263,11 @@ def execute(self) -> None:
                 "trim",
                 "alignment_number",
                 "mapping_type",
-                "identity_threshold",
                 "database_type",
             ],
+            "counting": [
+                "identity_threshold",
+            ],
             "profiling_parameters": [""],
         }
         all_information = {
@@ -284,7 +286,7 @@ def execute(self) -> None:
             # Force to taxo in no consensus
             database_type = "taxo"
 
-        # Merge ini information
+        # Merge json information
         logging.info("Merging json information...")
         # Get all values from all fields from all sections from all json files
         all_information_to_save = {

diff --git a/meteor/tests/test_counter.py b/meteor/tests/test_counter.py
@@ -310,7 +310,7 @@ def test_launch_counting_unique(counter_unique: Counter, datadir: Path, tmp_path
         counter_unique.meteor.ref_dir / "mock_reference.json"
     )
     counter_unique.launch_counting(
-        raw_cramfile, cramfile, countfile, ref_json, census_json
+        raw_cramfile, cramfile, countfile, ref_json, census_json, census_json_file
     )
     with countfile.open("rb") as out:
         assert md5(out.read()).hexdigest() == "f5bc528dcbf594b5089ad7f6228ebab5"
@@ -326,7 +326,7 @@ def test_launch_counting_total(counter_total: Counter, datadir: Path, tmp_path:
         counter_total.meteor.ref_dir / "mock_reference.json"
     )
     counter_total.launch_counting(
-        raw_cramfile, cramfile, countfile, ref_json, census_json
+        raw_cramfile, cramfile, countfile, ref_json, census_json, census_json_file
     )
     with countfile.open("rb") as out:
         assert md5(out.read()).hexdigest() == "f010e4136323ac408d4c127e243756c2"
@@ -346,7 +346,7 @@ def test_launch_counting_smart_shared(
         counter_smart_shared.meteor.ref_dir / "mock_reference.json"
     )
     counter_smart_shared.launch_counting(
-        raw_cramfile, cramfile, countfile, ref_json, census_json
+        raw_cramfile, cramfile, countfile, ref_json, census_json, census_json_file
     )
     # with countfile.open("rb") as out:
     #     assert md5(out.read()).hexdigest() == "4bdd7327cbad8e71d210feb0c6375077"

diff --git a/meteor/tests/test_counter/test/part2/part2_census_stage_1.json b/meteor/tests/test_counter/test/part2/part2_census_stage_1.json
@@ -28,11 +28,7 @@
     "matches": 10000,
     "is_local_mapping": 1,
     "mapping_software": "Meteor",
-    "mapping_software_version": "3.3"
-  },
-  "mapping_file": {
-    "mapping_file_count": 1,
-    "bowtie_file_1": "part2.bam",
-    "mapping_file_format": "sam"
+    "mapping_software_version": "3.3",
+    "mapping_file": "part2.bam"
   }
 }
diff --git a/meteor/tests/test_merging.py b/meteor/tests/test_merging.py
@@ -113,13 +113,13 @@ def test_extract_json_info(merging_profiles: Merging) -> None:
         config,
         param_dict={
             "profiling_parameters": ["msp_filter", "modules_def"],
-            "mapping_file": [""],
+            "mapping": ["mapping_file"],
         },
     )
     assert info == {
         "msp_filter": 0.1,
         "modules_def": "modules_definition.tsv",
-        "bowtie_file": "sample1.sam",
+        "mapping_file": "sample1.sam",
     }
 
 
@@ -228,6 +228,7 @@ def test_execute1(merging_profiles: Merging, datadir: Path) -> None:
         datadir / "expected_output" / "test_project_census_stage_2_report.tsv"
     )
     real_output_df = pd.read_table(real_output)
+
     expected_output_df = pd.read_table(expected_output)
     real_output_df = (
         real_output_df.sort_values(by=["sample"])

diff --git a/meteor/tests/test_merging/expected_output/test_project_census_stage_2_report.tsv b/meteor/tests/test_merging/expected_output/test_project_census_stage_2_report.tsv
@@ -1,4 +1,4 @@
-sample	sample_name	census_status	full_sample_name	fastq_file	mapping_tool	mapping_tool_version	mapping_date	reference_name	trim	alignment_number	mapping_type	meteor_version	identity_threshold	total_read_count	mapped_read_count	overall_alignment_rate	gene_count	msp_count	msp_signal	mustard_signal	fastq_files	database_type	bowtie_file	profiling_date	normalization	rarefaction_level	seed	msp_core_size	msp_filter	msp_def	mustard_filename	modules_db	modules_db_filenames	modules_def	module_completeness
-sample1	sample1	0	sample1_trimmed.Q17.converted.noHost	sample1_trimmed.Q17.converted.noHost.fastq.gz	bowtie2	2.5.1	2023-11-17	IGC2	80	10000	end-to-end	2.0.9	0.95	19234567	16234987	84.41	627516	297	0.63	0.0	['fastq1.fastq.gz', 'fastq2.fastq.gz']	complete	sample1.sam	2023-11-17	fpkm	5000000	1234	100	0.1	IGC2_1990MSPs.tsv	IGC2_mustard.tsv	kegg	IGC2_kegg_107.tsv	modules_definition.tsv	0.9
-sample2	sample2	0	sample2_trimmed.Q17.converted.noHost	sample2_trimmed.Q17.converted.noHost.fastq.gz	bowtie2	2.5.1	2023-11-17	IGC2	80	10000	end-to-end	2.0.9	0.95	15000000	10000000	66.67	687432	325	0.87	0.1	['fastq1.fastq.gz', 'fastq2.fastq.gz']	complete	sample2.sam	2023-11-17	fpkm	5000000	1234	100	0.1	IGC2_1990MSPs.tsv	IGC2_mustard.tsv	kegg	IGC2_kegg_107.tsv	modules_definition.tsv	0.9
-sample3	sample3	0	sample3_trimmed.Q17.converted.noHost	sample3_trimmed.Q17.converted.noHost.fastq.gz	bowtie2	2.5.1	2023-11-17	IGC2	80	10000	end-to-end	2.0.9	0.95	20000000	10000000	50.00	599999	354	0.56	0.3	['fastq1.fastq.gz', 'fastq2.fastq.gz']	complete	sample3.sam	2023-11-17	fpkm	5000000	1234	90	0.1	IGC2_1990MSPs.tsv	IGC2_mustard.tsv	kegg	IGC2_kegg_107.tsv	modules_definition.tsv	0.9
+sample	sample_name	census_status	full_sample_name	fastq_file	mapping_tool	mapping_tool_version	mapping_date	reference_name	trim	alignment_number	mapping_type	meteor_version	identity_threshold	total_read_count	mapped_read_count	overall_alignment_rate	gene_count	msp_count	msp_signal	mustard_signal	fastq_files	database_type	mapping_file	profiling_date	normalization	rarefaction_level	seed	msp_core_size	msp_filter	msp_def	mustard_filename	modules_db	modules_db_filenames	modules_def	module_completeness	count_file	counted_reads
+sample1	sample1	0	sample1_trimmed.Q17.converted.noHost	sample1_trimmed.Q17.converted.noHost.fastq.gz	bowtie2	2.5.1	2023-11-17	IGC2	80	10000	end-to-end	2.0.9	0.95	19234567	16234987	84.41	627516	297	0.63	0.0	['fastq1.fastq.gz', 'fastq2.fastq.gz']	complete	sample1.sam	2023-11-17	fpkm	5000000	1234	100	0.1	IGC2_1990MSPs.tsv	IGC2_mustard.tsv	kegg	IGC2_kegg_107.tsv	modules_definition.tsv	0.9	sample1.tsv.xz	14591228
+sample2	sample2	0	sample2_trimmed.Q17.converted.noHost	sample2_trimmed.Q17.converted.noHost.fastq.gz	bowtie2	2.5.1	2023-11-17	IGC2	80	10000	end-to-end	2.0.9	0.95	15000000	10000000	66.67	687432	325	0.87	0.1	['fastq1.fastq.gz', 'fastq2.fastq.gz']	complete	sample2.sam	2023-11-17	fpkm	5000000	1234	100	0.1	IGC2_1990MSPs.tsv	IGC2_mustard.tsv	kegg	IGC2_kegg_107.tsv	modules_definition.tsv	0.9	sample2.tsv.xz	10000000
+sample3	sample3	0	sample3_trimmed.Q17.converted.noHost	sample3_trimmed.Q17.converted.noHost.fastq.gz	bowtie2	2.5.1	2023-11-17	IGC2	80	10000	end-to-end	2.0.9	0.95	20000000	10000000	50.00	599999	354	0.56	0.3	['fastq1.fastq.gz', 'fastq2.fastq.gz']	complete	sample3.sam	2023-11-17	fpkm	5000000	1234	90	0.1	IGC2_1990MSPs.tsv	IGC2_mustard.tsv	kegg	IGC2_kegg_107.tsv	modules_definition.tsv	0.9	sample3.tsv.xz	10000000
diff --git a/meteor/tests/test_merging/mapping/sample1/sample1_census_stage_1.json b/meteor/tests/test_merging/mapping/sample1/sample1_census_stage_1.json
@@ -23,9 +23,7 @@
     "fastq_files": [
       "fastq1.fastq.gz",
       "fastq2.fastq.gz"
-    ]
-  },
-  "mapping_file": {
-    "bowtie_file": "sample1.sam"
+    ],
+    "mapping_file": "sample1.sam"
   }
 }
diff --git a/meteor/tests/test_merging/mapping/sample2/sample2_census_stage_1.json b/meteor/tests/test_merging/mapping/sample2/sample2_census_stage_1.json
@@ -23,9 +23,7 @@
     "fastq_files": [
       "fastq1.fastq.gz",
       "fastq2.fastq.gz"
-    ]
-  },
-  "mapping_file": {
-    "bowtie_file": "sample2.sam"
+    ],
+    "mapping_file": "sample2.sam"
   }
 }
diff --git a/meteor/tests/test_merging/mapping/sample3/sample3_census_stage_1.json b/meteor/tests/test_merging/mapping/sample3/sample3_census_stage_1.json
@@ -23,9 +23,7 @@
     "fastq_files": [
       "fastq1.fastq.gz",
       "fastq2.fastq.gz"
-    ]
-  },
-  "mapping_file": {
-    "bowtie_file": "sample3.sam"
+    ],
+    "mapping_file": "sample3.sam"
   }
 }
diff --git a/meteor/tests/test_merging/profiles/sample1/sample1_census_stage_2.json b/meteor/tests/test_merging/profiles/sample1/sample1_census_stage_2.json
@@ -15,18 +15,20 @@
     "trim": 80,
     "alignment_number": 10000,
     "mapping_type": "end-to-end",
-    "identity_threshold": 0.95,
     "total_read_count": 19234567,
     "mapped_read_count": 16234987,
     "overall_alignment_rate": 84.41,
     "fastq_files": [
       "fastq1.fastq.gz",
       "fastq2.fastq.gz"
     ],
-    "database_type": "complete"
+    "database_type": "complete",
+    "mapping_file": "sample1.sam"
   },
-  "mapping_file": {
-    "bowtie_file": "sample1.sam"
+  "counting": {
+    "counted_reads": 14591228,
+    "identity_threshold": 0.95,
+    "count_file": "sample1.tsv.xz"
   },
   "profiling_session": {
     "meteor_version": "2.0.9",

diff --git a/meteor/tests/test_merging/profiles/sample2/sample2_census_stage_2.json b/meteor/tests/test_merging/profiles/sample2/sample2_census_stage_2.json
@@ -15,18 +15,20 @@
     "trim": 80,
     "alignment_number": 10000,
     "mapping_type": "end-to-end",
-    "identity_threshold": 0.95,
     "total_read_count": 15000000,
     "mapped_read_count": 10000000,
     "overall_alignment_rate": 66.67,
     "fastq_files": [
       "fastq1.fastq.gz",
       "fastq2.fastq.gz"
     ],
-    "database_type": "complete"
+    "database_type": "complete",
+    "mapping_file": "sample2.sam"
   },
-  "mapping_file": {
-    "bowtie_file": "sample2.sam"
+  "counting": {
+    "counted_reads": 10000000,
+    "identity_threshold": 0.95,
+    "count_file": "sample2.tsv.xz"
   },
   "profiling_session": {
     "meteor_version": "2.0.9",

diff --git a/meteor/tests/test_merging/profiles/sample3/sample3_census_stage_2.json b/meteor/tests/test_merging/profiles/sample3/sample3_census_stage_2.json
@@ -23,10 +23,13 @@
       "fastq1.fastq.gz",
       "fastq2.fastq.gz"
     ],
-    "database_type": "complete"
+    "database_type": "complete",
+    "mapping_file": "sample3.sam"
   },
-  "mapping_file": {
-    "bowtie_file": "sample3.sam"
+  "counting": {
+    "counted_reads": 10000000,
+    "identity_threshold": 0.95,
+    "count_file": "sample3.tsv.xz"
   },
   "profiling_session": {
     "meteor_version": "2.0.9",

diff --git a/meteor/tests/test_strain/map/test/test_census_stage_1.json b/meteor/tests/test_strain/map/test/test_census_stage_1.json
@@ -1 +1,28 @@
-{"meteor_version": "2.0.6", "sample_info": {"sample_name": "test", "tag": "single", "full_sample_name": "test"}, "sample_file": {"fastq_file": "test.fastq.gz"}, "mapping": {"mapping_tool": "bowtie2", "mapping_tool_version": "2.5.3", "mapping_date": "2024-06-11", "reference_name": "mock", "trim": "80", "alignment_number": 10000, "mapping_type": "end-to-end", "identity_threshold": 0.95, "total_read_count": 827509, "mapped_read_count": 793977, "overall_alignment_rate": 95.95, "fastq_files": "fastq/test/test.fastq.gz"}, "mapping_file": {"bowtie_file": "test_raw.cram"}}
+{
+    "meteor_version": "2.0.6",
+    "sample_info": {
+        "sample_name": "test",
+        "tag": "single",
+        "full_sample_name": "test"
+    },
+    "sample_file": {
+        "fastq_file": "test.fastq.gz"
+    },
+    "mapping": {
+        "mapping_tool": "bowtie2",
+        "mapping_tool_version": "2.5.3",
+        "mapping_date": "2024-06-11",
+        "reference_name": "mock",
+        "trim": "80",
+        "alignment_number": 10000,
+        "mapping_type": "end-to-end",
+        "identity_threshold": 0.95,
+        "total_read_count": 827509,
+        "mapped_read_count": 793977,
+        "overall_alignment_rate": 95.95,
+        "fastq_files": [
+            "fastq/test/test.fastq.gz"
+        ],
+        "mapping_file": "test_raw.cram"
+    }
+}
diff --git a/meteor/tests/test_variantcalling/eva71_bench/eva71_bench_census_stage_1.json b/meteor/tests/test_variantcalling/eva71_bench/eva71_bench_census_stage_1.json
@@ -20,9 +20,9 @@
     "total_read_count": 1480,
     "mapped_read_count": 1480,
     "overall_alignment_rate": 100.0,
-    "fastq_files": ["eva71_bench.fq.gz"]
-  },
-  "mapping_file": {
-    "bowtie_file": "eva71_bench.sam"
+    "fastq_files": [
+      "eva71_bench.fq.gz"
+    ],
+    "mapping_file": "eva71_bench.sam"
   }
 }