Skip to content

Commit

Permalink
Fix issue #57
Browse files Browse the repository at this point in the history
  • Loading branch information
aghozlane committed Oct 31, 2024
1 parent b1df6e0 commit 3a783c6
Show file tree
Hide file tree
Showing 16 changed files with 100 additions and 64 deletions.
36 changes: 24 additions & 12 deletions meteor/counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,17 @@ def get_aligned_nucleotides(self, element) -> Iterator[int]:
"""
yield from (item[1] for item in element.cigartuples if item[0] < 3)

def set_counter_config(self, counted_reads):
"""Define the count of reads"""
def set_counter_config(self, counted_reads: float, count_file: Path) -> dict:
"""Save in the json essential info
:param counted_reads: (float) Number of reads counted
:param count_file: (Path) Count file
:return: (Dict) dictionary data
"""
return {
"counting": {
"counted_reads": counted_reads,
"counted_reads": int(round(counted_reads, 0)),
"identity_threshold": round(self.identity_threshold, 2),
"count_file": count_file.name,
}
}

Expand Down Expand Up @@ -412,6 +417,7 @@ def launch_counting(
count_file: Path,
ref_json: dict,
census_json: dict,
Stage1Json: Path,
):
"""Function that count reads from a cram file, using the given methods in count:
"total" or "shared" or "unique".
Expand Down Expand Up @@ -493,13 +499,9 @@ def launch_counting(
catch_stdout=False,
)
total_read_count = self.write_table(cramfile_sorted, count_file)
config = self.set_counter_config(total_read_count)
Stage1Json = (
self.meteor.mapping_dir
/ f"{census_json['sample_info']['sample_name']}_census_stage_1.json"
)

self.save_config(census_json.update(config), Stage1Json)
config = self.set_counter_config(total_read_count, count_file)
census_json.update(config)
self.save_config(census_json, Stage1Json)
if self.keep_filtered_alignments:
cramfile_strain_unsorted = Path(mkstemp(dir=self.meteor.tmp_dir)[1])
self.save_cram_strain(
Expand Down Expand Up @@ -556,7 +558,6 @@ def execute(self) -> None:

# mapping of each sample against reference
for library in census_json_files:
print(library)
census_json = self.read_json(library)
sample_info = census_json["sample_info"]
stage1_dir = self.meteor.mapping_dir / sample_info["sample_name"]
Expand Down Expand Up @@ -596,8 +597,19 @@ def execute(self) -> None:
/ f"{sample_info['sample_name']}.tsv.xz"
)
start = perf_counter()
Stage1Json = (
self.meteor.mapping_dir
/ sample_info["sample_name"]
/ f"{sample_info['sample_name']}_census_stage_1.json"
)
census_json = self.read_json(Stage1Json)
self.launch_counting(
raw_cram_file, cram_file, count_file, ref_json, census_json
raw_cram_file,
cram_file,
count_file,
ref_json,
census_json,
Stage1Json,
)
logging.info("Completed counting in %f seconds", perf_counter() - start)
if not self.keep_all_alignments:
Expand Down
2 changes: 1 addition & 1 deletion meteor/fastqimporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def execute(self) -> None:
samples_names.add(sample_name)
sample_dir = self.meteor.fastq_dir / sample_name
sample_dir.mkdir(exist_ok=True, parents=True)
sym_fastq = Path(sample_dir / fastq_file.name)
sym_fastq = Path(sample_dir / fastq_file.name).resolve()
if not sym_fastq.is_symlink():
sym_fastq.symlink_to(fastq_file.resolve())
# Create a configuration
Expand Down
9 changes: 3 additions & 6 deletions meteor/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ def set_mapping_config(
:param cram_file: A path to the raw cram file
:return: (Dict) A dict object with the census 1 config
"""
del self.census["census"]["sample_info"]["full_sample_name"]
config = {
"meteor_version": self.meteor.version,
"sample_info": self.census["census"]["sample_info"],
"sample_file": self.census["census"]["sample_file"],
"mapping": {
"mapping_tool": "bowtie2",
"mapping_tool_version": bowtie_version,
Expand All @@ -81,10 +81,8 @@ def set_mapping_config(
"overall_alignment_rate": round(
(mapping_data[2] + mapping_data[3]) / mapping_data[0] * 100, 2
),
"fastq_files": ",".join(self.fastq_list),
},
"mapping_file": {
"bowtie_file": cram_file.name,
"fastq_files": self.fastq_list,
"mapping_file": cram_file.name,
},
}
return config
Expand Down Expand Up @@ -176,7 +174,6 @@ def execute(self) -> None:
mapping_log = findall(r"([0-9]+)\s+\(", mapping_result)
assert len(mapping_log) == 4
mapping_data = [int(i) for i in mapping_log]
print(mapping_data)
except AssertionError:
logging.error("Could not access the mapping result from bowtie2")
sys.exit(1)
Expand Down
6 changes: 4 additions & 2 deletions meteor/merging.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,11 @@ def execute(self) -> None:
"trim",
"alignment_number",
"mapping_type",
"identity_threshold",
"database_type",
],
"counting": [
"identity_threshold",
],
"profiling_parameters": [""],
}
all_information = {
Expand All @@ -284,7 +286,7 @@ def execute(self) -> None:
# Force to taxo if no consensus
database_type = "taxo"

# Merge ini information
# Merge json information
logging.info("Merging json information...")
# Get all values from all fields from all sections from all json files
all_information_to_save = {
Expand Down
6 changes: 3 additions & 3 deletions meteor/tests/test_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def test_launch_counting_unique(counter_unique: Counter, datadir: Path, tmp_path
counter_unique.meteor.ref_dir / "mock_reference.json"
)
counter_unique.launch_counting(
raw_cramfile, cramfile, countfile, ref_json, census_json
raw_cramfile, cramfile, countfile, ref_json, census_json, census_json_file
)
with countfile.open("rb") as out:
assert md5(out.read()).hexdigest() == "f5bc528dcbf594b5089ad7f6228ebab5"
Expand All @@ -326,7 +326,7 @@ def test_launch_counting_total(counter_total: Counter, datadir: Path, tmp_path:
counter_total.meteor.ref_dir / "mock_reference.json"
)
counter_total.launch_counting(
raw_cramfile, cramfile, countfile, ref_json, census_json
raw_cramfile, cramfile, countfile, ref_json, census_json, census_json_file
)
with countfile.open("rb") as out:
assert md5(out.read()).hexdigest() == "f010e4136323ac408d4c127e243756c2"
Expand All @@ -346,7 +346,7 @@ def test_launch_counting_smart_shared(
counter_smart_shared.meteor.ref_dir / "mock_reference.json"
)
counter_smart_shared.launch_counting(
raw_cramfile, cramfile, countfile, ref_json, census_json
raw_cramfile, cramfile, countfile, ref_json, census_json, census_json_file
)
# with countfile.open("rb") as out:
# assert md5(out.read()).hexdigest() == "4bdd7327cbad8e71d210feb0c6375077"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,7 @@
"matches": 10000,
"is_local_mapping": 1,
"mapping_software": "Meteor",
"mapping_software_version": "3.3"
},
"mapping_file": {
"mapping_file_count": 1,
"bowtie_file_1": "part2.bam",
"mapping_file_format": "sam"
"mapping_software_version": "3.3",
"mapping_file": "part2.bam"
}
}
5 changes: 3 additions & 2 deletions meteor/tests/test_merging.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,13 @@ def test_extract_json_info(merging_profiles: Merging) -> None:
config,
param_dict={
"profiling_parameters": ["msp_filter", "modules_def"],
"mapping_file": [""],
"mapping": ["mapping_file"],
},
)
assert info == {
"msp_filter": 0.1,
"modules_def": "modules_definition.tsv",
"bowtie_file": "sample1.sam",
"mapping_file": "sample1.sam",
}


Expand Down Expand Up @@ -228,6 +228,7 @@ def test_execute1(merging_profiles: Merging, datadir: Path) -> None:
datadir / "expected_output" / "test_project_census_stage_2_report.tsv"
)
real_output_df = pd.read_table(real_output)

expected_output_df = pd.read_table(expected_output)
real_output_df = (
real_output_df.sort_values(by=["sample"])
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
sample sample_name census_status full_sample_name fastq_file mapping_tool mapping_tool_version mapping_date reference_name trim alignment_number mapping_type meteor_version identity_threshold total_read_count mapped_read_count overall_alignment_rate gene_count msp_count msp_signal mustard_signal fastq_files database_type bowtie_file profiling_date normalization rarefaction_level seed msp_core_size msp_filter msp_def mustard_filename modules_db modules_db_filenames modules_def module_completeness
sample1 sample1 0 sample1_trimmed.Q17.converted.noHost sample1_trimmed.Q17.converted.noHost.fastq.gz bowtie2 2.5.1 2023-11-17 IGC2 80 10000 end-to-end 2.0.9 0.95 19234567 16234987 84.41 627516 297 0.63 0.0 ['fastq1.fastq.gz', 'fastq2.fastq.gz'] complete sample1.sam 2023-11-17 fpkm 5000000 1234 100 0.1 IGC2_1990MSPs.tsv IGC2_mustard.tsv kegg IGC2_kegg_107.tsv modules_definition.tsv 0.9
sample2 sample2 0 sample2_trimmed.Q17.converted.noHost sample2_trimmed.Q17.converted.noHost.fastq.gz bowtie2 2.5.1 2023-11-17 IGC2 80 10000 end-to-end 2.0.9 0.95 15000000 10000000 66.67 687432 325 0.87 0.1 ['fastq1.fastq.gz', 'fastq2.fastq.gz'] complete sample2.sam 2023-11-17 fpkm 5000000 1234 100 0.1 IGC2_1990MSPs.tsv IGC2_mustard.tsv kegg IGC2_kegg_107.tsv modules_definition.tsv 0.9
sample3 sample3 0 sample3_trimmed.Q17.converted.noHost sample3_trimmed.Q17.converted.noHost.fastq.gz bowtie2 2.5.1 2023-11-17 IGC2 80 10000 end-to-end 2.0.9 0.95 20000000 10000000 50.00 599999 354 0.56 0.3 ['fastq1.fastq.gz', 'fastq2.fastq.gz'] complete sample3.sam 2023-11-17 fpkm 5000000 1234 90 0.1 IGC2_1990MSPs.tsv IGC2_mustard.tsv kegg IGC2_kegg_107.tsv modules_definition.tsv 0.9
sample sample_name census_status full_sample_name fastq_file mapping_tool mapping_tool_version mapping_date reference_name trim alignment_number mapping_type meteor_version identity_threshold total_read_count mapped_read_count overall_alignment_rate gene_count msp_count msp_signal mustard_signal fastq_files database_type mapping_file profiling_date normalization rarefaction_level seed msp_core_size msp_filter msp_def mustard_filename modules_db modules_db_filenames modules_def module_completeness count_file counted_reads
sample1 sample1 0 sample1_trimmed.Q17.converted.noHost sample1_trimmed.Q17.converted.noHost.fastq.gz bowtie2 2.5.1 2023-11-17 IGC2 80 10000 end-to-end 2.0.9 0.95 19234567 16234987 84.41 627516 297 0.63 0.0 ['fastq1.fastq.gz', 'fastq2.fastq.gz'] complete sample1.sam 2023-11-17 fpkm 5000000 1234 100 0.1 IGC2_1990MSPs.tsv IGC2_mustard.tsv kegg IGC2_kegg_107.tsv modules_definition.tsv 0.9 sample1.tsv.xz 14591228
sample2 sample2 0 sample2_trimmed.Q17.converted.noHost sample2_trimmed.Q17.converted.noHost.fastq.gz bowtie2 2.5.1 2023-11-17 IGC2 80 10000 end-to-end 2.0.9 0.95 15000000 10000000 66.67 687432 325 0.87 0.1 ['fastq1.fastq.gz', 'fastq2.fastq.gz'] complete sample2.sam 2023-11-17 fpkm 5000000 1234 100 0.1 IGC2_1990MSPs.tsv IGC2_mustard.tsv kegg IGC2_kegg_107.tsv modules_definition.tsv 0.9 sample2.tsv.xz 10000000
sample3 sample3 0 sample3_trimmed.Q17.converted.noHost sample3_trimmed.Q17.converted.noHost.fastq.gz bowtie2 2.5.1 2023-11-17 IGC2 80 10000 end-to-end 2.0.9 0.95 20000000 10000000 50.00 599999 354 0.56 0.3 ['fastq1.fastq.gz', 'fastq2.fastq.gz'] complete sample3.sam 2023-11-17 fpkm 5000000 1234 90 0.1 IGC2_1990MSPs.tsv IGC2_mustard.tsv kegg IGC2_kegg_107.tsv modules_definition.tsv 0.9 sample3.tsv.xz 10000000
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
"fastq_files": [
"fastq1.fastq.gz",
"fastq2.fastq.gz"
]
},
"mapping_file": {
"bowtie_file": "sample1.sam"
],
"mapping_file": "sample1.sam"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
"fastq_files": [
"fastq1.fastq.gz",
"fastq2.fastq.gz"
]
},
"mapping_file": {
"bowtie_file": "sample2.sam"
],
"mapping_file": "sample2.sam"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
"fastq_files": [
"fastq1.fastq.gz",
"fastq2.fastq.gz"
]
},
"mapping_file": {
"bowtie_file": "sample3.sam"
],
"mapping_file": "sample3.sam"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,20 @@
"trim": 80,
"alignment_number": 10000,
"mapping_type": "end-to-end",
"identity_threshold": 0.95,
"total_read_count": 19234567,
"mapped_read_count": 16234987,
"overall_alignment_rate": 84.41,
"fastq_files": [
"fastq1.fastq.gz",
"fastq2.fastq.gz"
],
"database_type": "complete"
"database_type": "complete",
"mapping_file": "sample1.sam"
},
"mapping_file": {
"bowtie_file": "sample1.sam"
"counting": {
"counted_reads": 14591228,
"identity_threshold": 0.95,
"count_file": "sample1.tsv.xz"
},
"profiling_session": {
"meteor_version": "2.0.9",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,20 @@
"trim": 80,
"alignment_number": 10000,
"mapping_type": "end-to-end",
"identity_threshold": 0.95,
"total_read_count": 15000000,
"mapped_read_count": 10000000,
"overall_alignment_rate": 66.67,
"fastq_files": [
"fastq1.fastq.gz",
"fastq2.fastq.gz"
],
"database_type": "complete"
"database_type": "complete",
"mapping_file": "sample2.sam"
},
"mapping_file": {
"bowtie_file": "sample2.sam"
"counting": {
"counted_reads": 10000000,
"identity_threshold": 0.95,
"count_file": "sample2.tsv.xz"
},
"profiling_session": {
"meteor_version": "2.0.9",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,13 @@
"fastq1.fastq.gz",
"fastq2.fastq.gz"
],
"database_type": "complete"
"database_type": "complete",
"mapping_file": "sample3.sam"
},
"mapping_file": {
"bowtie_file": "sample3.sam"
"counting": {
"counted_reads": 10000000,
"identity_threshold": 0.95,
"count_file": "sample3.tsv.xz"
},
"profiling_session": {
"meteor_version": "2.0.9",
Expand Down
29 changes: 28 additions & 1 deletion meteor/tests/test_strain/map/test/test_census_stage_1.json
Original file line number Diff line number Diff line change
@@ -1 +1,28 @@
{"meteor_version": "2.0.6", "sample_info": {"sample_name": "test", "tag": "single", "full_sample_name": "test"}, "sample_file": {"fastq_file": "test.fastq.gz"}, "mapping": {"mapping_tool": "bowtie2", "mapping_tool_version": "2.5.3", "mapping_date": "2024-06-11", "reference_name": "mock", "trim": "80", "alignment_number": 10000, "mapping_type": "end-to-end", "identity_threshold": 0.95, "total_read_count": 827509, "mapped_read_count": 793977, "overall_alignment_rate": 95.95, "fastq_files": "fastq/test/test.fastq.gz"}, "mapping_file": {"bowtie_file": "test_raw.cram"}}
{
"meteor_version": "2.0.6",
"sample_info": {
"sample_name": "test",
"tag": "single",
"full_sample_name": "test"
},
"sample_file": {
"fastq_file": "test.fastq.gz"
},
"mapping": {
"mapping_tool": "bowtie2",
"mapping_tool_version": "2.5.3",
"mapping_date": "2024-06-11",
"reference_name": "mock",
"trim": "80",
"alignment_number": 10000,
"mapping_type": "end-to-end",
"identity_threshold": 0.95,
"total_read_count": 827509,
"mapped_read_count": 793977,
"overall_alignment_rate": 95.95,
"fastq_files": [
"fastq/test/test.fastq.gz"
],
"mapping_file": "test_raw.cram"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
"total_read_count": 1480,
"mapped_read_count": 1480,
"overall_alignment_rate": 100.0,
"fastq_files": ["eva71_bench.fq.gz"]
},
"mapping_file": {
"bowtie_file": "eva71_bench.sam"
"fastq_files": [
"eva71_bench.fq.gz"
],
"mapping_file": "eva71_bench.sam"
}
}

0 comments on commit 3a783c6

Please sign in to comment.