diff --git a/pyproject.toml b/pyproject.toml index 9de0611..f678f48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "phenotype2phenopacket" -version = "0.6.11" +version = "0.6.12" description = "" authors = ["Yasemin Bridges "] readme = "README.md" diff --git a/src/phenotype2phenopacket/create/create.py b/src/phenotype2phenopacket/create/create.py index 0c80399..f60922e 100644 --- a/src/phenotype2phenopacket/create/create.py +++ b/src/phenotype2phenopacket/create/create.py @@ -11,6 +11,7 @@ from phenotype2phenopacket.utils.utils import ( filter_diseases, load_ontology, + read_omim_id_list, return_phenotype_annotation_data, ) @@ -74,7 +75,14 @@ def create_synthetic_patients( grouped_omim_diseases = filter_diseases( num_disease, omim_id, omim_id_list, phenotype_annotation_data ) - for omim_disease in grouped_omim_diseases: + omim_ids = ( + read_omim_id_list(omim_id_list) if omim_id_list else [None] * len(grouped_omim_diseases) + ) + for omim_id, omim_disease in zip(omim_ids, grouped_omim_diseases): + if len(omim_disease) == 0: + id_message = f" for {omim_id}!" if omim_id else "!" + print(f"Skipping... Could not find any phenotype entries{id_message}") + continue create_synthetic_patient_phenopacket( human_phenotype_ontology, omim_disease, diff --git a/src/phenotype2phenopacket/utils/phenopacket_utils.py b/src/phenotype2phenopacket/utils/phenopacket_utils.py index 2bfa6b4..23fdf0f 100644 --- a/src/phenotype2phenopacket/utils/phenopacket_utils.py +++ b/src/phenotype2phenopacket/utils/phenopacket_utils.py @@ -366,9 +366,9 @@ def worker(): if len(self.filtered_df) == 0: return frequency_df.sample(n=max_number) else: - return pl.from_dicts(self.filtered_df) + return pl.from_dicts(self.filtered_df, infer_schema_length=len(self.filtered_df)) else: - return pl.from_dicts(self.filtered_df) + return pl.from_dicts(self.filtered_df, infer_schema_length=len(self.filtered_df)) def get_patient_terms(self) -> pl.DataFrame: """ @@ -570,7 +570,7 @@ def patient_term_annotation_set(self) -> pl.DataFrame: patient_terms, patient_terms_sub_sample ) final_patient_terms = patient_terms_filtered.to_dicts() + new_phenotype_terms - return pl.from_dicts(final_patient_terms) + return pl.from_dicts(final_patient_terms, infer_schema_length=len(final_patient_terms)) class PhenotypeAnnotationToPhenopacketConverter: