From 407317ef5094e7a6771e688e00920dc7d019a7b4 Mon Sep 17 00:00:00 2001 From: Malte Benedikt Kuehl Date: Thu, 22 Aug 2024 22:52:08 +0200 Subject: [PATCH] Add warning for change of fields in transcript gene mapping --- pytximport/utils/_create_transcript_to_gene_map.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pytximport/utils/_create_transcript_to_gene_map.py b/pytximport/utils/_create_transcript_to_gene_map.py index fd49237..65e7030 100644 --- a/pytximport/utils/_create_transcript_to_gene_map.py +++ b/pytximport/utils/_create_transcript_to_gene_map.py @@ -1,6 +1,7 @@ import re +from logging import warning from pathlib import Path -from typing import Literal, Union +from typing import Any, Dict, Literal, Union import numpy as np import pandas as pd @@ -11,6 +12,7 @@ def create_transcript_to_gene_map( host: str = "http://www.ensembl.org", source_field: Literal["ensembl_transcript_id", "external_transcript_name"] = "ensembl_transcript_id", target_field: Literal["ensembl_gene_id", "external_gene_name", "external_transcript_name"] = "ensembl_gene_id", + **kwargs: Dict[str, Any], ) -> pd.DataFrame: """Create a mapping from transcript ids to gene ids using the Ensembl Biomart. @@ -27,6 +29,9 @@ def create_transcript_to_gene_map( """ from pybiomart import Dataset + if "field" in kwargs: + warning("The field argument is deprecated. Please use the source_field and target_field arguments instead.") + if species == "human": dataset = Dataset(name="hsapiens_gene_ensembl", host=host) elif species == "mouse": @@ -51,6 +56,7 @@ def create_transcript_to_gene_map_from_gtf_annotation( target_field: Literal["gene_id", "gene_name"] = "gene_id", chunk_size: int = 100000, keep_biotype: bool = False, + **kwargs: Dict[str, Any], ) -> pd.DataFrame: """Create a mapping from transcript ids to gene ids using a GTF annotation file. @@ -66,6 +72,9 @@ def create_transcript_to_gene_map_from_gtf_annotation( """ transcript_gene_map = pd.DataFrame(columns=["transcript_id", "gene_id", "gene_name", "gene_biotype"]) + if "field" in kwargs: + warning("The field argument is deprecated. Please use the source_field and target_field arguments instead.") + for chunk in pd.read_csv(file_path, sep="\t", chunksize=chunk_size, header=None, comment="#"): # see: https://www.ensembl.org/info/website/upload/gff.html chunk.columns = ["seqname", "source", "feature", "start", "end", "score", "strand", "frame", "attribute"]