From 407317ef5094e7a6771e688e00920dc7d019a7b4 Mon Sep 17 00:00:00 2001
From: Malte Benedikt Kuehl <malte.kuehl@clin.au.dk>
Date: Thu, 22 Aug 2024 22:52:08 +0200
Subject: [PATCH] Add warning for change of fields in transcript gene mapping

---
 pytximport/utils/_create_transcript_to_gene_map.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pytximport/utils/_create_transcript_to_gene_map.py b/pytximport/utils/_create_transcript_to_gene_map.py
index fd49237..65e7030 100644
--- a/pytximport/utils/_create_transcript_to_gene_map.py
+++ b/pytximport/utils/_create_transcript_to_gene_map.py
@@ -1,6 +1,7 @@
 import re
+from logging import warning
 from pathlib import Path
-from typing import Literal, Union
+from typing import Any, Dict, Literal, Union
 
 import numpy as np
 import pandas as pd
@@ -11,6 +12,7 @@ def create_transcript_to_gene_map(
     host: str = "http://www.ensembl.org",
     source_field: Literal["ensembl_transcript_id", "external_transcript_name"] = "ensembl_transcript_id",
     target_field: Literal["ensembl_gene_id", "external_gene_name", "external_transcript_name"] = "ensembl_gene_id",
+    **kwargs: Dict[str, Any],
 ) -> pd.DataFrame:
     """Create a mapping from transcript ids to gene ids using the Ensembl Biomart.
 
@@ -27,6 +29,9 @@ def create_transcript_to_gene_map(
     """
     from pybiomart import Dataset
 
+    if "field" in kwargs:
+        warning("The field argument is deprecated. Please use the source_field and target_field arguments instead.")
+
     if species == "human":
         dataset = Dataset(name="hsapiens_gene_ensembl", host=host)
     elif species == "mouse":
@@ -51,6 +56,7 @@ def create_transcript_to_gene_map_from_gtf_annotation(
     target_field: Literal["gene_id", "gene_name"] = "gene_id",
     chunk_size: int = 100000,
     keep_biotype: bool = False,
+    **kwargs: Dict[str, Any],
 ) -> pd.DataFrame:
     """Create a mapping from transcript ids to gene ids using a GTF annotation file.
 
@@ -66,6 +72,9 @@ def create_transcript_to_gene_map_from_gtf_annotation(
     """
     transcript_gene_map = pd.DataFrame(columns=["transcript_id", "gene_id", "gene_name", "gene_biotype"])
 
+    if "field" in kwargs:
+        warning("The field argument is deprecated. Please use the source_field and target_field arguments instead.")
+
     for chunk in pd.read_csv(file_path, sep="\t", chunksize=chunk_size, header=None, comment="#"):
         # see: https://www.ensembl.org/info/website/upload/gff.html
         chunk.columns = ["seqname", "source", "feature", "start", "end", "score", "strand", "frame", "attribute"]