scverse · ivirshup · Sep 20, 2023 · Sep 20, 2023 · Sep 20, 2023 · Apr 2, 2024
diff --git a/docs/api.md b/docs/api.md
@@ -27,7 +27,7 @@
     filters.GeneBioTypeFilter
     filters.GeneNameFilter
     filters.SeqNameFilter
-    filters.GeneRangesFilter
+    filters.GeneRangeFilter
     filters.TxIDFilter
     filters.TxBioTypeFilter
     filters.ExonIDFilter

diff --git a/docs/notebooks/basic_usage.ipynb b/docs/notebooks/basic_usage.ipynb
@@ -765,7 +765,7 @@
     }
    ],
    "source": [
-    "ensdb.genes(filter=gf.filters.GeneRangesFilter(\"1:10000-20000\"))"
+    "ensdb.genes(filter=gf.filters.GeneRangeFilter(\"1:10000-20000\"))"
    ]
   },
   {
@@ -855,7 +855,7 @@
    "source": [
     "ensdb.genes(\n",
     "    filter=gf.filters.GeneBioTypeFilter(\"lncRNA\")\n",
-    "    & gf.filters.GeneRangesFilter(\"1:10000-20000\")\n",
+    "    & gf.filters.GeneRangeFilter(\"1:10000-20000\")\n",
     ")"
    ]
   },
@@ -870,7 +870,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Using the `cols` argument, you can get annotations from other tables in the database."
+    "Using the `columns` argument, you can get annotations from other tables in the database."
    ]
   },
   {
@@ -966,7 +966,9 @@
     }
    ],
    "source": [
-    "ensdb.genes(cols=[\"gene_id\", \"tx_id\", \"gene_name\", \"protein_id\", \"uniprot_id\"]).head()"
+    "ensdb.genes(\n",
+    "    columns=[\"gene_id\", \"tx_id\", \"gene_name\", \"protein_id\", \"uniprot_id\"]\n",
+    ").head()"
    ]
   },
   {

diff --git a/src/genomic_features/_core/filters.py b/src/genomic_features/_core/filters.py
@@ -198,7 +198,7 @@ def columns(self) -> set[str]:
         return {"gene_name"}
 
 
-class GeneRangesFilter(AbstractFilterRangeExpr):
+class GeneRangeFilter(AbstractFilterRangeExpr):
     """
     Filter features within a genomic range
 
@@ -251,7 +251,7 @@ class CanonicalTxFilter(AbstractFilterExpr):
 
     >>> ensdb.transcripts(filter=gf.filters.CanonicalTxFilter())
     >>> ensdb.exons(
-    ...     cols=["tx_id", "exon_id", "seq_name", "exon_seq_start", "exon_seq_end"],
+    ...     columns=["tx_id", "exon_id", "seq_name", "exon_seq_start", "exon_seq_end"],
     ...     filter=gf.filters.CanonicalTxFilter(),
     ... )
     """

diff --git a/src/genomic_features/ensembl/ensembldb.py b/src/genomic_features/ensembl/ensembldb.py
@@ -144,88 +144,90 @@ def __repr__(self) -> str:
 
     def genes(
         self,
-        cols: list[str] | None = None,
+        columns: list[str] | None = None,
         filter: _filters.AbstractFilterExpr = filters.EmptyFilter(),
         join_type: Literal["inner", "left"] = "inner",
     ) -> DataFrame:
         """Get gene annotations.
 
         Parameters
         ----------
-        cols
+        columns
             Which columns to retrieve from the database. Can be from other tables.
             Returns all gene columns if None.
         filters
             Filters to apply to the query.
         join_type
-            How to perform joins during the query (if cols or filters requires them).
+            How to perform joins during the query (if columns or filters require them).
 
 
         Usage
         -----
-        >>> ensdb.genes(cols=["gene_id", "gene_name", "tx_id"])
+        >>> ensdb.genes(columns=["gene_id", "gene_name", "tx_id"])
         """
         table: Final = "gene"
-        if cols is None:
+        if columns is None:
             # TODO: check why R adds entrezid
-            cols = self.list_columns(table)  # get all columns
+            columns = self.list_columns(table)  # get all columns
 
-        cols = cols.copy()
-        if "gene_id" not in cols:  # genes always needs gene_id
-            cols.append("gene_id")
+        columns = columns.copy()
+        if "gene_id" not in columns:  # genes always needs gene_id
+            columns.append("gene_id")
 
-        query = self._build_query(table, cols, filter, join_type)
+        query = self._build_query(table, columns, filter, join_type)
         return self._execute_query(query)
 
     def transcripts(
         self,
-        cols: list[str] | None = None,
+        columns: list[str] | None = None,
         filter: _filters.AbstractFilterExpr = filters.EmptyFilter(),
         join_type: Literal["inner", "left"] = "inner",
     ) -> DataFrame:
         """Get transcript annotations.
 
         Parameters
         ----------
-        cols
+        columns
             Which columns to retrieve from the database. Can be from other tables.
             Returns all transcript columns if None.
         filters
             Filters to apply to the query.
         join_type
-            How to perform joins during the query (if cols or filters requires them).
+            How to perform joins during the query (if columns or filters require them).
 
 
         Usage
         -----
-        >>> ensdb.transcripts(cols=["tx_id", "tx_name", "gene_id"])
+        >>> ensdb.transcripts(columns=["tx_id", "tx_name", "gene_id"])
         """
         table: Final = "tx"
-        if cols is None:
-            cols = self.list_columns(table)  # get all columns
+        if columns is None:
+            columns = self.list_columns(table)  # get all columns
 
-        cols = cols.copy()
+        columns = columns.copy()
         # Require primary key in output
-        if "tx_id" not in cols:
-            cols.append("tx_id")
+        if "tx_id" not in columns:
+            columns.append("tx_id")
         # seq_name is required for genomic range operations
-        if ("tx_seq_start" in cols or "tx_seq_end" in cols) and "seq_name" not in cols:
-            cols.append("seq_name")
+        if (
+            "tx_seq_start" in columns or "tx_seq_end" in columns
+        ) and "seq_name" not in columns:
+            columns.append("seq_name")
 
-        query = self._build_query(table, cols, filter, join_type)
+        query = self._build_query(table, columns, filter, join_type)
         return self._execute_query(query)
 
     def exons(
         self,
-        cols: list[str] | None = None,
+        columns: list[str] | None = None,
         filter: _filters.AbstractFilterExpr = filters.EmptyFilter(),
         join_type: Literal["inner", "left"] = "inner",
     ) -> DataFrame:
         """Get exons table.
 
         Parameters
         ----------
-        cols
+        columns
             Which columns to retrieve from the database. Can be from other tables.
             Returns all exon columns if None.
         filter
@@ -239,20 +241,20 @@ def exons(
         >>> ensdb.exons()
         """
         table: Final = "exon"
-        if cols is None:
-            cols = self.list_columns(table)  # get all columns
+        if columns is None:
+            columns = self.list_columns(table)  # get all columns
 
-        cols = cols.copy()
+        columns = columns.copy()
         # Require primary key in output
-        if "exon_id" not in cols:
-            cols.append("exon_id")
+        if "exon_id" not in columns:
+            columns.append("exon_id")
         # seq_name is required for genomic range operations
         if (
-            "exon_seq_start" in cols or "exon_seq_end" in cols
-        ) and "seq_name" not in cols:
-            cols.append("seq_name")
+            "exon_seq_start" in columns or "exon_seq_end" in columns
+        ) and "seq_name" not in columns:
+            columns.append("seq_name")
 
-        query = self._build_query(table, cols, filter, join_type)
+        query = self._build_query(table, columns, filter, join_type)
         return self._execute_query(query)
 
     def _execute_query(self, query: IbisTable) -> DataFrame:
@@ -272,19 +274,19 @@ def chromosomes(self) -> DataFrame:
     def _build_query(
         self,
         table: Literal["gene", "tx", "exon"],
-        cols: list[str],
+        columns: list[str],
         filter: _filters.AbstractFilterExpr,
         join_type: Literal["inner", "left"] = "inner",
     ) -> IbisTable:
         """Build a query for the genomic features table."""
-        # Finalize cols
-        self._clean_columns(cols)
+        # Finalize columns
+        self._clean_columns(columns)
         for col in filter.columns():
-            if col not in cols:
-                cols.append(col)
+            if col not in columns:
+                columns.append(col)
 
         # check if join is required
-        tables = self._get_required_tables(self._tables_for_columns(cols))
+        tables = self._get_required_tables(self._tables_for_columns(columns))
 
         # Basically just to make sure exons stay in the query
         if table not in tables:
@@ -295,7 +297,7 @@ def _build_query(
         else:
             query = self.db.table(table)
         # add filter
-        query = query.filter(filter.convert()).select(cols)
+        query = query.filter(filter.convert()).select(columns)
         return query
 
     def _join_query(
@@ -438,26 +440,26 @@ def _clean_columns(self, columns: list[str]) -> list[str]:
             columns = [columns]
 
         valid_columns = set(self.list_columns())
-        cols = list(filter(lambda c: c in valid_columns, columns))
+        output_columns = list(filter(lambda c: c in valid_columns, columns))
         invalid_columns = set(columns) - valid_columns
         if invalid_columns:
             raise ValueError(
                 f"The following columns are not found in any database: {invalid_columns}"
             )
-        if not cols:
+        if not output_columns:
             raise ValueError("No valid columns were found.")
-        return cols
+        return output_columns
 
-    def _tables_for_columns(self, cols: list, start_with: str | None = None) -> list:
+    def _tables_for_columns(self, columns: list, start_with: str | None = None) -> list:
         """
         Return a list of tables that contain the specified columns.
 
         Parameters
         ----------
-        cols
+        columns
             Columns that we're looking for.
         """
-        cols = self._clean_columns(cols)
+        columns = self._clean_columns(columns)
         table_list = self._tables_by_degree()  # list of table names
 
         # remove start_with from table_list and add it to the beginning of the list
@@ -472,14 +474,14 @@ def _tables_for_columns(self, cols: list, start_with: str | None = None) -> list
         tables = []
         for t in table_list:
             # check if all columns are in one table
-            if set(cols).issubset(self.db.table(t).columns):
+            if set(columns).issubset(self.db.table(t).columns):
                 tables.append(t)
                 return tables
             else:
                 # check if a single column is in the table
-                for c in cols.copy():
+                for c in columns.copy():
                     if c in self.db.table(t).columns:
                         if t not in tables:
                             tables.append(t)
-                        cols.remove(c)  # remove column from list
+                        columns.remove(c)  # remove column from list
         return tables
diff --git a/src/genomic_features/filters.py b/src/genomic_features/filters.py
@@ -5,7 +5,7 @@
     GeneBioTypeFilter,
     GeneIDFilter,
     GeneNameFilter,
-    GeneRangesFilter,
+    GeneRangeFilter,
     SeqNameFilter,
     TxBioTypeFilter,
     TxIDFilter,
@@ -18,7 +18,7 @@
     "CanonicalTxFilter",
     "GeneIDFilter",
     "GeneBioTypeFilter",
-    "GeneRangesFilter",
+    "GeneRangeFilter",
     "EmptyFilter",
     "ExonIDFilter",
     "GeneNameFilter",

diff --git a/tests/test_basic.py b/tests/test_basic.py
@@ -27,7 +27,9 @@ def test_repr():
 
 def test_invalid_join():
     with pytest.raises(ValueError, match=r"Invalid join type: flarb"):
-        gf.ensembl.annotation("Hsapiens", 108).genes(cols=["tx_id"], join_type="flarb")
+        gf.ensembl.annotation("Hsapiens", 108).genes(
+            columns=["tx_id"], join_type="flarb"
+        )
 
 
 def test_exons():

diff --git a/tests/test_columns.py b/tests/test_columns.py
@@ -25,7 +25,8 @@ def test_tables_by_degree(hsapiens108):
     ]
     result = hsapiens108._tables_by_degree(tab=["protein", "exon"])
     assert result == ["exon", "protein"]
-    result = hsapiens108._tables_by_degree(tab=["protein", "invalid_table"])
+    with pytest.warns(UserWarning, match="not in the database: invalid_table"):
+        result = hsapiens108._tables_by_degree(tab=["protein", "invalid_table"])
     assert result == ["protein"]
 
 
@@ -60,7 +61,7 @@ def test_required_tables(hsapiens108):
 
 # Test simple subsetting to columns in one table gene
 def test_simple_subsetting(hsapiens108):
-    result = hsapiens108.genes(cols=["gene_id", "gene_name"])
+    result = hsapiens108.genes(columns=["gene_id", "gene_name"])
     assert result.shape == (70616, 2)
     assert result.columns.tolist() == ["gene_id", "gene_name"]
 
@@ -69,15 +70,15 @@ def test_simple_subsetting(hsapiens108):
 def test_multiple_table_subsetting(hsapiens108):
     # table genes and transcripts
     result = hsapiens108.genes(
-        cols=["gene_id", "gene_name", "tx_id"],
+        columns=["gene_id", "gene_name", "tx_id"],
         join_type="inner",
     )
     assert result.shape == (275721, 3)
     assert list(result.columns) == ["gene_id", "gene_name", "tx_id"]
 
     # table genes and transcripts with filter
     result = hsapiens108.genes(
-        cols=["gene_id", "gene_name", "tx_id"],
+        columns=["gene_id", "gene_name", "tx_id"],
         join_type="inner",
         filter=gf.filters.GeneBioTypeFilter(["protein_coding"]),
     )
@@ -86,7 +87,7 @@ def test_multiple_table_subsetting(hsapiens108):
 
     # table genes, transcripts and exons and filter
     result = hsapiens108.genes(
-        cols=["gene_id", "gene_name", "exon_id"],
+        columns=["gene_id", "gene_name", "exon_id"],
         join_type="inner",
         filter=gf.filters.GeneIDFilter(["ENSG00000139618"]),
     )
@@ -97,7 +98,7 @@ def test_multiple_table_subsetting(hsapiens108):
     # test left join
     # table genes and transcripts
     result = hsapiens108.genes(
-        cols=["gene_id", "gene_name", "protein_id"],
+        columns=["gene_id", "gene_name", "protein_id"],
         join_type="left",
         filter=gf.filters.GeneBioTypeFilter(["protein_coding"]),
     )
@@ -115,7 +116,7 @@ def test_multiple_table_subsetting(hsapiens108):
 
 def test_chromosome_columns(hsapiens108):
     # https://github.com/scverse/genomic-features/pull/44/files#r1196331705
-    result = hsapiens108.genes(cols=["gene_id", "seq_name", "seq_length"])
+    result = hsapiens108.genes(columns=["gene_id", "seq_name", "seq_length"])
     assert result.shape[0] == hsapiens108.db.table("gene").count().execute()
 
     chroms = hsapiens108.chromosomes()