ship necessary bioframe code with the library

Phlya · Jan 11, 2024 · c4d147c · c4d147c
1 parent e31ef1f
commit c4d147c
Show file tree

Hide file tree

Showing 4 changed files with 165 additions and 7 deletions.
diff --git a/adjustText/__init__.py b/adjustText/__init__.py
@@ -4,7 +4,6 @@
 from matplotlib import pyplot as plt
 from matplotlib.patches import FancyArrowPatch
 from matplotlib.path import get_path_collection_extents
-import bioframe as bf
 import scipy.spatial.distance
 import logging
 from timeit import default_timer as timer
@@ -15,6 +14,7 @@
 except ImportError:
     matplot_get_renderer = None
 
+from .arrops import overlap_intervals
 from ._version import __version__
 
 
@@ -140,11 +140,11 @@ def get_2d_coordinates(objs):
 
 def get_shifts_texts(coords):
     N = coords.shape[0]
-    xoverlaps = bf.core.arrops.overlap_intervals(
+    xoverlaps = overlap_intervals(
         coords[:, 0], coords[:, 1], coords[:, 0], coords[:, 1]
     )
     xoverlaps = xoverlaps[xoverlaps[:, 0] != xoverlaps[:, 1]]
-    yoverlaps = bf.core.arrops.overlap_intervals(
+    yoverlaps = overlap_intervals(
         coords[:, 2], coords[:, 3], coords[:, 2], coords[:, 3]
     )
     yoverlaps = yoverlaps[yoverlaps[:, 0] != yoverlaps[:, 1]]
@@ -162,10 +162,10 @@ def get_shifts_texts(coords):
 def get_shifts_extra(coords, extra_coords):
     N = coords.shape[0]
 
-    xoverlaps = bf.core.arrops.overlap_intervals(
+    xoverlaps = overlap_intervals(
         coords[:, 0], coords[:, 1], extra_coords[:, 0], extra_coords[:, 1]
     )
-    yoverlaps = bf.core.arrops.overlap_intervals(
+    yoverlaps = overlap_intervals(
         coords[:, 2], coords[:, 3], extra_coords[:, 2], extra_coords[:, 3]
     )
     overlaps = yoverlaps[(yoverlaps[:, None] == xoverlaps).all(-1).any(-1)]

diff --git a/adjustText/arrops.py b/adjustText/arrops.py
@@ -0,0 +1,147 @@
+# Code copied from bioframe.core.arrops
+import numpy as np
+import pandas as pd
+import warnings
+
+def arange_multi(starts, stops=None, lengths=None):
+    """
+    Create concatenated ranges of integers for multiple start/length.
+
+    Each pair ``(starts[i], lengths[i])`` contributes the run
+    ``starts[i], starts[i] + 1, ..., starts[i] + lengths[i] - 1``; the runs
+    are concatenated in input order. Zero-length ranges contribute nothing.
+
+    Parameters
+    ----------
+    starts : numpy.ndarray or scalar
+        Starts for each range. A scalar is used as the start of every range.
+    stops : numpy.ndarray, optional
+        Stops (exclusive) for each range.
+    lengths : numpy.ndarray, optional
+        Lengths for each range. Exactly one of `stops` or `lengths` must be
+        provided.
+
+    Returns
+    -------
+    concat_ranges : numpy.ndarray
+        Concatenated ranges.
+
+    Raises
+    ------
+    ValueError
+        If both or neither of `stops` and `lengths` are provided.
+
+    Examples
+    --------
+    >>> starts = np.array([1, 3, 4, 6])
+    >>> stops = np.array([1, 5, 7, 6])
+    >>> arange_multi(starts, stops)
+    array([3, 4, 4, 5, 6])
+
+    From: https://codereview.stackexchange.com/questions/83018/vectorized-numpy-version-of-arange-with-multiple-start-stop
+
+    """
+
+    if (stops is None) == (lengths is None):
+        raise ValueError("Either stops or lengths must be provided!")
+
+    # Normalise to ndarray so plain sequences work with .sum()/.cumsum() below.
+    if lengths is None:
+        lengths = np.asarray(stops) - starts
+    else:
+        lengths = np.asarray(lengths)
+
+    if np.isscalar(starts):
+        # Size the broadcast from `lengths`, which is always set at this point;
+        # `stops` is None whenever the caller passed `lengths` instead.
+        starts = np.full(len(lengths), starts)
+
+    # Repeat start position index length times and concatenate
+    cat_start = np.repeat(starts, lengths)
+
+    # Create group counter that resets for each start/length
+    cat_counter = np.arange(lengths.sum()) - np.repeat(
+        lengths.cumsum() - lengths, lengths
+    )
+
+    # Add group counter to group specific starts
+    cat_range = cat_start + cat_counter
+
+    return cat_range
+
+def overlap_intervals(starts1, ends1, starts2, ends2, closed=False, sort=False):
+    """
+    Take two sets of intervals and return the indices of pairs of overlapping intervals.
+
+    Parameters
+    ----------
+    starts1, ends1, starts2, ends2 : numpy.ndarray
+        Interval coordinates. Warning: if provided as pandas.Series, indices
+        will be ignored.
+
+    closed : bool
+        If True, then treat intervals as closed and report single-point overlaps.
+
+    sort : bool
+        If True, order the returned pairs by the id from the 1st set, then by
+        the id from the 2nd set. If False, rows are left in the order in
+        which they were generated.
+
+    Returns
+    -------
+    overlap_ids : numpy.ndarray
+        An Nx2 array containing the indices of pairs of overlapping intervals.
+        The 1st column contains ids from the 1st set, the 2nd column has ids
+        from the 2nd set.
+
+    """
+
+    # NOTE(review): this isinstance check is the function's only use of pandas;
+    # the module-level `import pandas` makes pandas a hard import-time
+    # dependency -- confirm it is declared in the package requirements.
+    if any(isinstance(vec, pd.Series) for vec in (starts1, ends1, starts2, ends2)):
+        # Warn once per call; stacklevel=2 attributes the warning to the caller.
+        warnings.warn(
+            "One of the inputs is provided as pandas.Series and its index "
+            "will be ignored.",
+            SyntaxWarning,
+            stacklevel=2,
+        )
+
+    starts1 = np.asarray(starts1)
+    ends1 = np.asarray(ends1)
+    starts2 = np.asarray(starts2)
+    ends2 = np.asarray(ends2)
+
+    # Remember each interval's position in the caller's arrays; the ids are
+    # permuted together with the coordinates so results refer to input order.
+    n1 = len(starts1)
+    n2 = len(starts2)
+    ids1 = np.arange(0, n1)
+    ids2 = np.arange(0, n2)
+
+    # Sort each set independently by (start, end); the binary searches below
+    # are run against the sorted start coordinates.
+    order1 = np.lexsort([ends1, starts1])
+    order2 = np.lexsort([ends2, starts2])
+    starts1, ends1, ids1 = starts1[order1], ends1[order1], ids1[order1]
+    starts2, ends2, ids2 = starts2[order2], ends2[order2], ids2[order2]
+
+    # Find interval overlaps.
+    # For every interval of set 1: the index slice of set 2 whose starts lie
+    # in [start1, end1), or in [start1, end1] when `closed` is True.
+    match_2in1_starts = np.searchsorted(starts2, starts1, "left")
+    match_2in1_ends = np.searchsorted(starts2, ends1, "right" if closed else "left")
+    # For every interval of set 2: the index slice of set 1 whose starts lie
+    # strictly after start2. "right" is intentional here to avoid duplication:
+    # pairs with equal starts are already covered by the search above.
+    match_1in2_starts = np.searchsorted(starts1, starts2, "right")
+    match_1in2_ends = np.searchsorted(starts1, ends2, "right" if closed else "left")
+
+    # Keep only intervals that matched something (non-empty index slices)
+    match_2in1_mask = match_2in1_ends > match_2in1_starts
+    match_1in2_mask = match_1in2_ends > match_1in2_starts
+    match_2in1_starts, match_2in1_ends = (
+        match_2in1_starts[match_2in1_mask],
+        match_2in1_ends[match_2in1_mask],
+    )
+    match_1in2_starts, match_1in2_ends = (
+        match_1in2_starts[match_1in2_mask],
+        match_1in2_ends[match_1in2_mask],
+    )
+
+    # Generate IDs of pairs of overlapping intervals: the first row of blocks
+    # holds the pairs found from set 1's side, the second row those found
+    # from set 2's side; column 0 is always the id from set 1.
+    overlap_ids = np.block(
+        [
+            [
+                np.repeat(ids1[match_2in1_mask], match_2in1_ends - match_2in1_starts)[
+                    :, None
+                ],
+                ids2[arange_multi(match_2in1_starts, match_2in1_ends)][:, None],
+            ],
+            [
+                ids1[arange_multi(match_1in2_starts, match_1in2_ends)][:, None],
+                np.repeat(ids2[match_1in2_mask], match_1in2_ends - match_1in2_starts)[
+                    :, None
+                ],
+            ],
+        ]
+    )
+
+    if sort:
+        # Sort overlaps by the id from the 1st set, ties broken by the 2nd
+        overlap_ids = overlap_ids[np.lexsort([overlap_ids[:, 1], overlap_ids[:, 0]])]
+
+    return overlap_ids
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,3 @@
+numpy
+matplotlib
+scipy
diff --git a/setup.py b/setup.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import os
 from setuptools import setup
-
+import io
 
 def get_version(path):
     with open(path, "r") as f:
@@ -12,6 +12,14 @@ def get_version(path):
 def read(fname):
     """Return the text of *fname*, resolved relative to this setup script."""
     # The context manager closes the handle deterministically. Decoding is
     # pinned to UTF-8, matching the default of `_read`, so the result does
     # not depend on the locale of the build machine.
     with io.open(os.path.join(os.path.dirname(__file__), fname), encoding="utf-8") as fh:
         return fh.read()
 
+def _read(*parts, **kwargs):
+    """
+    Return the decoded text of a file located next to this setup script.
+
+    ``parts`` are path components joined onto the script's directory. The
+    only keyword consulted is ``encoding`` (default ``"utf-8"``).
+    """
+    target = os.path.join(os.path.dirname(__file__), *parts)
+    with io.open(target, encoding=kwargs.pop("encoding", "utf-8")) as handle:
+        return handle.read()
+
+# Runtime dependencies are declared once, in requirements.txt. Each line is
+# stripped of surrounding whitespace, and blank lines and "#" comment lines
+# are skipped so that only requirement specifiers reach setuptools.
+install_requires = [
+    line.strip()
+    for line in _read("requirements.txt").splitlines()
+    if line.strip() and not line.strip().startswith("#")
+]
 
 setup(name='adjustText',
       version=get_version(os.path.join(
@@ -27,7 +35,7 @@ def read(fname):
           'Documentation': 'https://adjusttext.readthedocs.io/',
       },
       packages=['adjustText'],
-      install_requires=["numpy", "matplotlib", "bioframe", "scipy"],
+      install_requires=install_requires,
       include_package_data=True,
       long_description=read("README.md"),
       long_description_content_type="text/markdown",