From c4d147cd1d9fb9663feb4271ec4ec87881b9eade Mon Sep 17 00:00:00 2001 From: Phlya Date: Thu, 11 Jan 2024 13:36:35 +0100 Subject: [PATCH] ship necessary bioframe code with the library --- adjustText/__init__.py | 10 +-- adjustText/arrops.py | 147 +++++++++++++++++++++++++++++++++++++++++ requirements.txt | 3 + setup.py | 12 +++- 4 files changed, 165 insertions(+), 7 deletions(-) create mode 100644 adjustText/arrops.py create mode 100644 requirements.txt diff --git a/adjustText/__init__.py b/adjustText/__init__.py index a5a2cc4..d93ac8b 100644 --- a/adjustText/__init__.py +++ b/adjustText/__init__.py @@ -4,7 +4,6 @@ from matplotlib import pyplot as plt from matplotlib.patches import FancyArrowPatch from matplotlib.path import get_path_collection_extents -import bioframe as bf import scipy.spatial.distance import logging from timeit import default_timer as timer @@ -15,6 +14,7 @@ except ImportError: matplot_get_renderer = None +from .arrops import overlap_intervals from ._version import __version__ @@ -140,11 +140,11 @@ def get_2d_coordinates(objs): def get_shifts_texts(coords): N = coords.shape[0] - xoverlaps = bf.core.arrops.overlap_intervals( + xoverlaps = overlap_intervals( coords[:, 0], coords[:, 1], coords[:, 0], coords[:, 1] ) xoverlaps = xoverlaps[xoverlaps[:, 0] != xoverlaps[:, 1]] - yoverlaps = bf.core.arrops.overlap_intervals( + yoverlaps = overlap_intervals( coords[:, 2], coords[:, 3], coords[:, 2], coords[:, 3] ) yoverlaps = yoverlaps[yoverlaps[:, 0] != yoverlaps[:, 1]] @@ -162,10 +162,10 @@ def get_shifts_texts(coords): def get_shifts_extra(coords, extra_coords): N = coords.shape[0] - xoverlaps = bf.core.arrops.overlap_intervals( + xoverlaps = overlap_intervals( coords[:, 0], coords[:, 1], extra_coords[:, 0], extra_coords[:, 1] ) - yoverlaps = bf.core.arrops.overlap_intervals( + yoverlaps = overlap_intervals( coords[:, 2], coords[:, 3], extra_coords[:, 2], extra_coords[:, 3] ) overlaps = yoverlaps[(yoverlaps[:, None] == xoverlaps).all(-1).any(-1)] diff 
--git a/adjustText/arrops.py b/adjustText/arrops.py
new file mode 100644
index 0000000..cc7d503
--- /dev/null
+++ b/adjustText/arrops.py
@@ -0,0 +1,164 @@
+# Code copied from bioframe.core.arrops
+import numpy as np
+import warnings
+
+try:
+    # pandas is optional: it is not listed in requirements.txt and is only
+    # used to warn that the index of a pandas.Series input is ignored.
+    import pandas as pd
+except ImportError:
+    pd = None
+
+
+def arange_multi(starts, stops=None, lengths=None):
+    """
+    Create concatenated ranges of integers for multiple start/length.
+
+    Parameters
+    ----------
+    starts : numpy.ndarray or int
+        Starts for each range. A scalar is used as the start of every range.
+    stops : numpy.ndarray
+        Stops for each range
+    lengths : numpy.ndarray
+        Lengths for each range. Either stops or lengths must be provided.
+
+    Returns
+    -------
+    concat_ranges : numpy.ndarray
+        Concatenated ranges.
+
+    Raises
+    ------
+    ValueError
+        If both or neither of stops and lengths are provided.
+
+    Notes
+    -----
+    See the following illustrative example:
+
+    >>> starts = np.array([1, 3, 4, 6])
+    >>> stops = np.array([1, 5, 7, 6])
+    >>> print(arange_multi(starts, stops))
+    [3 4 4 5 6]
+
+    From: https://codereview.stackexchange.com/questions/83018/vectorized-numpy-version-of-arange-with-multiple-start-stop
+
+    """
+
+    if (stops is None) == (lengths is None):
+        raise ValueError("Either stops or lengths must be provided!")
+
+    if lengths is None:
+        lengths = stops - starts
+
+    if np.isscalar(starts):
+        # lengths is always set here, whereas stops is None when the caller
+        # passed lengths instead
+        starts = np.full(len(lengths), starts)
+
+    # Repeat start position index length times and concatenate
+    cat_start = np.repeat(starts, lengths)
+
+    # Create group counter that resets for each start/length
+    cat_counter = np.arange(lengths.sum()) - np.repeat(
+        lengths.cumsum() - lengths, lengths
+    )
+
+    # Add group counter to group specific starts
+    cat_range = cat_start + cat_counter
+
+    return cat_range
+
+
+def overlap_intervals(starts1, ends1, starts2, ends2, closed=False, sort=False):
+    """
+    Take two sets of intervals and return the indices of pairs of overlapping intervals.
+
+    Parameters
+    ----------
+    starts1, ends1, starts2, ends2 : numpy.ndarray
+        Interval coordinates. Warning: if provided as pandas.Series, indices
+        will be ignored.
+    closed : bool
+        If True, then treat intervals as closed and report single-point overlaps.
+    sort : bool
+        If True, sort the returned pairs by the 1st column, then by the 2nd.
+
+    Returns
+    -------
+    overlap_ids : numpy.ndarray
+        An Nx2 array containing the indices of pairs of overlapping intervals.
+        The 1st column contains ids from the 1st set, the 2nd column has ids
+        from the 2nd set.
+
+    """
+
+    for vec in [starts1, ends1, starts2, ends2]:
+        # pd is None when pandas is not installed; no input is a Series then
+        if pd is not None and isinstance(vec, pd.Series):
+            warnings.warn(
+                "One of the inputs is provided as pandas.Series and its index "
+                "will be ignored.",
+                SyntaxWarning,
+            )
+
+    starts1 = np.asarray(starts1)
+    ends1 = np.asarray(ends1)
+    starts2 = np.asarray(starts2)
+    ends2 = np.asarray(ends2)
+
+    # Remember the original position of every interval in its own set
+    n1 = len(starts1)
+    n2 = len(starts2)
+    ids1 = np.arange(0, n1)
+    ids2 = np.arange(0, n2)
+
+    # Sort each set by start, then by end
+    order1 = np.lexsort([ends1, starts1])
+    order2 = np.lexsort([ends2, starts2])
+    starts1, ends1, ids1 = starts1[order1], ends1[order1], ids1[order1]
+    starts2, ends2, ids2 = starts2[order2], ends2[order2], ids2[order2]
+
+    # Find interval overlaps
+    match_2in1_starts = np.searchsorted(starts2, starts1, "left")
+    match_2in1_ends = np.searchsorted(starts2, ends1, "right" if closed else "left")
+    # "right" is intentional here to avoid duplication
+    match_1in2_starts = np.searchsorted(starts1, starts2, "right")
+    match_1in2_ends = np.searchsorted(starts1, ends2, "right" if closed else "left")
+
+    # Drop intervals that overlap nothing in the other set
+    match_2in1_mask = match_2in1_ends > match_2in1_starts
+    match_1in2_mask = match_1in2_ends > match_1in2_starts
+    match_2in1_starts, match_2in1_ends = (
+        match_2in1_starts[match_2in1_mask],
+        match_2in1_ends[match_2in1_mask],
+    )
+    match_1in2_starts, match_1in2_ends = (
+        match_1in2_starts[match_1in2_mask],
+        match_1in2_ends[match_1in2_mask],
+    )
+
+    # Generate IDs of pairs of overlapping intervals
+    overlap_ids = np.block(
+        [
+            [
+                np.repeat(ids1[match_2in1_mask], match_2in1_ends - match_2in1_starts)[
+                    :, None
+                ],
+                ids2[arange_multi(match_2in1_starts, match_2in1_ends)][:, None],
+            ],
+            [
+                ids1[arange_multi(match_1in2_starts, match_1in2_ends)][:, None],
+                np.repeat(ids2[match_1in2_mask], match_1in2_ends - match_1in2_starts)[
+                    :, None
+                ],
+            ],
+        ]
+    )
+
+    if sort:
+        # Sort overlaps by the 1st column, then by the 2nd
+        overlap_ids = overlap_ids[np.lexsort([overlap_ids[:, 1], overlap_ids[:, 0]])]
+
+    return overlap_ids
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8dd0546
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+numpy
+matplotlib
+scipy
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6f961ea..5974678 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 import os
 from setuptools import setup
-
+import io
 
 def get_version(path):
     with open(path, "r") as f:
@@ -12,6 +12,14 @@ def get_version(path):
 def read(fname):
     return open(os.path.join(os.path.dirname(__file__), fname)).read()
 
+def _read(*parts, **kwargs):
+    filepath = os.path.join(os.path.dirname(__file__), *parts)
+    encoding = kwargs.pop("encoding", "utf-8")
+    with io.open(filepath, encoding=encoding) as fh:
+        text = fh.read()
+    return text
+
+install_requires = [line.strip() for line in _read("requirements.txt").splitlines() if line.strip()]
 
 setup(name='adjustText',
       version=get_version(os.path.join(
@@ -27,7 +35,7 @@ def read(fname):
           'Documentation': 'https://adjusttext.readthedocs.io/',
       },
       packages=['adjustText'],
-      install_requires=["numpy", "matplotlib", "bioframe", "scipy"],
+      install_requires=install_requires,
       include_package_data=True,
       long_description=read("README.md"),
       long_description_content_type="text/markdown",