diff --git a/metaloci/VERSION b/metaloci/VERSION index 28af839..08456a4 100644 --- a/metaloci/VERSION +++ b/metaloci/VERSION @@ -1 +1 @@ -0.2.5 \ No newline at end of file +0.2.8 \ No newline at end of file diff --git a/metaloci/plot/plot.py b/metaloci/plot/plot.py index 4653609..bab99ef 100644 --- a/metaloci/plot/plot.py +++ b/metaloci/plot/plot.py @@ -14,13 +14,12 @@ from scipy.ndimage import rotate from scipy.stats import linregress, zscore from shapely.geometry import Point -from shapely.geometry.multipolygon import MultiPolygon from metaloci import mlo from metaloci.misc import misc -def get_kk_plot(mlobject: mlo.MetalociObject, restraints: bool = True): +def get_kk_plot(mlobject: mlo.MetalociObject, restraints: bool = True, neighbourhood: float = None): """ Generate Kamada-Kawai plot from pre-calculated restraints. @@ -36,7 +35,6 @@ def get_kk_plot(mlobject: mlo.MetalociObject, restraints: bool = True): matplotlib.pyplot.figure.Figure Kamada-Kawai layout plot object. """ - PLOTSIZE = 10 POINTSIZE = PLOTSIZE * 5 @@ -48,7 +46,7 @@ def get_kk_plot(mlobject: mlo.MetalociObject, restraints: bool = True): options = {"node_size": 50, "edge_color": "black", "linewidths": 0.1, "width": 0.05} if restraints == True: - + nx.draw( mlobject.kk_graph, mlobject.kk_nodes, @@ -69,8 +67,15 @@ def get_kk_plot(mlobject: mlo.MetalociObject, restraints: bool = True): g.annotate(f" {mlobject.chrom}:{mlobject.start}", (xs[0], ys[0]), size=8) g.annotate(f" {mlobject.chrom}:{mlobject.end}", (xs[len(xs) - 1], ys[len(ys) - 1]), size=8) + poi_x, poi_y = xs[mlobject.poi], ys[mlobject.poi] + + if neighbourhood: + + circle = plt.Circle((poi_x, poi_y), neighbourhood, color="red", fill=False, lw=1, zorder=3) + plt.gca().add_patch(circle) + sns.scatterplot( - x=[xs[mlobject.poi]], y=[ys[mlobject.poi]], s=POINTSIZE * 1.5, ec="lime", fc="none", zorder=3 + x=[poi_x], y=[poi_y], s=POINTSIZE * 1.5, ec="lime", fc="none", zorder=4 ) return kk_plt @@ -207,8 +212,6 @@ def get_gaudi_signal_plot(mlobject: mlo.MetalociObject, lmi_geometry: pd.DataFra matplotlib figure containing the Gaudi signal plot. """ - poi = lmi_geometry.loc[lmi_geometry["moran_index"] == mlobject.poi, "bin_index"].iloc[0] - cmap = "PuOr_r" min_value = lmi_geometry.signal.min() max_value = lmi_geometry.signal.max() @@ -216,8 +219,7 @@ def get_gaudi_signal_plot(mlobject: mlo.MetalociObject, lmi_geometry: pd.DataFra gsp, ax = plt.subplots(figsize=(12, 10), subplot_kw={"aspect": "equal"}) lmi_geometry.plot(column="signal", cmap=cmap, linewidth=2, edgecolor="white", ax=ax) - sns.scatterplot(x=[lmi_geometry.X[poi]], y=[lmi_geometry.Y[poi]], s=50, ec="none", fc="lime") - + sns.scatterplot(x=[lmi_geometry.X[mlobject.poi]], y=[lmi_geometry.Y[mlobject.poi]], s=50, ec="none", fc="lime") sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=min_value, vmax=max_value)) sm.set_array([1, 2, 3, 4]) @@ -262,7 +264,6 @@ def get_gaudi_type_plot( gtp : matplotlib.pyplot.figure.Figure matplotlib figure containing the Gaudi type plot. """ - poi = lmi_geometry.loc[lmi_geometry["moran_index"] == mlobject.poi, "bin_index"].iloc[0] legend_elements = [ Line2D([0], [0], marker="o", color="w", markerfacecolor=colors[1], label="HH", markersize=20), @@ -272,15 +273,15 @@ def get_gaudi_type_plot( ] cmap = LinearSegmentedColormap.from_list("Custom cmap", [colors[nu] for nu in colors], len(colors)) - alpha = [1.0 if quad <= signipval else 0.3 for quad in lmi_geometry.LMI_pvalue] + alpha = [1.0 if pval <= signipval else 0.3 for pval in lmi_geometry.LMI_pvalue] gtp, ax = plt.subplots(figsize=(12, 10), subplot_kw={"aspect": "equal"}) lmi_geometry.plot(column="moran_quadrant", cmap=cmap, alpha=alpha, linewidth=2, edgecolor="white", ax=ax) plt.axis("off") sns.scatterplot( - x=[lmi_geometry.X[poi]], - y=[lmi_geometry.Y[poi]], + x=[lmi_geometry.X[mlobject.poi]], + y=[lmi_geometry.Y[mlobject.poi]], s=50, ec="none", fc="lime", @@ -368,6 +369,94 @@ def get_lmi_scatterplot( return scatter, r_value_scat, p_value_scat +# def signal_bed( +# mlobject: mlo.MetalociObject, +# lmi_geometry: pd.DataFrame, +# neighbourhood: float, +# quadrants: list = [1, 3], +# signipval: float = 0.05, +# ): +# """ +# signal_bed _summary_ + +# Parameters +# ---------- +# mlobject : mlo.MetalociObject +# _description_ +# lmi_geometry : pd.DataFrame +# _description_ +# neighbourhood : float +# _description_ +# quadrants : list, optional +# _description_, by default [1, 3] +# signipval : float, optional +# _description_, by default 0.05 + +# Returns +# ------- +# _type_ +# _description_ +# """ + +# poi_distance = mlobject.kk_distances[mlobject.poi] + +# # Select the polygons that are in the quadrant of interest and are significative. +# ml_indexes = lmi_geometry[ +# (lmi_geometry.moran_quadrant.isin(quadrants)) & (lmi_geometry.LMI_pvalue <= signipval) +# ].bin_index.values.tolist() + +# # Make a big polygon from the small poligons that are significant. +# metalocis = lmi_geometry[lmi_geometry.bin_index.isin(ml_indexes)].unary_union + +# if metalocis and metalocis.geom_type == "Polygon": + +# metalocis = MultiPolygon([metalocis]) # Need a multipolygon in order for the code to work. + +# poi_point = Point( +# (lmi_geometry[lmi_geometry.moran_index == mlobject.poi].X, lmi_geometry[lmi_geometry.moran_index == mlobject.poi].Y) +# ) + +# metalocis_bed = [] + +# bed_data = defaultdict(list) + +# try: + +# for metaloci in metalocis.geoms: + +# metaloci = gpd.GeoSeries(metaloci) + +# if poi_point.within(metaloci[0]): + +# for _, row_ml in lmi_geometry.iterrows(): + +# adjacent_point = Point((row_ml.X, row_ml.Y)) + +# if adjacent_point.within(metaloci[0]): + +# metalocis_bed.append(row_ml.bin_index) + +# # Add close particles +# metalocis_bed.sort() +# close_bins = [i for i, distance in enumerate(poi_distance) if distance <= neighbourhood / 2] +# metalocis_bed = np.sort(list(set(close_bins + metalocis_bed))) + +# for point in metalocis_bed: + +# bed_data["chr"].append(lmi_geometry.bin_chr[point]) +# bed_data["start"].append(lmi_geometry.bin_start[point]) +# bed_data["end"].append(lmi_geometry.bin_end[point]) +# bed_data["bin"].append(point) + +# except: + +# pass + +# bed_data = pd.DataFrame(bed_data) + +# return bed_data, metalocis_bed + + def signal_bed( mlobject: mlo.MetalociObject, @@ -375,6 +464,7 @@ def signal_bed( neighbourhood: float, quadrants: list = [1, 3], signipval: float = 0.05, + metaloci_only: bool = False, ): """ signal_bed _summary_ @@ -391,6 +481,8 @@ def signal_bed( _description_, by default [1, 3] signipval : float, optional _description_, by default 0.05 + metaloci_only : bool, optional + _description_, by default False Returns ------- @@ -398,65 +490,57 @@ def signal_bed( _description_ """ - poi_distance = mlobject.kk_distances[mlobject.poi] + metalocis_bed = [] - # Select the polygons that are in the quadrant of interest and are significative. - ml_indexes = lmi_geometry[ - (lmi_geometry.moran_quadrant.isin(quadrants)) & (lmi_geometry.LMI_pvalue <= signipval) - ].bin_index.values.tolist() + bed_data = defaultdict(list) - # Make a big polygon from the small poligons that are significant. - metalocis = lmi_geometry[lmi_geometry.bin_index.isin(ml_indexes)].unary_union + if metaloci_only == False: - if metalocis and metalocis.geom_type == "Polygon": + for _, row_ml in lmi_geometry.iterrows(): - metalocis = MultiPolygon([metalocis]) # Need a multipolygon in order for the code to work. + if row_ml.LMI_pvalue <= signipval and row_ml.moran_quadrant in quadrants: - poi_point = Point( - (lmi_geometry[lmi_geometry.bin_index == mlobject.poi].X, lmi_geometry[lmi_geometry.bin_index == mlobject.poi].Y) - ) + metalocis_bed.append(row_ml.bin_index) - metalocis_bed = [] + else: - bed_data = defaultdict(list) + poi_row = lmi_geometry[lmi_geometry.bin_index == mlobject.poi].squeeze() - try: + # print(lmi_geometry[lmi_geometry.bin_index == mlobject.poi]) - for metaloci in metalocis.geoms: + if poi_row.LMI_pvalue <= signipval and poi_row.moran_quadrant in quadrants: - metaloci = gpd.GeoSeries(metaloci) + poi_point = Point( + (lmi_geometry[lmi_geometry.bin_index == mlobject.poi].X, lmi_geometry[lmi_geometry.bin_index == mlobject.poi].Y) + ) - if poi_point.within(metaloci[0]): + poi_point = Point(lmi_geometry.loc[lmi_geometry["bin_index"] == mlobject.poi, ["X", "Y"]].iloc[0]) - for _, row_ml in lmi_geometry.iterrows(): + for _, row_ml in lmi_geometry.iterrows(): - adjacent_point = Point((row_ml.X, row_ml.Y)) + adjacent_point = Point((row_ml.X, row_ml.Y)) - if adjacent_point.within(metaloci[0]): + if adjacent_point.distance(poi_point) <= neighbourhood: - metalocis_bed.append(row_ml.bin_index) + metalocis_bed.append(row_ml.bin_index) - # Add close particles - metalocis_bed.sort() - close_bins = [i for i, distance in enumerate(poi_distance) if distance <= neighbourhood / 2] - metalocis_bed = np.sort(list(set(close_bins + metalocis_bed))) + metalocis_bed.sort() - for point in metalocis_bed: + # print(metalocis_bed) - bed_data["chr"].append(lmi_geometry.bin_chr[point]) - bed_data["start"].append(lmi_geometry.bin_start[point]) - bed_data["end"].append(lmi_geometry.bin_end[point]) - bed_data["bin"].append(point) + for point in metalocis_bed: - except: + bed_data["chr"].append(lmi_geometry.bin_chr[point]) + bed_data["start"].append(lmi_geometry.bin_start[point]) + bed_data["end"].append(lmi_geometry.bin_end[point]) + bed_data["bin"].append(point) + bed_data["quadrant"].append(lmi_geometry.moran_quadrant[point]) - pass bed_data = pd.DataFrame(bed_data) return bed_data, metalocis_bed - def signal_plot(mlobject: mlo.MetalociObject, lmi_geometry: pd.DataFrame, metalocis, bins_sig, coords_sig): sig_plt = plt.figure(figsize=(10, 1.5)) diff --git a/metaloci/spatial_stats/lmi.py b/metaloci/spatial_stats/lmi.py index 68f8dae..670b0cd 100644 --- a/metaloci/spatial_stats/lmi.py +++ b/metaloci/spatial_stats/lmi.py @@ -18,7 +18,6 @@ from metaloci import mlo from metaloci.misc import misc - warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning) @@ -85,8 +84,8 @@ def construct_voronoi(mlobject: mlo.MetalociObject, buffer: float): df_geometry["bin_index"].append(x) df_geometry["moran_index"].append(i) - df_geometry["X"].append(mlobject.kk_coords[i][0]) - df_geometry["Y"].append(mlobject.kk_coords[i][1]) + df_geometry["X"].append(mlobject.kk_coords[x][0]) + df_geometry["Y"].append(mlobject.kk_coords[x][1]) df_geometry["geometry"].append(geometry_data.loc[i, "geometry"]) df_geometry = pd.DataFrame(df_geometry) @@ -197,9 +196,16 @@ def load_region_signals(mlobject: mlo.MetalociObject, signal_data: dict, signal_ """ # Read signal file. Will only process the signals present in this list. - with open(signal_file) as signals_handler: - signal_types = [line.rstrip() for line in signals_handler] + if os.path.isfile(signal_file): + + with open(signal_file) as signals_handler: + + signal_types = [line.rstrip() for line in signals_handler] + + else: + + signal_types = [signal_file] # region_signal = signal_data[mlobject.chrom][ # (signal_data[mlobject.chrom]["start"] >= int(mlobject.start / mlobject.resolution) * mlobject.resolution) diff --git a/metaloci/tools/figure.py b/metaloci/tools/figure.py index 2a61c07..a8a2fd1 100644 --- a/metaloci/tools/figure.py +++ b/metaloci/tools/figure.py @@ -5,7 +5,7 @@ import pathlib import pickle import re -from argparse import (ArgumentParser, HelpFormatter, RawTextHelpFormatter) +from argparse import HelpFormatter from datetime import timedelta from time import time @@ -75,11 +75,13 @@ def populate_args(parser): ) optional_arg.add_argument( - "-l", - "--log", - dest="log", + "-m", + "--metalocis", + dest="metalocis", action="store_true", - help="Flag to write bed with metaloci, for each region.", + help="Flag to select highlightning of the signal plots. If True, only the neighbouring bins from the point of interest will be " + "highlighted (independently of the quadrant and significance of those bins, but only if the point of interest is significant). " + "If False, all significant regions that correspond to the quadrant selected with -q will be highlighted (default: False).", ) optional_arg.add_argument( @@ -125,7 +127,7 @@ def run(opts): work_dir = opts.work_dir regions = opts.regions signals = opts.signals - metaloci_bed = opts.log + metaloci_bed = opts.metalocis quadrants = opts.quart signipval = opts.signipval rmtypes = opts.rm_types @@ -270,6 +272,7 @@ def run(opts): buffer * BFACT, quadrants, signipval, + metaloci_bed ) # selmetaloci = [] @@ -314,7 +317,7 @@ def run(opts): yticks_signal = ax.get_yticks()[1:-1] max_chr_yax = max(len(str(max(yticks_signal))), len(str(min(yticks_signal)))) - signal_left = {3 : 45, 4 : 30, 5 : 20, 6 : 9} + signal_left = {3 : 41, 4 : 30, 5 : 20, 6 : 9} if max_chr_yax not in signal_left.keys(): @@ -338,14 +341,14 @@ def run(opts): plt.close() print(f"\t\tFinal composite figure for region '{region}' and signal '{signal}' -> done.") - if metaloci_bed and len(bed_data) > 0: + if len(bed_data) > 0: metaloci_bed_path = f"{work_dir}{mlobject.chrom}/metalocis_log/{signal}" pathlib.Path(metaloci_bed_path).mkdir(parents=True, exist_ok=True) - bed_data.to_csv(f"{metaloci_bed_path}/{mlobject.region}_{mlobject.poi}_{signal}_metalocis.txt", sep="\t", index=False) + bed_data.to_csv(f"{metaloci_bed_path}/{mlobject.region}_{mlobject.poi}_{signal}_metalocis.bed", sep="\t", index=False) print(f"\t\tBed file with metalocis location saved to: " - f"{metaloci_bed_path}/{mlobject.region}_{mlobject.poi}_{signal}.txt") + f"{metaloci_bed_path}/{mlobject.region}_{mlobject.poi}_{signal}.bed") # Log for signal_key, df in mlobject.lmi_info.items(): @@ -392,6 +395,6 @@ def run(opts): os.remove(f"{plot_filename}_gsp.{ext}") os.remove(f"{plot_filename}_gtp.{ext}") - print(f"\nInformation saved to {os.path.join(work_dir, 'moran_info.txt')}") + print(f"\nInformation saved to: '{os.path.join(work_dir, 'moran_info.txt')}'") print(f"\nTotal time spent: {timedelta(seconds=round(time() - start_timer))}") print("\nall done.") diff --git a/metaloci/tools/layout.py b/metaloci/tools/layout.py index 4724b5a..66c5a14 100644 --- a/metaloci/tools/layout.py +++ b/metaloci/tools/layout.py @@ -237,16 +237,28 @@ def get_region_layout(row, opts, progress=None, silent: bool = True): if silent == False: print(f"\tPlotting Kamada-Kawai...") pathlib.Path(os.path.join(work_dir, region_chrom), "plots", "KK").mkdir(parents=True, exist_ok=True) + pathlib.Path(os.path.join(work_dir, region_chrom), "plots", "mixed_matrices").mkdir( + parents=True, exist_ok=True + ) - kk_plt = plot.get_kk_plot(mlobject) - - fig_save_path = os.path.join( + plot.get_kk_plot(mlobject).savefig(os.path.join( work_dir, region_chrom, f"plots/KK/{region_coords}_" f"{mlobject.kk_cutoff}_KK.pdf", + ), + dpi=300) + + plt.close() + + plot.mixed_matrices_plot(mlobject).savefig( + os.path.join( + work_dir, + region_chrom, + f"plots/mixed_matrices/{region_coords}_" f"{mlobject.kk_cutoff}_mixed-matrices.pdf", + ), + dpi=300, ) - kk_plt.savefig(fig_save_path, dpi=300) plt.close() if progress is not None: progress["plots"] = True @@ -306,24 +318,6 @@ def get_region_layout(row, opts, progress=None, silent: bool = True): # Get submatrix of restraints restraints_matrix, mlobject = kk.get_restraints_matrix(mlobject) - if save_plots: - - mixed_matrices_plot = plot.mixed_matrices_plot(mlobject) - - pathlib.Path(os.path.join(work_dir, region_chrom), "plots", "mixed_matrices").mkdir( - parents=True, exist_ok=True - ) - - mixed_matrices_plot.savefig( - os.path.join( - work_dir, - region_chrom, - f"plots/mixed_matrices/{region_coords}_" f"{mlobject.kk_cutoff}_mixed-matrices.pdf", - ), - dpi=300, - ) - - plt.close() if silent == False: @@ -336,17 +330,31 @@ def get_region_layout(row, opts, progress=None, silent: bool = True): if len(cutoffs) > 1 or save_plots: - pathlib.Path(os.path.join(work_dir, region_chrom), "plots", "KK").mkdir(parents=True, exist_ok=True) + if silent == False: print(f"\tPlotting Kamada-Kawai...") - kk_plt = plot.get_kk_plot(mlobject) + pathlib.Path(os.path.join(work_dir, region_chrom), "plots", "KK").mkdir(parents=True, exist_ok=True) + pathlib.Path(os.path.join(work_dir, region_chrom), "plots", "mixed_matrices").mkdir( + parents=True, exist_ok=True + ) - fig_save_path = os.path.join( + plot.get_kk_plot(mlobject).savefig(os.path.join( work_dir, region_chrom, f"plots/KK/{region_coords}_" f"{mlobject.kk_cutoff}_KK.pdf", + ), + dpi=300) + + plt.close() + + plot.mixed_matrices_plot(mlobject).savefig( + os.path.join( + work_dir, + region_chrom, + f"plots/mixed_matrices/{region_coords}_" f"{mlobject.kk_cutoff}_mixed-matrices.pdf", + ), + dpi=300, ) - kk_plt.savefig(fig_save_path, dpi=300) plt.close() if progress is not None: progress["plots"] = True @@ -359,8 +367,8 @@ def get_region_layout(row, opts, progress=None, silent: bool = True): if silent == False: print( - f"\tKamada-Kawai layout of region {mlobject.region} " - f"at {int(cutoff * 100)} % cutoff saved to file: {mlobject.save_path}" + f"\tKamada-Kawai layout of region '{mlobject.region}' " + f"at {int(cutoff * 100)} % cutoff saved to file: '{mlobject.save_path}'" ) # Write to file a list of bad regions, according to the filters defined in clean_matrix(). diff --git a/metaloci/tools/ml.py b/metaloci/tools/ml.py index 5b070a5..d76acf3 100644 --- a/metaloci/tools/ml.py +++ b/metaloci/tools/ml.py @@ -3,13 +3,13 @@ """ import multiprocessing as mp import os +import pathlib import pickle import re import subprocess as sp from argparse import HelpFormatter from datetime import timedelta from time import time -import pathlib import pandas as pd @@ -80,11 +80,11 @@ def populate_args(parser): ) optional_arg.add_argument( - "-l", - "--log", - dest="log", + "-i", + "--info", + dest="info", action="store_true", - help="Flag to unpickle LMI log.", + help="Flag to unpickle LMI info.", ) optional_arg.add_argument( @@ -114,7 +114,7 @@ def get_lmi(region_iter, opts, signal_data, progress=None, i=None, silent: bool regions = opts.region_file n_permutations = opts.perms signipval = opts.signipval - moran_log = opts.log + moran_info = opts.info force = opts.force if not work_dir.endswith("/"): @@ -184,7 +184,7 @@ def get_lmi(region_iter, opts, signal_data, progress=None, i=None, silent: bool if progress is not None: progress["done"] = True - if moran_log: + if moran_info: for signal, df in mlobject.lmi_info.items(): @@ -229,7 +229,7 @@ def get_lmi(region_iter, opts, signal_data, progress=None, i=None, silent: bool mlobject.save(hamlo_namendle) - if moran_log: + if moran_info: for signal, df in mlobject.lmi_info.items(): @@ -260,7 +260,7 @@ def run(opts): regions = opts.region_file multiprocess = opts.multiprocess cores = opts.threads - moran_log = opts.log + moran_info = opts.info if opts.threads is None: @@ -305,15 +305,15 @@ def run(opts): print("\tSome regions had already been computed and have been skipped.", end="") - if moran_log: + if moran_info: - print(f"\n\tLog saved to: {work_dir}chrN/moran_log", end="") + print(f"\n\tLog saved to: '{work_dir}chr/moran_log/'", end="") except Exception: if progress["missing_signal"] is not None: - print(f"\tSignal {progress['missing_signal']} is in the signal list but has not been processed with prep.\n" + print(f"\tSignal '{progress['missing_signal']}' is in the signal list but has not been processed with prep.\n" "\tprocess that signal or remove it from the signal list.\nExiting...") pool.close() diff --git a/metaloci/tools/prep.py b/metaloci/tools/prep.py index e379296..11623b5 100644 --- a/metaloci/tools/prep.py +++ b/metaloci/tools/prep.py @@ -7,14 +7,15 @@ import subprocess as sp import warnings from argparse import HelpFormatter -from pybedtools import BedTool -import pandas as pd -from metaloci.misc import misc -from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning -import warnings import h5py import hicstraw +import pandas as pd +from numba.core.errors import (NumbaDeprecationWarning, + NumbaPendingDeprecationWarning) +from pybedtools import BedTool + +from metaloci.misc import misc warnings.simplefilter('ignore', category=NumbaDeprecationWarning) warnings.simplefilter('ignore', category=NumbaPendingDeprecationWarning) diff --git a/setup.py b/setup.py index 34b08f2..db5cf98 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,10 @@ def read_requirements(path): long_description_content_type="text/markdown", author="Leo Zuber, Iago Maceda, Juan Antonio Rodríguez and Marc Martí-Renom", author_email="martirenom@cnag.eu", + classifiers=["Programming Language :: Python :: 3.9", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Operating System :: POSIX :: Linux",], packages=find_packages(exclude=["tests", ".github"]), install_requires=read_requirements("requirements.txt"), entry_points={