diff --git a/docs/conf.py b/docs/conf.py
index 462b62a..a9175a4 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -11,7 +11,9 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import sys, os
+import sys
+import os
+import pybdm
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
@@ -22,8 +24,6 @@
 parent = os.path.dirname(cwd)
 sys.path.insert(0, parent)
 
-import pybdm
-
 # -- General configuration -----------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
@@ -40,6 +40,8 @@
     'sphinxcontrib.bibtex'
 ]
 
+# BibTeX bibliography files
+bibtex_bibfiles = ['references.bib']
 
 # Napoleon settings
 napoleon_google_docstring = False
@@ -74,8 +76,8 @@
 master_doc = 'index'
 
 # General information about the project.
-project = u'PyBDM'
-copyright = u'2019, Szymon Talaga'
+project = 'PyBDM'
+copyright = '2019, Szymon Talaga'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -157,7 +159,7 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+# html_static_path = ['_static']
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
@@ -220,8 +222,8 @@
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-    ('index', 'pybdm.tex', u'PyBDM Documentation',
-     u'Szymon Talaga', 'manual'),
+    ('index', 'pybdm.tex', 'PyBDM Documentation',
+     'Szymon Talaga', 'manual'),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
@@ -250,8 +252,8 @@
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('index', 'pybdm', u'PyBDM Documentation',
-     [u'Szymon Talaga'], 1)
+    ('index', 'pybdm', 'PyBDM Documentation',
+     ['Szymon Talaga'], 1)
 ]
 
 # If true, show URL addresses after external links.
@@ -264,8 +266,8 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    ('index', 'pybdm', u'PyBDM Documentation',
-     u'Szymon Talaga', 'pybdm', 'One line description of project.',
+    ('index', 'pybdm', 'PyBDM Documentation',
+     'Szymon Talaga', 'pybdm', 'One line description of project.',
      'Miscellaneous'),
 ]
diff --git a/docs/roadmap.rst b/docs/roadmap.rst
index f080afa..237df74 100644
--- a/docs/roadmap.rst
+++ b/docs/roadmap.rst
@@ -9,7 +9,6 @@ Next major release
 * Perturbation experiment for growing/shrinking systems
 * Implement Bayesian framework for approximating probability of a stochastic
   generating source
-* Add a partition algorithm with the periodic boundary condition
 * Use integer-based conding of dataset blocks (to lower memory-footprint).
   This will be done only if it will be possible to use integer coding without
   significantly negative impact on the performance.
diff --git a/docs/theory.rst b/docs/theory.rst
index 11c1633..c136828 100644
--- a/docs/theory.rst
+++ b/docs/theory.rst
@@ -187,9 +187,48 @@ until its rightmost column or lowest row contain the values on the boundary
 of the original matrix.
 
 The condition can yield different results for small objects, but are consistent
-asymptotically in the limit of large object sizes. Detailed discussion of
-boundary conditions in BDM can be found in :cite:`zenil_decomposition_2018`.
+asymptotically in the limit of large object sizes.
+If the periodic condition were used instead, the missing columns would be
+supplied by wrapping around to the first columns of the matrix, and likewise
+for rows, so we would get four slices:
+
++--+--+--+
+|1 |2 |3 |
++--+--+--+
+|6 |7 |8 |
++--+--+--+
+|11|12|13|
++--+--+--+
+
++--+--+--+
+|4 |5 |1 |
++--+--+--+
+|9 |10|6 |
++--+--+--+
+|14|15|11|
++--+--+--+
+
++--+--+--+
+|16|17|18|
++--+--+--+
+|21|22|23|
++--+--+--+
+|1 |2 |3 |
++--+--+--+
+
++--+--+--+
+|19|20|16|
++--+--+--+
+|24|25|21|
++--+--+--+
+|4 |5 |1 |
++--+--+--+
+
+This ensures that all slices have equal size when computing the BDM.
+
+Detailed discussion of boundary conditions in BDM can be found
+in :cite:`zenil_decomposition_2018`.
 
 Normalized BDM
 --------------
diff --git a/docs/usage.rst b/docs/usage.rst
index 58537a5..a2959e3 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -14,13 +14,13 @@ Different boundary conditions (see :doc:`/theory`) are implemented by
 .. code-block:: python
 
     from pybdm import BDM
-    from pybdm import PartitionIgnore, PartitionRecursive, PartitionCorrelated
+    from pybdm import PartitionIgnore, PartitionRecursive, PartitionCorrelated, PartitionPeriodic
 
     bdm_ignore = BDM(ndim=1, partition=PartitionIgnore)
     # This is default so it is equivalent to
    bdm_ignore = BDM(ndim=1)
 
-    bdm_recurisve = BDM(ndim=1, partition=PartitionRecursive, min_length=2)
+    bdm_recursive = BDM(ndim=1, partition=PartitionRecursive, min_length=2)
     # Minimum size is specified as length, since only symmetric slices
     # are accepted in the case of multidimensional objects.
@@ -28,6 +28,10 @@
     # Step-size defaults to 1, so this is equivalent to
     bdm_correlated = BDM(ndim=1, partition=PartitionCorrelated, shift=1)
 
+    bdm_periodic = BDM(ndim=1, partition=PartitionPeriodic)
+    # This is similar to PartitionIgnore, but the dataset is extended
+    # periodically so that every block has the full partition shape
+
 
 Normalized BDM
 --------------
diff --git a/pybdm/algorithms.py b/pybdm/algorithms.py
index 694440e..9e2a113 100644
--- a/pybdm/algorithms.py
+++ b/pybdm/algorithms.py
@@ -1,8 +1,46 @@
 """Algorithms based on ``BDM`` objects."""
 from itertools import product
+from dataclasses import dataclass
 from random import choice
 import numpy as np
 
+
+@dataclass
+class DeconvolutionResult:
+    """A deconvolution result data class.
+
+    The deconvolution of a graph produces an information signature for each
+    edge, together with the differences between consecutive signatures when
+    they are sorted from the maximum to the minimum value. Using an auxiliary
+    cutoff, an edge is marked for deletion when its difference from the next
+    edge is greater than the auxiliary cutoff + log(2) (theoretically).
+
+    Attributes:
+        auxiliary_cutoff (float):
+            The cutoff value used to determine which edges are to be deleted.
+        info_loss_values (np.array):
+            A *Numpy* array containing the information signature loss of
+            each edge. This is retrieved when an edge is perturbed.
+        info_loss_edges (np.array):
+            A *Numpy* array containing the edge corresponding to each loss
+            value; the ith edge corresponds to the ith information loss value
+            in ``info_loss_values``.
+        differences (np.array):
+            A *Numpy* array produced after sorting ``info_loss_values`` from
+            maximum to minimum; the ith entry is the ith information loss
+            value minus the (i+1)th one.
+        edges_for_deletion (np.array):
+            A *Numpy* array with the edges whose differences exceed the
+            ``auxiliary_cutoff`` and which are therefore marked for deletion.
+        difference_filter (np.array):
+            A boolean *Numpy* array indicating whether the ith edge in
+            ``info_loss_edges`` is included in ``edges_for_deletion``.
+    """
+    auxiliary_cutoff: float
+    info_loss_values: np.array
+    info_loss_edges: np.array
+    differences: np.array
+    edges_for_deletion: np.array
+    difference_filter: np.array
+
 
 class PerturbationExperiment:
     """Perturbation experiment class.
@@ -68,7 +106,7 @@ def __init__(self, bdm, X=None, metric='bdm'):
     def __repr__(self):
         cn = self.__class__.__name__
         bdm = str(self.bdm)[1:-1]
-        return "<{}(metric={}) with {}>".format(cn, self.metric, bdm)
+        return f"<{cn}(metric={self.metric}) with {bdm}>"
 
     @property
     def size(self):
@@ -207,7 +245,7 @@ def perturb(self, idx, value=-1, keep_changes=False):
         >>> X = np.ones((30, ), dtype=int)
         >>> perturbation = PerturbationExperiment(bdm, X)
         >>> perturbation.perturb((10, ), -1) # doctest: +FLOAT_CMP
-        26.91763012739709
+        np.float64(26.91763012739709)
         """
         old_value = self.X[idx]
         if value < 0:
@@ -258,7 +296,7 @@ def run(self, idx=None, values=None, keep_changes=False):
         """
         if idx is None:
             indexes = [ range(k) for k in self.X.shape ]
-            idx = np.array([ x for x in product(*indexes) ], dtype=int)
+            idx = np.array(list(product(*indexes)), dtype=int)
         if values is None:
             values = np.full((idx.shape[0], ), -1, dtype=int)
         return np.apply_along_axis(
@@ -266,3 +304,125 @@
             axis=1,
             arr=np.column_stack((idx, values))
         )
+
+    def _sort_info_loss_values(self, info_loss_values, info_loss_edges):
+        # Sort information loss values (and their edges) in descending order.
+        sorted_values = np.argsort(-info_loss_values[:,0])
+        info_loss_values = info_loss_values[sorted_values]
+        info_loss_edges = info_loss_edges[sorted_values]
+
+        return info_loss_values, info_loss_edges
+
+    def _compute_differences(self, info_loss_values):
+        # Differences between consecutive (descending) information loss values.
+        return np.diff(info_loss_values[:, -1]) * -1
+
+    def _filter_by_differences(self, auxiliary_cutoff, info_loss_edges, differences, is_directed):
+
+        difference_filter = list(np.isin(
+            np.arange(len(differences)),
+            np.where(abs(differences - np.log2(2)) > auxiliary_cutoff)
+        ))
+        # The last edge has no following difference, so it is never selected.
+        difference_filter.extend([False])
+
+        edges_for_deletion = info_loss_edges[difference_filter]
+
+        if not is_directed:
+            edges_for_deletion = np.array([*edges_for_deletion, *edges_for_deletion[:, [1,0]]], dtype=int)
+
+        return edges_for_deletion, difference_filter
+
+    def _process_deconvolution(self, auxiliary_cutoff, info_loss_values, info_loss_edges, is_directed):
+
+        info_loss_values, info_loss_edges = self._sort_info_loss_values(info_loss_values, info_loss_edges)
+        differences = self._compute_differences(info_loss_values)
+        edges_for_deletion, difference_filter = self._filter_by_differences(
+            auxiliary_cutoff, info_loss_edges, differences, is_directed
+        )
+
+        return DeconvolutionResult(
+            auxiliary_cutoff, info_loss_values, info_loss_edges,
+            differences, edges_for_deletion, difference_filter
+        )
+
+    def deconvolve(self, auxiliary_cutoff,
+                   is_directed=False, keep_changes=False):
+        """Run causal deconvolution.
+
+        Parameters
+        ----------
+        auxiliary_cutoff : float
+            Value to be used as the cutoff when cutting edges
+            based on their information signature differences.
+        is_directed : bool
+            If ``True`` then the dataset is treated as a directed graph and
+            the information signatures of all edges are retrieved.
+            If ``False`` then the dataset is treated as an undirected graph
+            and the edges (i,j) and (j,i) are considered the same when
+            retrieving the information signatures.
+        keep_changes : bool
+            If ``True`` then changes in the dataset are persistent,
+            so all the edges that have been cut are applied to the dataset.
+
+        Returns
+        -------
+        DeconvolutionResult
+            A dataclass with the information loss values and edges, their
+            differences, and the edges selected for deletion.
+
+        Examples
+        --------
+        >>> from pybdm import BDM
+        >>> bdm = BDM(ndim=2, shape=(4,4))
+        >>> X = np.array([[0, 1, 1, 1, 1, 0, 0, 0],
+        ...               [1, 0, 1, 1, 0, 0, 0, 0],
+        ...               [1, 1, 0, 1, 0, 0, 0, 0],
+        ...               [1, 1, 1, 0, 0, 0, 0, 0],
+        ...               [1, 0, 0, 0, 0, 1, 1, 1],
+        ...               [0, 0, 0, 0, 1, 0, 1, 1],
+        ...               [0, 0, 0, 0, 1, 1, 0, 1],
+        ...               [0, 0, 0, 0, 1, 1, 1, 0]])
+        >>> perturbation = PerturbationExperiment(bdm, X)
+        >>> result = perturbation.deconvolve(auxiliary_cutoff=20, is_directed=False)
+        >>> result.edges_for_deletion
+        array([[0, 4],
+               [4, 0]])
+        """
+        if self.X.ndim != 2:
+            raise ValueError('Deconvolution is only supported for 2D datasets')
+
+        info_loss_values = np.empty((0, 1), dtype=float)
+        info_loss_edges = np.empty((0, 2), dtype=int)
+        deleted_edge_graph = np.copy(self.X)
+
+        nonzero_edges = deleted_edge_graph if is_directed else np.triu(deleted_edge_graph)
+        nonzero_edges = np.column_stack(np.nonzero(nonzero_edges))
+        original_bdm = self.bdm.bdm(self.X)
+
+        # Remove each edge in turn and record the resulting BDM information loss.
+        for edge in nonzero_edges:
+
+            edges_to_perturb = (edge[0], edge[1]) if is_directed else (edge, edge[::-1])
+
+            deleted_edge_graph[edges_to_perturb] = 0
+
+            deleted_edge_bdm = self.bdm.bdm(deleted_edge_graph)
+            info_loss = original_bdm - deleted_edge_bdm
+
+            info_loss_edges = np.vstack((info_loss_edges, np.array([edge])))
+            info_loss_values = np.vstack((info_loss_values, np.array([info_loss])))
+
+            # Restore the edge before perturbing the next one.
+            deleted_edge_graph[edges_to_perturb] = 1
+
+        deconvolution_result = self._process_deconvolution(
+            auxiliary_cutoff, info_loss_values, info_loss_edges, is_directed
+        )
+
+        if deconvolution_result.edges_for_deletion.size == 0:
+            return deconvolution_result
+
+        if not keep_changes:
+            deleted_edge_graph[
+                deconvolution_result.edges_for_deletion[:,0],
+                deconvolution_result.edges_for_deletion[:,1]
+            ] = 0
+            return deconvolution_result
+
+        self.run(idx=deconvolution_result.edges_for_deletion, keep_changes=keep_changes)
+        return deconvolution_result
diff --git a/pybdm/bdm.py b/pybdm/bdm.py
index 8bc6939..40ea3cb 100644
--- a/pybdm/bdm.py
+++ b/pybdm/bdm.py
@@ -129,21 +129,19 @@ def __init__(self, ndim, nsymbols=2, shape=None, partition=PartitionIgnore,
         self.ndim = ndim
         try:
             self.ctmname = ctmname if ctmname else self._ndim_to_ctm[(ndim, nsymbols)]
-        except KeyError:
-            msg = "no CTM dataset for 'ndim={}' and 'nsymbols={}'".format(
-                ndim, nsymbols
-            )
-            raise CTMDatasetNotFoundError(msg)
+        except KeyError as ke:
+            msg = f"no CTM dataset for 'ndim={ndim}' and 'nsymbols={nsymbols}'"
+            raise CTMDatasetNotFoundError(msg) from ke
         try:
             nsymbols, _shape = self.ctmname.split('-')[-2:]
-        except ValueError:
+        except ValueError as ve:
             msg = "incorrect 'ctmname'; it should be in format " + \
                 "'name-b-d'"
-            raise BDMConfigurationError(msg)
+            raise BDMConfigurationError(msg) from ve
         self.nsymbols = int(nsymbols[1:])
         if shape is None:
             shape = tuple(int(x) for x in _shape[1:].split('x'))
-        if any([ x != shape[0] for x in shape ]):
+        if any(x != shape[0] for x in shape):
             raise BDMConfigurationError("'shape' has to be equal in each dimension")
         ctm, ctm_missing = get_ctm_dataset(self.ctmname)
         self._ctm = ctm
@@ -155,9 +153,7 @@ def __repr__(self):
         partition = str(self.partition)[1:-1]
         cn = self.__class__.__name__
-        return "<{}(ndim={}, nsymbols={}) with {}>".format(
-            cn, self.ndim, self.nsymbols, partition
-        )
+        return f"<{cn}(ndim={self.ndim}, nsymbols={self.nsymbols}) with {partition}>"
 
     def decompose(self, X):
         """Decompose a dataset into blocks.
@@ -240,9 +236,7 @@ def lookup(self, blocks, lookup_ctm=True):
             except KeyError:
                 cmx = self._ctm_missing[sh]
                 if self.warn_if_missing_ctm and options.get('warn_if_missing_ctm'):
-                    msg = "CTM dataset does not contain object '{}' of shape {}".format(
-                        key, sh
-                    )
+                    msg = f"CTM dataset does not contain object '{key}' of shape {sh}"
                     warnings.warn(msg, BDMRuntimeWarning, stacklevel=2)
             yield key, cmx
@@ -385,9 +379,8 @@ def bdm(self, X, normalized=False, check_data=True):
             self._check_data(X)
         if normalized and isinstance(self.partition, PartitionCorrelated):
             raise NotImplementedError(
-                "normalized BDM not implemented for '{}' partition".format(
-                    PartitionCorrelated.name
-                ))
+                f"normalized BDM not implemented for '{PartitionCorrelated.name}' partition"
+            )
         counter = self.decompose_and_count(X)
         cmx = self.compute_bdm(counter)
         if self.raise_if_zero and options.get('raise_if_zero') and cmx == 0:
@@ -429,7 +422,7 @@ def compute_ent(self, *counters):
         >>> c1 = Counter([('111111111111', 1.95207842085224e-08)])
         >>> c2 = Counter([('000000000000', 1.95207842085224e-08)])
         >>> bdm.compute_ent(c1, c2) # doctest: +FLOAT_CMP
-        1.0
+        np.float64(1.0)
         """
         counter = reduce(lambda x, y: x+y, counters)
         ncounts = sum(counter.values())
@@ -474,15 +467,14 @@ def ent(self, X, normalized=False, check_data=True):
         >>> import numpy as np
         >>> bdm = BDM(ndim=2)
         >>> bdm.ent(np.ones((12, 12), dtype=int)) # doctest: +FLOAT_CMP
-        0.0
+        np.float64(0.0)
         """
         if check_data:
             self._check_data(X)
         if normalized and isinstance(self.partition, PartitionCorrelated):
             raise NotImplementedError(
-                "normalized entropy not implemented for '{}' partition".format(
-                    PartitionCorrelated.name
-                ))
+                f"normalized entropy not implemented for '{PartitionCorrelated.name}' partition"
+            )
         counter = self.decompose_and_count(X, lookup_ctm=False)
         ent = self.compute_ent(counter)
         if normalized:
@@ -511,16 +503,14 @@ def _check_data(self, X):
             raise TypeError("'X' has to be an integer array")
         symbols = np.unique(X)
         if symbols.size > self.nsymbols:
-            raise ValueError("'X' has more than {} unique symbols".format(
-                self.nsymbols
-            ))
-        valid_symbols = np.array([ _ for _ in range(self.nsymbols) ])
+            raise ValueError(f"'X' has more than {self.nsymbols} unique symbols")
+        valid_symbols = np.array(list(range(self.nsymbols)))
         bad_symbols = symbols[~np.isin(symbols, valid_symbols)]
         if bad_symbols.size > 0:
-            raise ValueError("'X' contains symbols outside of [0, {}]: {}".format(
-                str(self.nsymbols-1),
-                ", ".join(str(s) for s in bad_symbols)
-            ))
+            bad_symbols_str = ", ".join(str(s) for s in bad_symbols)
+            raise ValueError(
+                f"'X' contains symbols outside of [0, {self.nsymbols-1}]: {bad_symbols_str}"
+            )
 
     def _cycle_parts(self, shape):
         """Cycle over all possible dataset parts sorted by complexity."""
diff --git a/pybdm/encoding.py b/pybdm/encoding.py
index 818ece4..0b75060 100644
--- a/pybdm/encoding.py
+++ b/pybdm/encoding.py
@@ -101,19 +101,19 @@
     Examples
     --------
     >>> encode_sequence(np.array([1, 0, 0]))
-    4
+    np.int64(4)
     """
     if seq.size == 0:
         return 0
     if seq.ndim != 1:
         raise AttributeError("'seq' has to be a 1D array")
-    if seq.dtype != np.int:
+    if seq.dtype != np.int_:
         raise TypeError("'seq' has to be of integer dtype")
     if not (seq >= 0).all():
         raise ValueError("'seq' has to conisist of non-negative integers")
     proper_values = np.arange(base)
     if not np.isin(seq, proper_values).all():
-        raise ValueError("There are symbol codes greater than {}".format(base-1))
+        raise ValueError(f"There are symbol codes greater than {base-1}")
     code = 0
     for i, x in enumerate(reversed(seq)):
         if x > 0:
@@ -196,7 +196,7 @@
     length = prod(shape)
     seq = decode_sequence(code, base=base, min_length=length)
     if seq.size > length:
-        raise ValueError("{} does not encode array of shape {}".format(code, shape))
+        raise ValueError(f"{code} does not encode array of shape {shape}")
     arr = seq.reshape(shape, **kwds)
     return arr
 
diff --git a/pybdm/options.py b/pybdm/options.py
index 73c0ea8..6cdb0e2 100644
--- a/pybdm/options.py
+++ b/pybdm/options.py
@@ -59,5 +59,5 @@ def get(name=None):
         return _options.copy()
     try:
         return _options[name]
-    except KeyError:
-        raise KeyError("there is no '{}' option".format(name))
+    except KeyError as ke:
+        raise KeyError(f"there is no '{name}' option") from ke
diff --git a/pybdm/partitions.py b/pybdm/partitions.py
index 7495c8d..45514e5 100644
--- a/pybdm/partitions.py
+++ b/pybdm/partitions.py
@@ -9,9 +9,9 @@
 so it is well-specified what approach exactly is to be used.
 """
 # pylint: disable=unused-argument
+import numpy as np
 from .utils import decompose_dataset, iter_part_shapes
 
-
 class _Partition:
     """Partition algorithm base class.
 
@@ -28,11 +28,11 @@ def __init__(self, shape):
 
     def __repr__(self):
         cn = self.__class__.__name__
-        return "<{}({})>".format(cn, ", ".join(self.params))
+        return f"<{cn}({', '.join(self.params)})>"
 
     @property
     def params(self):
-        return [ "shape={}".format(self.shape) ]
+        return [ f"shape={self.shape}" ]
 
     def decompose(self, X):
         """Decompose a dataset into blocks.
@@ -48,7 +48,7 @@
             Dataset blocks.
         """
         cn = self.__class__.__name__
-        raise NotImplementedError("'{}' is not meant for a direct use".format(cn))
+        raise NotImplementedError(f"'{cn}' is not meant for a direct use")
 
     def _iter_shapes(self, X):
         yield from iter_part_shapes(X, shape=self.shape, shift=0)
@@ -112,7 +112,7 @@ def __init__(self, shape, shift=1):
 
     @property
     def params(self):
-        return super().params + [ "shift={}".format(self.shift) ]
+        return super().params + [ f"shift={self.shift}" ]
 
     def decompose(self, X):
         """Decompose with the 'correlated' boundary.
@@ -155,7 +155,7 @@ def __init__(self, shape, min_length=2):
 
     @property
     def params(self):
-        return super().params + [ "min_length={}".format(self.min_length) ]
+        return super().params + [ f"min_length={self.min_length}" ]
 
     def _decompose(self, X, shape):
         for part in decompose_dataset(X, shape=shape, shift=0):
@@ -174,3 +174,89 @@ def decompose(self, X):
         .. automethod:: _Partition.decompose
         """
         yield from self._decompose(X, shape=self.shape)
+
+
+class PartitionPeriodic(PartitionIgnore):
+    """Partition with the 'periodic' boundary condition.
+
+    Attributes
+    ----------
+    shape : tuple
+        Part shape.
+
+    Notes
+    -----
+    See :doc:`theory` for a detailed description.
+    """
+
+    name = 'periodic'
+
+    def _extend_2d_dataset(self, X):
+
+        row_multiplier = 0
+        col_multiplier = 0
+
+        dataset_row_size = X.shape[0]
+        dataset_col_size = X.shape[1]
+
+        block_row_size = self.shape[0]
+        block_col_size = self.shape[1]
+
+        # Number of extra rows/columns needed to complete the last blocks.
+        row_idx = block_row_size - (dataset_row_size % block_row_size)
+        row_idx = row_idx % block_row_size
+
+        col_idx = block_col_size - (dataset_col_size % block_col_size)
+        col_idx = col_idx % block_col_size
+
+        if dataset_row_size < row_idx:
+            row_multiplier = row_idx // dataset_row_size
+            row_idx = dataset_row_size
+
+        if dataset_col_size < col_idx:
+            col_multiplier = col_idx // dataset_col_size
+            col_idx = col_idx % dataset_col_size
+
+        periodic_cols = np.hstack((
+            np.tile(X, col_multiplier),
+            X[:,:col_idx]
+        ))
+        periodic_rows = np.hstack((
+            X[:row_idx, :],
+            np.tile(X[:row_idx, :], row_multiplier),
+            X[:row_idx, :col_idx]
+        ))
+
+        extended_dataset = np.hstack((X, periodic_cols))
+        extended_dataset = np.vstack((extended_dataset, periodic_rows))
+
+        return extended_dataset
+
+    def _extend_1d_dataset(self, X):
+
+        multiplier = 0
+        dataset_size = X.shape[0]
+        block_size = self.shape[0]
+
+        row_idx = block_size - (dataset_size % block_size)
+        row_idx = row_idx % block_size
+
+        if dataset_size < row_idx:
+            multiplier = row_idx // dataset_size
+            row_idx = row_idx % dataset_size
+
+        extended_dataset = np.hstack((
+            X, np.tile(X, multiplier), X[:row_idx]
+        ))
+
+        return extended_dataset
+
+    def decompose(self, X):
+        """Decompose with the 'periodic' boundary using a periodically extended dataset.
+
+        .. automethod:: _Partition.decompose
+        """
+
+        if X.ndim == 1:
+            extended_X = self._extend_1d_dataset(X)
+        else:
+            extended_X = self._extend_2d_dataset(X)
+
+        for part in decompose_dataset(extended_X, shape=self.shape, shift=0):
+            if part.shape == self.shape:
+                yield part
diff --git a/pybdm/utils.py b/pybdm/utils.py
index 922ecd6..17648ac 100644
--- a/pybdm/utils.py
+++ b/pybdm/utils.py
@@ -66,7 +66,7 @@
     [(slice(0, 3, None), slice(0, 3, None)), (slice(3, 5, None), slice(0, 3, None))]
     """
     if len(set(shape)) != 1:
-        raise AttributeError("Partition shape is not symmetric {}".format(shape))
+        raise AttributeError(f"Partition shape is not symmetric {shape}")
     if len(shape) != X.ndim:
         raise AttributeError(
             "dataset and slice shape does not have the same number of axes"
         )
@@ -150,7 +150,7 @@
     >>> list_ctm_datasets()
     ['CTM-B2-D12', 'CTM-B2-D4x4', 'CTM-B4-D12', 'CTM-B5-D12', 'CTM-B6-D12', 'CTM-B9-D12']
     """
-    return [ x for x in sorted(_ctm_datasets.keys()) ]
+    return list(sorted(_ctm_datasets.keys()))
 
 @lru_cache(maxsize=2**int(np.ceil(np.log2(len(_ctm_datasets)))))
 def get_ctm_dataset(name):
@@ -175,7 +175,7 @@
         If non-existent CTM dataset is requested.
""" if name not in _ctm_datasets: - raise ValueError("There is no {} CTM dataset".format(name)) + raise ValueError(f"There is no {name} CTM dataset") with resource_stream(_ctmdata_path, _ctm_datasets[name]) as stream: dct = pickle.loads(gzip.decompress(stream.read())) for key in dct: diff --git a/pylintrc b/pylintrc index 2a6b73d..08c7ef1 100644 --- a/pylintrc +++ b/pylintrc @@ -65,10 +65,14 @@ disable=E201, E226, E302, E731, + E0015, C0111, C0326, + R0022, R0201, R0901, + W0602, + W0012, invalid-name, print-statement, parameter-unpacking, diff --git a/requirements.txt b/requirements.txt index ac8d183..5f504ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,13 @@ -setuptools>=40.5.0,<50.0.0 -numpy>=1.15.4 -wheel>=0.22 -pylint>=1.9.5 -pytest>=3.5.1 -pytest-runner>=4.2 -pytest-pylint>=0.12.2 -pytest-doctestplus>=0.2.0 -pytest-cov>=2.7.1 -coverage>=4.5.1 -codecov>=2.0.15 -joblib>=0.13.0 +setuptools>=71 +numpy>=2.0 +wheel>=0.43 +pylint>=3.2.5 +pytest>=7 +pytest-runner>=6.0.1 +pytest-pylint>=0.21 +pytest-doctestplus>=1.2.1 +pytest-cov>=5 +coverage>=7.6 +codecov>=2.1.13 +joblib>=1.4.2 +networkx>=3.3 diff --git a/setup.cfg b/setup.cfg index 99ea8cd..089f4ce 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,3 +26,5 @@ doctest_plus = enabled filterwarnings = ignore:CTM dataset does not contain object ignore:Using or importing the ABCs +markers = + slow \ No newline at end of file diff --git a/setup.py b/setup.py index 4c21b1b..71563a3 100644 --- a/setup.py +++ b/setup.py @@ -2,24 +2,22 @@ import os import sys - -try: - from setuptools import setup, find_packages -except ImportError: - from distutils.core import setup - +from setuptools import setup, find_packages if sys.argv[-1] == 'publish': os.system('python setup.py sdist upload') sys.exit() -readme = open('README.rst').read() +with open('README.rst', encoding='utf-8') as file: + readme = file.read() doclink = """ Documentation ------------- The full documentation is at http://pybdm-docs.rtfd.org.""" -history = open('HISTORY.rst').read().replace('.. :changelog:', '') + +with open('HISTORY.rst', encoding='utf-8') as file: + history = file.read().replace('.. 
 
 setup(
     name='pybdm',
diff --git a/tests/test_bdm.py b/tests/test_bdm.py
index 681ed3b..ef5c076 100755
--- a/tests/test_bdm.py
+++ b/tests/test_bdm.py
@@ -24,7 +24,7 @@
 _dirpath = os.path.join(os.path.split(__file__)[0], 'data')
 # Get test input data and expected values
 bdm2_test_input = []
-with open(os.path.join(_dirpath, 'bdm-b2-d4x4-test-input.tsv'), 'r') as stream:
+with open(os.path.join(_dirpath, 'bdm-b2-d4x4-test-input.tsv'), 'r', encoding='utf-8') as stream:
     for line in stream:
         string, bdm = line.strip().split("\t")
         bdm = float(bdm.strip())
@@ -40,7 +40,7 @@
     (array_from_string(s2, (48,)), 2.0)]
 
 ent2_test_input = []
-with open(os.path.join(_dirpath, 'ent-b2-d4x4-test-input.tsv'), 'r') as stream:
+with open(os.path.join(_dirpath, 'ent-b2-d4x4-test-input.tsv'), 'r', encoding='utf-8') as stream:
     for line in stream:
         string, ent2 = line.strip().split(",")
         ent2 = float(ent2.strip())
diff --git a/tests/test_partitions.py b/tests/test_partitions.py
index dfe5044..95dc54a 100644
--- a/tests/test_partitions.py
+++ b/tests/test_partitions.py
@@ -1,11 +1,11 @@
 """Unit tests for BDM partition algorithms."""
 import pytest
 import numpy as np
-from pybdm.partitions import PartitionIgnore, PartitionCorrelated, PartitionRecursive
+from pybdm.partitions import PartitionIgnore, PartitionCorrelated, PartitionRecursive, PartitionPeriodic
 
 
 def _test_decompose(partition, X, expected):
-    output = [ p for p in partition.decompose(X) ]
+    output = list(partition.decompose(X))
     assert len(output) == len(expected)
     assert all(np.array_equal(o, e) for o, e in zip(output, expected))
 
@@ -42,3 +42,22 @@
 def test_partition_recursive(X, shape, min_length, expected):
     partition = PartitionRecursive(shape=shape, min_length=min_length)
     _test_decompose(partition, X, expected)
+
+@pytest.mark.parametrize('X,shape,expected',[
+    (np.ones((2,2)), (2,2), [ np.ones((2,2)) ]),
+    (np.ones((5,5)), (2,2), [ np.ones((2,2)) for _ in range(9) ]),
+    (np.ones((5,5)), (3,3), [ np.ones((3,3)) for _ in range(4) ])
+])
+def test_2d_partition_periodic(X, shape, expected):
+    partition = PartitionPeriodic(shape=shape)
+    _test_decompose(partition, X, expected)
+
+@pytest.mark.parametrize('X,shape,expected',[
+    (np.arange(0,6), (6, ), [ np.arange(0,6) ]),
+    (np.arange(0,6), (3, ), [ np.arange(0,3), np.arange(3,6) ]),
+    (np.arange(0,3), (6, ), [ np.hstack((np.arange(0,3), np.arange(0,3))) ]),
+    (np.arange(0,3), (8, ), [ np.hstack((np.arange(0,3), np.arange(0,3), np.arange(0,2))) ]),
+])
+def test_1d_partition_periodic(X, shape, expected):
+    partition = PartitionPeriodic(shape=shape)
+    _test_decompose(partition, X, expected)
diff --git a/tests/test_perturbation.py b/tests/test_perturbation.py
index b625d8f..8b5f59e 100644
--- a/tests/test_perturbation.py
+++ b/tests/test_perturbation.py
@@ -5,8 +5,9 @@
 import pytest
 from pytest import approx
 import numpy as np
+import networkx as nx
 from pybdm.bdm import BDM
-from pybdm.partitions import PartitionCorrelated
+from pybdm.partitions import PartitionCorrelated, PartitionPeriodic
 from pybdm.algorithms import PerturbationExperiment
 from pybdm.utils import prod
 
@@ -69,6 +70,17 @@ def perturbation_d1_ent_overlap():
     bdm = BDM(ndim=1, partition=PartitionCorrelated, shift=1)
     return PerturbationExperiment(bdm, X, metric='ent')
 
+@pytest.fixture(scope='function')
+def deconvolve_ladder_complete_graph():
+    graphs = {
+        'L-': nx.ladder_graph(10),
+        'C-': nx.complete_graph(10),
+    }
+    X = nx.union_all(graphs.values(), graphs.keys())
+    X = nx.to_numpy_array(X, dtype=int)
+    X[(0,20), (20,0)] = 1
+    bdm = BDM(ndim=2, partition=PartitionPeriodic)
+    return PerturbationExperiment(bdm, X)
 
 @pytest.mark.slow
 class TestPerturbationExperiment:
@@ -82,7 +94,7 @@
     ])
     def test_idx_to_parts(self, perturbation, idx, expected):
         expected = [ perturbation.X[s] for s in expected ]
-        output = [ x for x in perturbation._idx_to_parts(idx) ]
+        output = list(perturbation._idx_to_parts(idx))
         assert len(output) == len(expected)
         for o, e in zip(output, expected):
             assert np.array_equal(o, e)
@@ -104,7 +116,7 @@ def test_idx_to_parts(self, perturbation, idx, expected):
     def test_idx_to_parts_overlap(self, perturbation_overlap, idx, expected):
         perturbation = perturbation_overlap
         expected = [ perturbation.X[s] for s in expected ]
-        output = [ x for x in perturbation._idx_to_parts(idx) ]
+        output = list(perturbation._idx_to_parts(idx))
         assert len(output) == len(expected)
         for o, e in zip(output, expected):
             assert np.array_equal(o, e)
@@ -139,6 +151,10 @@ def _assert_perturb(self, perturbation, idx, value, keep_changes, metric='bdm'):
         assert np.array_equal(X0, perturbation.X)
         assert perturbation._counter == C0
 
+    def _assert_deconvolution(self, perturbation, auxiliary_cutoff, is_directed, expected):
+        result = perturbation.deconvolve(auxiliary_cutoff=auxiliary_cutoff, is_directed=is_directed)
+        assert np.array_equal(result.edges_for_deletion, expected)
+
     @pytest.mark.parametrize('idx', [(0, 0), (1, 0), (10, 15), (24, 24)])
     @pytest.mark.parametrize('value', [1, 0, -1])
     @pytest.mark.parametrize('keep_changes', [True, False])
@@ -303,3 +319,14 @@ def test_run_ent_d1(self, perturbation_d1_ent, idx, values, keep_changes):
     @pytest.mark.parametrize('keep_changes', [True, False])
     def test_run_ent_d1_overlap(self, perturbation_d1_ent_overlap, idx, values, keep_changes):
         self._assert_run(perturbation_d1_ent_overlap, idx, values, keep_changes)
+
+    @pytest.mark.parametrize('auxiliary_cutoff,is_directed,expected', [
+        (20, False, np.array([[0,20],[20,0]])),
+        (20, True, np.array([[20,0]]))
+    ])
+    def test_deconvolve_ladder_complete(
+        self, deconvolve_ladder_complete_graph, auxiliary_cutoff, is_directed, expected
+    ):
+        self._assert_deconvolution(
+            deconvolve_ladder_complete_graph, auxiliary_cutoff, is_directed, expected
+        )
diff --git a/tox.ini b/tox.ini
index 302d0be..2502a5f 100644
--- a/tox.ini
+++ b/tox.ini
@@ -3,7 +3,7 @@
 # ---
 # pip install tox-conda>=0.2.0
 [tox]
-envlist = py35, py36, py37, style, docs
+envlist = py310, py311, py312, style, docs
 
 [testenv]
 setenv =
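
Usage sketch (not part of the diff): the snippet below shows how the new PartitionPeriodic partition and PerturbationExperiment.deconvolve() introduced by this patch could be exercised together, assuming the patch is applied. It mirrors the deconvolve() docstring example above; import paths follow the tests (pybdm.partitions, pybdm.algorithms), and the commented expected output is the one reported in that docstring.

    import numpy as np
    from pybdm import BDM
    from pybdm.partitions import PartitionPeriodic
    from pybdm.algorithms import PerturbationExperiment

    # Two 4-node cliques joined by the single edge (0, 4), as in the
    # deconvolve() docstring example.
    X = np.array([[0, 1, 1, 1, 1, 0, 0, 0],
                  [1, 0, 1, 1, 0, 0, 0, 0],
                  [1, 1, 0, 1, 0, 0, 0, 0],
                  [1, 1, 1, 0, 0, 0, 0, 0],
                  [1, 0, 0, 0, 0, 1, 1, 1],
                  [0, 0, 0, 0, 1, 0, 1, 1],
                  [0, 0, 0, 0, 1, 1, 0, 1],
                  [0, 0, 0, 0, 1, 1, 1, 0]])

    # PartitionPeriodic pads a dataset periodically so that every block has the
    # full part shape; for this 8x8 matrix and 4x4 blocks it decomposes exactly
    # like the default PartitionIgnore.
    bdm = BDM(ndim=2, shape=(4, 4), partition=PartitionPeriodic)
    perturbation = PerturbationExperiment(bdm, X)

    # Edges whose information-loss difference exceeds the cutoff are selected
    # for deletion; per the docstring example this should report the bridging
    # edge in both directions, i.e. [[0, 4], [4, 0]].
    result = perturbation.deconvolve(auxiliary_cutoff=20, is_directed=False)
    print(result.edges_for_deletion)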