Implementation of Causal Deconvolution and Update of Tox Build #19

Open · wants to merge 6 commits into master
26 changes: 14 additions & 12 deletions docs/conf.py
@@ -11,7 +11,9 @@
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys, os
import sys
import os
import pybdm

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -22,8 +24,6 @@
parent = os.path.dirname(cwd)
sys.path.insert(0, parent)

import pybdm

# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
@@ -40,6 +40,8 @@
'sphinxcontrib.bibtex'
]

# BibTeX reference files
bibtex_bibfiles = ['references.bib']

# Napoleon settings
napoleon_google_docstring = False
@@ -74,8 +76,8 @@
master_doc = 'index'

# General information about the project.
project = u'PyBDM'
copyright = u'2019, Szymon Talaga'
project = 'PyBDM'
copyright = '2019, Szymon Talaga'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -157,7 +159,7 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
@@ -220,8 +222,8 @@
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'pybdm.tex', u'PyBDM Documentation',
u'Szymon Talaga', 'manual'),
('index', 'pybdm.tex', 'PyBDM Documentation',
'Szymon Talaga', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
@@ -250,8 +252,8 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'pybdm', u'PyBDM Documentation',
[u'Szymon Talaga'], 1)
('index', 'pybdm', 'PyBDM Documentation',
['Szymon Talaga'], 1)
]

# If true, show URL addresses after external links.
@@ -264,8 +266,8 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'pybdm', u'PyBDM Documentation',
u'Szymon Talaga', 'pybdm', 'One line description of project.',
('index', 'pybdm', 'PyBDM Documentation',
'Szymon Talaga', 'pybdm', 'One line description of project.',
'Miscellaneous'),
]

1 change: 0 additions & 1 deletion docs/roadmap.rst
@@ -9,7 +9,6 @@ Next major release
* Perturbation experiment for growing/shrinking systems
* Implement Bayesian framework for approximating the probability of
  a stochastic generating source
* Add a partition algorithm with the periodic boundary condition
* Use integer-based coding of dataset blocks
  (to lower the memory footprint). This will be done only if it is possible
  to use integer coding without a significantly negative impact on performance.
43 changes: 41 additions & 2 deletions docs/theory.rst
@@ -187,9 +187,48 @@ until its rightmost column or lowest row contain the values on the boundary
of the original matrix.

The conditions can yield different results for small objects, but are consistent
asymptotically in the limit of large object sizes. Detailed discussion of
boundary conditions in BDM can be found in :cite:`zenil_decomposition_2018`.
asymptotically in the limit of large object sizes.

If the periodic condition were used, the missing columns would be supplied
by wrapping around to the first columns of the matrix, and likewise for
rows; we would get four slices:

+--+--+--+
|1 |2 |3 |
+--+--+--+
|6 |7 |8 |
+--+--+--+
|11|12|13|
+--+--+--+

+--+--+--+
|4 |5 |1 |
+--+--+--+
|9 |10|6 |
+--+--+--+
|14|15|11|
+--+--+--+

+--+--+--+
|16|17|18|
+--+--+--+
|21|22|23|
+--+--+--+
|1 |2 |3 |
+--+--+--+

+--+--+--+
|19|20|16|
+--+--+--+
|24|25|21|
+--+--+--+
|4 |5 |1 |
+--+--+--+

This ensures that the matrices have equal sizes when computing the BDM.
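
For illustration, this wrapping can be reproduced with plain *NumPy*,
assuming the 5x5 example matrix with entries 1 through 25 shown in the
slices above (``np.pad`` with ``mode='wrap'`` is used here only as a
sketch of the behaviour, not necessarily how ``PartitionPeriodic`` is
implemented internally):

.. code-block:: python

    import numpy as np

    X = np.arange(1, 26).reshape(5, 5)
    # Wrap the 5x5 matrix so it divides evenly into 3x3 blocks
    X_wrapped = np.pad(X, (0, 1), mode='wrap')  # shape (6, 6)
    slices = [
        X_wrapped[i:i+3, j:j+3]
        for i in range(0, 6, 3)
        for j in range(0, 6, 3)
    ]
    # slices[0] == [[1, 2, 3], [6, 7, 8], [11, 12, 13]]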

Detailed discussion of boundary conditions in BDM can be found
in :cite:`zenil_decomposition_2018`.

Normalized BDM
--------------
8 changes: 6 additions & 2 deletions docs/usage.rst
@@ -14,20 +14,24 @@ Different boundary conditions (see :doc:`/theory`) are implemented by
.. code-block:: python

from pybdm import BDM
from pybdm import PartitionIgnore, PartitionRecursive, PartitionCorrelated
from pybdm import PartitionIgnore, PartitionRecursive, PartitionCorrelated, PartitionPeriodic

bdm_ignore = BDM(ndim=1, partition=PartitionIgnore)
# This is default so it is equivalent to
bdm_ignore = BDM(ndim=1)

bdm_recurisve = BDM(ndim=1, partition=PartitionRecursive, min_length=2)
bdm_recursive = BDM(ndim=1, partition=PartitionRecursive, min_length=2)
# Minimum size is specified as length, since only symmetric slices
# are accepted in the case of multidimensional objects.

bdm_correlated = BDM(ndim=1, partition=PartitionCorrelated)
# Step-size defaults to 1, so this is equivalent to
bdm_correlated = BDM(ndim=1, partition=PartitionCorrelated, shift=1)

bdm_periodic = BDM(ndim=1, partition=PartitionPeriodic)
# This is similar to PartitionIgnore, but the dataset is extended
# periodically to fit the shape of the partition
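
The effect of the periodic partition shows up when the size of an object
is not a multiple of the block shape. A minimal sketch (the exact values
depend on the CTM data shipped with *PyBDM*; ``PartitionPeriodic`` is
assumed importable from the top level, as in the snippet above):

.. code-block:: python

    import numpy as np
    from pybdm import BDM, PartitionIgnore, PartitionPeriodic

    X = np.random.randint(0, 2, (50, ))
    # PartitionIgnore drops the leftover symbols at the boundary,
    # while PartitionPeriodic wraps the sequence around instead.
    print(BDM(ndim=1, partition=PartitionIgnore).bdm(X))
    print(BDM(ndim=1, partition=PartitionPeriodic).bdm(X))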


Normalized BDM
--------------
166 changes: 163 additions & 3 deletions pybdm/algorithms.py
@@ -1,8 +1,46 @@
"""Algorithms based on ``BDM`` objects."""
from itertools import product
from dataclasses import dataclass
from random import choice
import numpy as np

@dataclass
class DeconvolutionResult:
    """A deconvolution result data class.

    Deconvolving a graph produces an information signature (loss) value
    for each edge; sorting the values from maximum to minimum gives the
    differences between consecutive values. Using an auxiliary cutoff,
    an edge is marked for deletion when the difference between its
    information loss and the next one deviates from log2(2), the
    theoretical information contribution of a single bit, by more than
    the cutoff.

    Attributes
    ----------
    auxiliary_cutoff : float
        The cutoff value used to determine which edges are to be deleted.
    info_loss_values : np.ndarray
        A *Numpy* array containing the information signature loss of
        each edge, retrieved by perturbing that edge.
    info_loss_edges : np.ndarray
        A *Numpy* array containing the edge of each loss value in
        ``info_loss_values``. The i-th edge in this array corresponds
        to the i-th information loss value in ``info_loss_values``.
    differences : np.ndarray
        A *Numpy* array produced after sorting ``info_loss_values`` from
        maximum to minimum; the i-th entry is the i-th information loss
        value minus the (i+1)-th one.
    edges_for_deletion : np.ndarray
        A *Numpy* array of the edges whose differences satisfy the
        ``auxiliary_cutoff`` criterion.
    difference_filter : np.ndarray
        A boolean *Numpy* array; ``True`` where the i-th edge in
        ``info_loss_edges`` is included in ``edges_for_deletion``.
    """
    auxiliary_cutoff: float
    info_loss_values: np.ndarray
    info_loss_edges: np.ndarray
    differences: np.ndarray
    edges_for_deletion: np.ndarray
    difference_filter: np.ndarray
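
# To make the selection rule concrete, a standalone sketch with made-up
# numbers (illustration only, not part of the module):
#
#     info_loss = np.array([30.0, 8.0, 7.5, 7.0])   # sorted max -> min
#     differences = -np.diff(info_loss)              # [22.0, 0.5, 0.5]
#     # flag edges whose drop deviates from log2(2) = 1 by more than the
#     # cutoff; the last edge has no successor, so append False
#     flags = np.append(np.abs(differences - np.log2(2)) > 20.0, False)
#     # flags -> [True, False, False, False]: only the first edge is cut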

class PerturbationExperiment:
    """Perturbation experiment class.
@@ -68,7 +106,7 @@ def __init__(self, bdm, X=None, metric='bdm'):
    def __repr__(self):
        cn = self.__class__.__name__
        bdm = str(self.bdm)[1:-1]
        return "<{}(metric={}) with {}>".format(cn, self.metric, bdm)
        return f"<{cn}(metric={self.metric}) with {bdm}>"

    @property
    def size(self):
@@ -207,7 +245,7 @@ def perturb(self, idx, value=-1, keep_changes=False):
        >>> X = np.ones((30, ), dtype=int)
        >>> perturbation = PerturbationExperiment(bdm, X)
        >>> perturbation.perturb((10, ), -1) # doctest: +FLOAT_CMP
        26.91763012739709
        np.float64(26.91763012739709)
        """
        old_value = self.X[idx]
        if value < 0:
@@ -258,11 +296,133 @@ def run(self, idx=None, values=None, keep_changes=False):
"""
if idx is None:
indexes = [ range(k) for k in self.X.shape ]
idx = np.array([ x for x in product(*indexes) ], dtype=int)
idx = np.array(list(product(*indexes)), dtype=int)
if values is None:
values = np.full((idx.shape[0], ), -1, dtype=int)
return np.apply_along_axis(
lambda r: self.perturb(tuple(r[:-1]), r[-1], keep_changes=keep_changes),
axis=1,
arr=np.column_stack((idx, values))
)

    def _sort_info_loss_values(self, info_loss_values, info_loss_edges):
        # Sort losses (and their edges) from maximum to minimum
        sorted_values = np.argsort(-info_loss_values[:, 0])
        info_loss_values = info_loss_values[sorted_values]
        info_loss_edges = info_loss_edges[sorted_values]

        return info_loss_values, info_loss_edges

    def _compute_differences(self, info_loss_values):
        # Negate np.diff so a drop between consecutive losses is positive
        return -np.diff(info_loss_values[:, -1])

    def _filter_by_differences(self, auxiliary_cutoff, info_loss_edges,
                               differences, is_directed):
        # Flag the i-th edge when its difference deviates from log2(2)
        # by more than the cutoff; the last edge has no successor and is
        # therefore never flagged
        difference_filter = list(np.isin(
            np.arange(len(differences)),
            np.where(abs(differences - np.log2(2)) > auxiliary_cutoff)
        ))
        difference_filter.extend([False])

        edges_for_deletion = info_loss_edges[difference_filter]

        if not is_directed:
            # Include both (i, j) and (j, i) for undirected graphs
            edges_for_deletion = np.array(
                [*edges_for_deletion, *edges_for_deletion[:, [1, 0]]],
                dtype=int
            )

        return edges_for_deletion, difference_filter

    def _process_deconvolution(self, auxiliary_cutoff, info_loss_values,
                               info_loss_edges, is_directed):
        info_loss_values, info_loss_edges = self._sort_info_loss_values(
            info_loss_values, info_loss_edges
        )
        differences = self._compute_differences(info_loss_values)
        edges_for_deletion, difference_filter = self._filter_by_differences(
            auxiliary_cutoff, info_loss_edges, differences, is_directed
        )

        return DeconvolutionResult(
            auxiliary_cutoff, info_loss_values, info_loss_edges,
            differences, edges_for_deletion, difference_filter
        )

    def deconvolve(self, auxiliary_cutoff, is_directed=False, keep_changes=False):
        """Run causal deconvolution.

        Parameters
        ----------
        auxiliary_cutoff : float
            Value used as the cutoff when cutting edges based on
            their information signature differences.
        is_directed : bool
            If ``True``, the dataset is treated as a directed graph
            and the information signature of every nonzero edge is
            retrieved.
            If ``False``, the dataset is treated as an undirected graph,
            so edges (i, j) and (j, i) are considered the same when
            retrieving the information signatures.
        keep_changes : bool
            If ``True``, changes to the dataset are persistent,
            so all edges that have been cut are applied to the dataset.

        Returns
        -------
        DeconvolutionResult
            A dataclass with the information loss values and edges,
            their sorted differences, and the edges selected for deletion.

        Examples
        --------
        >>> from pybdm import BDM
        >>> bdm = BDM(ndim=2, shape=(4, 4))
        >>> X = np.array([[0, 1, 1, 1, 1, 0, 0, 0],
        ...               [1, 0, 1, 1, 0, 0, 0, 0],
        ...               [1, 1, 0, 1, 0, 0, 0, 0],
        ...               [1, 1, 1, 0, 0, 0, 0, 0],
        ...               [1, 0, 0, 0, 0, 1, 1, 1],
        ...               [0, 0, 0, 0, 1, 0, 1, 1],
        ...               [0, 0, 0, 0, 1, 1, 0, 1],
        ...               [0, 0, 0, 0, 1, 1, 1, 0]])
        >>> perturbation = PerturbationExperiment(bdm, X)
        >>> result = perturbation.deconvolve(auxiliary_cutoff=20, is_directed=False)
        >>> result.edges_for_deletion
        array([[0, 4],
               [4, 0]])
        """
        if self.X.ndim != 2:
            raise ValueError('Deconvolution is only supported for 2D datasets')

        info_loss_values = np.empty((0, 1), dtype=float)
        info_loss_edges = np.empty((0, 2), dtype=int)
        deleted_edge_graph = np.copy(self.X)

        # For undirected graphs consider each edge only once (upper triangle)
        nonzero_edges = deleted_edge_graph if is_directed else np.triu(deleted_edge_graph)
        nonzero_edges = np.column_stack(np.nonzero(nonzero_edges))
        original_bdm = self.bdm.bdm(self.X)

        for edge in nonzero_edges:
            edges_to_perturb = (edge[0], edge[1]) if is_directed else (edge, edge[::-1])

            # Delete the edge, measure the information loss, then restore it
            deleted_edge_graph[edges_to_perturb] = 0
            deleted_edge_bdm = self.bdm.bdm(deleted_edge_graph)
            info_loss = original_bdm - deleted_edge_bdm

            info_loss_edges = np.vstack((info_loss_edges, np.array([edge])))
            info_loss_values = np.vstack((info_loss_values, np.array([info_loss])))

            deleted_edge_graph[edges_to_perturb] = 1

        deconvolution_result = self._process_deconvolution(
            auxiliary_cutoff, info_loss_values, info_loss_edges, is_directed
        )

        if deconvolution_result.edges_for_deletion.size == 0:
            return deconvolution_result

        if not keep_changes:
            # Apply the deletions only to the local copy of the dataset
            deleted_edge_graph[
                deconvolution_result.edges_for_deletion[:, 0],
                deconvolution_result.edges_for_deletion[:, 1]
            ] = 0
            return deconvolution_result

        self.run(idx=deconvolution_result.edges_for_deletion, keep_changes=keep_changes)
        return deconvolution_result