Implementation of Causal Deconvolution and Update of Tox Build #19

Open · wants to merge 6 commits into master
26 changes: 14 additions & 12 deletions docs/conf.py
@@ -11,7 +11,9 @@
# All configuration values have a default; values that are commented out
# serve to show the default.

import sys, os
import sys
import os
import pybdm

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
@@ -22,8 +24,6 @@
parent = os.path.dirname(cwd)
sys.path.insert(0, parent)

import pybdm

# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
@@ -40,6 +40,8 @@
'sphinxcontrib.bibtex'
]

# BibTeX reference files
bibtex_bibfiles = ['references.bib']

# Napoleon settings
napoleon_google_docstring = False
@@ -74,8 +76,8 @@
master_doc = 'index'

# General information about the project.
project = u'PyBDM'
copyright = u'2019, Szymon Talaga'
project = 'PyBDM'
copyright = '2019, Szymon Talaga'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -157,7 +159,7 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
@@ -220,8 +222,8 @@
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'pybdm.tex', u'PyBDM Documentation',
u'Szymon Talaga', 'manual'),
('index', 'pybdm.tex', 'PyBDM Documentation',
'Szymon Talaga', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
@@ -250,8 +252,8 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
('index', 'pybdm', u'PyBDM Documentation',
[u'Szymon Talaga'], 1)
('index', 'pybdm', 'PyBDM Documentation',
['Szymon Talaga'], 1)
]

# If true, show URL addresses after external links.
@@ -264,8 +266,8 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
('index', 'pybdm', u'PyBDM Documentation',
u'Szymon Talaga', 'pybdm', 'One line description of project.',
('index', 'pybdm', 'PyBDM Documentation',
'Szymon Talaga', 'pybdm', 'One line description of project.',
'Miscellaneous'),
]

1 change: 0 additions & 1 deletion docs/roadmap.rst
@@ -9,7 +9,6 @@ Next major release
* Perturbation experiment for growing/shrinking systems
* Implement Bayesian framework for approximating the probability of
  a stochastic generating source
* Add a partition algorithm with the periodic boundary condition
* Use integer-based coding of dataset blocks
  (to lower the memory footprint). This will be done only if it is possible
  to use integer coding without a significantly negative impact on performance.
43 changes: 41 additions & 2 deletions docs/theory.rst
@@ -187,9 +187,48 @@ until its rightmost column or lowest row contain the values on the boundary
of the original matrix.

The conditions can yield different results for small objects, but are consistent
asymptotically in the limit of large object sizes. Detailed discussion of
boundary conditions in BDM can be found in :cite:`zenil_decomposition_2018`.
asymptotically in the limit of large object sizes.

If the periodic condition were used, the missing columns would be supplied
by wrapping around to the first columns of the matrix, and likewise for
rows; we would get four slices:

+--+--+--+
|1 |2 |3 |
+--+--+--+
|6 |7 |8 |
+--+--+--+
|11|12|13|
+--+--+--+

+--+--+--+
|4 |5 |1 |
+--+--+--+
|9 |10|6 |
+--+--+--+
|14|15|11|
+--+--+--+

+--+--+--+
|16|17|18|
+--+--+--+
|21|22|23|
+--+--+--+
|1 |2 |3 |
+--+--+--+

+--+--+--+
|19|20|16|
+--+--+--+
|24|25|21|
+--+--+--+
|4 |5 |1 |
+--+--+--+

This ensures that the matrices have equal sizes when computing the BDM.
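
For illustration, this wrapping can be reproduced with plain *NumPy*,
assuming the 5x5 example matrix with entries 1 through 25 shown in the
slices above (``np.pad`` with ``mode='wrap'`` is used here only as a
sketch of the behaviour, not necessarily how ``PartitionPeriodic`` is
implemented internally):

.. code-block:: python

    import numpy as np

    X = np.arange(1, 26).reshape(5, 5)
    # Wrap the 5x5 matrix so it divides evenly into 3x3 blocks
    X_wrapped = np.pad(X, (0, 1), mode='wrap')  # shape (6, 6)
    slices = [
        X_wrapped[i:i+3, j:j+3]
        for i in range(0, 6, 3)
        for j in range(0, 6, 3)
    ]
    # slices[0] == [[1, 2, 3], [6, 7, 8], [11, 12, 13]]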

Detailed discussion of boundary conditions in BDM can be found
in :cite:`zenil_decomposition_2018`.

Normalized BDM
--------------
8 changes: 6 additions & 2 deletions docs/usage.rst
@@ -14,20 +14,24 @@ Different boundary conditions (see :doc:`/theory`) are implemented by
.. code-block:: python

from pybdm import BDM
from pybdm import PartitionIgnore, PartitionRecursive, PartitionCorrelated
from pybdm import PartitionIgnore, PartitionRecursive, PartitionCorrelated, PartitionPeriodic

bdm_ignore = BDM(ndim=1, partition=PartitionIgnore)
# This is default so it is equivalent to
bdm_ignore = BDM(ndim=1)

bdm_recurisve = BDM(ndim=1, partition=PartitionRecursive, min_length=2)
bdm_recursive = BDM(ndim=1, partition=PartitionRecursive, min_length=2)
# Minimum size is specified as length, since only symmetric slices
# are accepted in the case of multidimensional objects.

bdm_correlated = BDM(ndim=1, partition=PartitionCorrelated)
# Step-size defaults to 1, so this is equivalent to
bdm_correlated = BDM(ndim=1, partition=PartitionCorrelated, shift=1)

bdm_periodic = BDM(ndim=1, partition=PartitionPeriodic)
# This is similar to PartitionIgnore, but the dataset is extended
# periodically to fit the shape of the partition
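
The effect of the periodic partition shows up when the size of an object
is not a multiple of the block shape. A minimal sketch (the exact values
depend on the CTM data shipped with *PyBDM*; ``PartitionPeriodic`` is
assumed importable from the top level, as in the snippet above):

.. code-block:: python

    import numpy as np
    from pybdm import BDM, PartitionIgnore, PartitionPeriodic

    X = np.random.randint(0, 2, (50, ))
    # PartitionIgnore drops the leftover symbols at the boundary,
    # while PartitionPeriodic wraps the sequence around instead.
    print(BDM(ndim=1, partition=PartitionIgnore).bdm(X))
    print(BDM(ndim=1, partition=PartitionPeriodic).bdm(X))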


Normalized BDM
--------------
166 changes: 163 additions & 3 deletions pybdm/algorithms.py
@@ -1,8 +1,46 @@
"""Algorithms based on ``BDM`` objects."""
from itertools import product
from dataclasses import dataclass
from random import choice
import numpy as np

@dataclass
class DeconvolutionResult:
    """A deconvolution result data class.

    Deconvolving a graph produces an information signature (loss) value
    for each edge; sorting the values from maximum to minimum gives the
    differences between consecutive values. Using an auxiliary cutoff,
    an edge is marked for deletion when the difference between its
    information loss and the next one deviates from log2(2), the
    theoretical information contribution of a single bit, by more than
    the cutoff.

    Attributes
    ----------
    auxiliary_cutoff : float
        The cutoff value used to determine which edges are to be deleted.
    info_loss_values : np.ndarray
        A *Numpy* array containing the information signature loss of
        each edge, retrieved by perturbing that edge.
    info_loss_edges : np.ndarray
        A *Numpy* array containing the edge of each loss value in
        ``info_loss_values``. The i-th edge in this array corresponds
        to the i-th information loss value in ``info_loss_values``.
    differences : np.ndarray
        A *Numpy* array produced after sorting ``info_loss_values`` from
        maximum to minimum; the i-th entry is the i-th information loss
        value minus the (i+1)-th one.
    edges_for_deletion : np.ndarray
        A *Numpy* array of the edges whose differences satisfy the
        ``auxiliary_cutoff`` criterion.
    difference_filter : np.ndarray
        A boolean *Numpy* array; ``True`` where the i-th edge in
        ``info_loss_edges`` is included in ``edges_for_deletion``.
    """
    auxiliary_cutoff: float
    info_loss_values: np.ndarray
    info_loss_edges: np.ndarray
    differences: np.ndarray
    edges_for_deletion: np.ndarray
    difference_filter: np.ndarray
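
# To make the selection rule concrete, a standalone sketch with made-up
# numbers (illustration only, not part of the module):
#
#     info_loss = np.array([30.0, 8.0, 7.5, 7.0])   # sorted max -> min
#     differences = -np.diff(info_loss)              # [22.0, 0.5, 0.5]
#     # flag edges whose drop deviates from log2(2) = 1 by more than the
#     # cutoff; the last edge has no successor, so append False
#     flags = np.append(np.abs(differences - np.log2(2)) > 20.0, False)
#     # flags -> [True, False, False, False]: only the first edge is cut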

class PerturbationExperiment:
    """Perturbation experiment class.
@@ -68,7 +106,7 @@ def __init__(self, bdm, X=None, metric='bdm'):
    def __repr__(self):
        cn = self.__class__.__name__
        bdm = str(self.bdm)[1:-1]
        return "<{}(metric={}) with {}>".format(cn, self.metric, bdm)
        return f"<{cn}(metric={self.metric}) with {bdm}>"

    @property
    def size(self):
@@ -207,7 +245,7 @@ def perturb(self, idx, value=-1, keep_changes=False):
        >>> X = np.ones((30, ), dtype=int)
        >>> perturbation = PerturbationExperiment(bdm, X)
        >>> perturbation.perturb((10, ), -1) # doctest: +FLOAT_CMP
        26.91763012739709
        np.float64(26.91763012739709)
        """
        old_value = self.X[idx]
        if value < 0:
@@ -258,11 +296,133 @@ def run(self, idx=None, values=None, keep_changes=False):
"""
if idx is None:
indexes = [ range(k) for k in self.X.shape ]
idx = np.array([ x for x in product(*indexes) ], dtype=int)
idx = np.array(list(product(*indexes)), dtype=int)
if values is None:
values = np.full((idx.shape[0], ), -1, dtype=int)
return np.apply_along_axis(
lambda r: self.perturb(tuple(r[:-1]), r[-1], keep_changes=keep_changes),
axis=1,
arr=np.column_stack((idx, values))
)

    def _sort_info_loss_values(self, info_loss_values, info_loss_edges):
        # Sort losses (and their edges) from maximum to minimum
        sorted_values = np.argsort(-info_loss_values[:, 0])
        info_loss_values = info_loss_values[sorted_values]
        info_loss_edges = info_loss_edges[sorted_values]

        return info_loss_values, info_loss_edges

    def _compute_differences(self, info_loss_values):
        # Negate np.diff so a drop between consecutive losses is positive
        return -np.diff(info_loss_values[:, -1])

    def _filter_by_differences(self, auxiliary_cutoff, info_loss_edges,
                               differences, is_directed):
        # Flag the i-th edge when its difference deviates from log2(2)
        # by more than the cutoff; the last edge has no successor and is
        # therefore never flagged
        difference_filter = list(np.isin(
            np.arange(len(differences)),
            np.where(abs(differences - np.log2(2)) > auxiliary_cutoff)
        ))
        difference_filter.extend([False])

        edges_for_deletion = info_loss_edges[difference_filter]

        if not is_directed:
            # Include both (i, j) and (j, i) for undirected graphs
            edges_for_deletion = np.array(
                [*edges_for_deletion, *edges_for_deletion[:, [1, 0]]],
                dtype=int
            )

        return edges_for_deletion, difference_filter

    def _process_deconvolution(self, auxiliary_cutoff, info_loss_values,
                               info_loss_edges, is_directed):
        info_loss_values, info_loss_edges = self._sort_info_loss_values(
            info_loss_values, info_loss_edges
        )
        differences = self._compute_differences(info_loss_values)
        edges_for_deletion, difference_filter = self._filter_by_differences(
            auxiliary_cutoff, info_loss_edges, differences, is_directed
        )

        return DeconvolutionResult(
            auxiliary_cutoff, info_loss_values, info_loss_edges,
            differences, edges_for_deletion, difference_filter
        )

    def deconvolve(self, auxiliary_cutoff, is_directed=False, keep_changes=False):
        """Run causal deconvolution.

        Parameters
        ----------
        auxiliary_cutoff : float
            Value used as the cutoff when cutting edges based on
            their information signature differences.
        is_directed : bool
            If ``True``, the dataset is treated as a directed graph
            and the information signature of every nonzero edge is
            retrieved.
            If ``False``, the dataset is treated as an undirected graph,
            so edges (i, j) and (j, i) are considered the same when
            retrieving the information signatures.
        keep_changes : bool
            If ``True``, changes to the dataset are persistent,
            so all edges that have been cut are applied to the dataset.

        Returns
        -------
        DeconvolutionResult
            A dataclass with the information loss values and edges,
            their sorted differences, and the edges selected for deletion.

        Examples
        --------
        >>> from pybdm import BDM
        >>> bdm = BDM(ndim=2, shape=(4, 4))
        >>> X = np.array([[0, 1, 1, 1, 1, 0, 0, 0],
        ...               [1, 0, 1, 1, 0, 0, 0, 0],
        ...               [1, 1, 0, 1, 0, 0, 0, 0],
        ...               [1, 1, 1, 0, 0, 0, 0, 0],
        ...               [1, 0, 0, 0, 0, 1, 1, 1],
        ...               [0, 0, 0, 0, 1, 0, 1, 1],
        ...               [0, 0, 0, 0, 1, 1, 0, 1],
        ...               [0, 0, 0, 0, 1, 1, 1, 0]])
        >>> perturbation = PerturbationExperiment(bdm, X)
        >>> result = perturbation.deconvolve(auxiliary_cutoff=20, is_directed=False)
        >>> result.edges_for_deletion
        array([[0, 4],
               [4, 0]])
        """
        if self.X.ndim != 2:
            raise ValueError('Deconvolution is only supported for 2D datasets')

        info_loss_values = np.empty((0, 1), dtype=float)
        info_loss_edges = np.empty((0, 2), dtype=int)
        deleted_edge_graph = np.copy(self.X)

        # For undirected graphs consider each edge only once (upper triangle)
        nonzero_edges = deleted_edge_graph if is_directed else np.triu(deleted_edge_graph)
        nonzero_edges = np.column_stack(np.nonzero(nonzero_edges))
        original_bdm = self.bdm.bdm(self.X)

        for edge in nonzero_edges:
            edges_to_perturb = (edge[0], edge[1]) if is_directed else (edge, edge[::-1])

            # Delete the edge, measure the information loss, then restore it
            deleted_edge_graph[edges_to_perturb] = 0
            deleted_edge_bdm = self.bdm.bdm(deleted_edge_graph)
            info_loss = original_bdm - deleted_edge_bdm

            info_loss_edges = np.vstack((info_loss_edges, np.array([edge])))
            info_loss_values = np.vstack((info_loss_values, np.array([info_loss])))

            deleted_edge_graph[edges_to_perturb] = 1

        deconvolution_result = self._process_deconvolution(
            auxiliary_cutoff, info_loss_values, info_loss_edges, is_directed
        )

        if deconvolution_result.edges_for_deletion.size == 0:
            return deconvolution_result

        if not keep_changes:
            # Apply the deletions only to the local copy of the dataset
            deleted_edge_graph[
                deconvolution_result.edges_for_deletion[:, 0],
                deconvolution_result.edges_for_deletion[:, 1]
            ] = 0
            return deconvolution_result

        self.run(idx=deconvolution_result.edges_for_deletion, keep_changes=keep_changes)
        return deconvolution_result