From 5ef146eca6af009a4ab138b1904777f4bea86e57 Mon Sep 17 00:00:00 2001
From: Brian Thorne <brian@thorne.link>
Date: Thu, 20 Oct 2016 09:19:40 +1100
Subject: [PATCH] WIP #26 Allow alternative encoding schemes.

---
 phe/encoding.py | 248 ++++++++++++++++++++++++++++++++++++++++++++++++
 phe/paillier.py | 106 ++++++++-------------
 2 files changed, 288 insertions(+), 66 deletions(-)
 create mode 100644 phe/encoding.py

diff --git a/phe/encoding.py b/phe/encoding.py
new file mode 100644
index 0000000..fc270f1
--- /dev/null
+++ b/phe/encoding.py
@@ -0,0 +1,248 @@
+import abc
+from .paillier import PaillierPublicKey, EncodedNumber
+
+
+class BaseEncodingScheme(abc.ABC):
+    """
+    Abstract Base Class for all Encoding Schemes.
+
+    Notes:
+      Paillier encryption is only defined for non-negative integers less
+      than :attr:`PaillierPublicKey.n`. Since we frequently want to use
+      signed integers and/or floating point numbers (luxury!), values
+      must be encoded as a valid integer before encryption.
+
+      The operations of addition and multiplication [1]_ must be
+      preserved under any encoding. Namely:
+
+      1. Decode(Encode(a) + Encode(b)) = a + b
+      2. Decode(Encode(a) * Encode(b)) = a * b
+
+      for any real numbers a and b.
+
+    Review Questions:
+     - which properties are required for all possible encoding schemes?
+
+    """
+
+    @abc.abstractmethod
+    def max_encoded(self, public_key: PaillierPublicKey):
+        """Returns the largest number that can be encoded using
+        this scheme.
+
+        :param public_key: the key the bound is computed for.
+        :return: the largest encodable number for *public_key*.
+        """
+
+    @property
+    @abc.abstractmethod
+    def signed(self) -> bool:
+        """Exposes whether this Encoding Scheme supports signed numbers.
+        """
+
+    @abc.abstractmethod
+    def valid_type(self, typ) -> bool:
+        """Check whether the given type is supported for Encoding.
+        """
+
+    @abc.abstractmethod
+    def encode(self, public_key: PaillierPublicKey, value) -> (int, dict):
+        """
+        Encode the given value to an integer representation.
+
+        :returns: the integer representation, and a dictionary of any public
+            information. For example the number's precision.
+        """
+
+    @abc.abstractmethod
+    def decode(self, public_key: PaillierPublicKey, encoded: int, **public_data):
+        """Decode the integer representation.
+        May require additional arguments containing additional information,
+        e.g. exponent used to determine the precision.
+        """
+
+
+class PositiveIntegerEncoding(BaseEncodingScheme):
+    """Pass-through scheme: a positive integer is its own encoding."""
+
+    @property
+    def signed(self):
+        """This scheme does not support signed numbers."""
+        return False
+
+    def valid_type(self, typ) -> bool:
+        """Accept ``int`` and any subclass of it."""
+        return issubclass(typ, int)
+
+    def max_encoded(self, public_key: PaillierPublicKey):
+        upper_bound = public_key.max_int
+        return upper_bound
+
+    def encode(self, public_key, value):
+        return value, dict()  # value passes through; no public data needed
+
+    def decode(self, public_key, encoded, **public_data):
+        return encoded  # encode() changed nothing, so nothing to undo
+
+
+class SharedExponentEncoding(BaseEncodingScheme):
+    """
+    Encoding scheme that works for floating point numbers.
+
+    Relies on knowing an `exponent` that sets the precision.
+
+    Representing signed integers is relatively easy: we exploit the
+    modular arithmetic properties of the Paillier scheme. We choose to
+    represent only integers between +/- :meth:`max_encoded`, which is
+    approximately :attr:`~PaillierPublicKey.n`/3 (larger integers may
+    be treated as floats). The range of values between `max_encoded`
+    and `n` - `max_encoded` is reserved for detecting overflows. This
+    encoding scheme supports properties #1 and #2 listed on
+    :class:`BaseEncodingScheme`.
+
+    Representing floating point numbers as integers is a harder task.
+    Here we use a variant of fixed-precision arithmetic. In fixed
+    precision, you encode by multiplying every float by a large number
+    (e.g. 1e6) and rounding the resulting product. You decode by
+    dividing by that number. However, this encoding scheme does not
+    satisfy property #2 above: upon every multiplication, you must
+    divide by the large number. In a Paillier scheme, this is not
+    possible to do without decrypting. For some tasks, this is
+    acceptable or can be worked around, but for other tasks this can't
+    be worked around.
+
+    In our scheme, the "large number" is allowed to vary, and we keep
+    track of it. It is:
+
+      :attr:`BASE` ** :attr:`exponent`
+
+    One number has many possible encodings; this property can be used
+    to mitigate the leak of information due to the fact that
+    :attr:`exponent` is never encrypted.
+
+    """
+
+    def valid_type(self, typ) -> bool:
+        return issubclass(typ, (float, int))
+
+    @property
+    def signed(self):
+        return True
+
+    def max_encoded(self, public_key: PaillierPublicKey):
+        """Maximum int that may safely be stored.
+
+        This can be increased, if you are happy to redefine "safely" and lower
+        the chance of detecting an integer overflow.
+        """
+        return public_key.max_int // 3 - 1
+
+    def encode(self, public_key, value, **kwargs):
+        """Return the integer encoding of an int or float and its exponent.
+
+        If *value* is a float, first approximate it as an int, `int_rep`:
+
+            value = int_rep * (:attr:`BASE` ** :attr:`exponent`),
+
+        for some (typically negative) integer exponent, which can be
+        tuned using *precision* and *max_exponent*. Specifically,
+        :attr:`exponent` is chosen to be equal to or less than
+        *max_exponent*, and such that the number *precision* is not
+        rounded to zero.
+
+        Supported Optional Keyword Arguments:
+
+        precision (float):
+          If *value* is a float then *precision* is the maximum
+          **absolute** error allowed when encoding *value*. Defaults
+          to encoding *value* exactly. Lower precision leads to smaller
+          encodings, which might yield faster computation.
+
+        max_exponent (int):
+          Ensure that the returned exponent is at most this.
+
+        :param public_key: public key for which to encode (this is
+            necessary because :attr:`~PaillierPublicKey.n` varies).
+        :param value: an int or float; its integer representation must
+            satisfy abs(`int_rep`) <= :meth:`max_encoded`.
+        :param kwargs: the optional keyword arguments described above.
+        :return: ``(int_rep % n, {'exponent': exponent})`` - negative
+            values are wrapped modulo n.
+        :raises TypeError: on an unknown keyword argument, or if *value*
+            is neither int nor float and no *precision* was given.
+        :raises ValueError: if `int_rep` exceeds +/- :meth:`max_encoded`.
+        """
+        # Imported here because this module has no top-level ``math`` import.
+        import math
+
+        precision = kwargs.pop('precision', None)
+        max_exponent = kwargs.pop('max_exponent', None)
+        if kwargs:
+            raise TypeError("Unexpected encoding arguments: %s"
+                            % ', '.join(sorted(kwargs)))
+
+        # NOTE(review): BASE, LOG2_BASE and FLOAT_MANTISSA_BITS are assumed to
+        # still live on EncodedNumber, which also holds the exponent - confirm.
+        consts = EncodedNumber
+
+        # Calculate the maximum exponent for desired precision
+        if precision is not None:
+            prec_exponent = math.floor(math.log(precision, consts.BASE))
+        elif isinstance(value, int):
+            prec_exponent = 0
+        elif isinstance(value, float):
+            # Encode with *at least* as much precision as the python float.
+            # frexp gives the base-2 exponent of the float; subtracting the
+            # mantissa width gives the exponent of its least significant bit.
+            bin_lsb_exponent = math.frexp(value)[1] - consts.FLOAT_MANTISSA_BITS
+            # What's the corresponding base BASE exponent? Round that down.
+            prec_exponent = math.floor(bin_lsb_exponent / consts.LOG2_BASE)
+        else:
+            raise TypeError("Don't know the precision of type %s."
+                            % type(value))
+
+        # Remember exponents are negative for numbers < 1.
+        # If we're going to store numbers with a more negative
+        # exponent than demanded by the precision, then we may
+        # as well bump up the actual precision.
+        if max_exponent is None:
+            exponent = prec_exponent
+        else:
+            exponent = min(max_exponent, prec_exponent)
+
+        int_rep = int(round(value * pow(consts.BASE, -exponent)))
+
+        # Use this scheme's own bound: public_key.max_int no longer leaves
+        # room for overflow detection.
+        limit = self.max_encoded(public_key)
+        if abs(int_rep) > limit:
+            raise ValueError('Integer needs to be within +/- %d but got %d'
+                             % (limit, int_rep))
+
+        # Wrap negative numbers by adding n
+        return int_rep % public_key.n, {'exponent': exponent}
+
+    def decode(self, public_key, encoded, **public_data):
+        """Decode an integer produced by :meth:`encode`.
+
+        :param public_key: the key whose ``n`` the value was wrapped by.
+        :param encoded: non-negative integer representation, < n.
+        :param public_data: may carry ``exponent``; 0 is assumed if absent.
+        :return: the signed mantissa times ``BASE ** exponent``.
+        :raises ValueError: if *encoded* is not less than n.
+        :raises OverflowError: if *encoded* lies in the reserved range
+            between :meth:`max_encoded` and n - :meth:`max_encoded`.
+        """
+        exponent = public_data.get('exponent', 0)
+        limit = self.max_encoded(public_key)
+        if encoded >= public_key.n:
+            raise ValueError('Attempted to decode corrupted number')
+        if encoded <= limit:
+            mantissa = encoded
+        elif encoded >= public_key.n - limit:
+            # Upper third of the range holds negatives wrapped by adding n.
+            mantissa = encoded - public_key.n
+        else:
+            raise OverflowError('Overflow detected in decrypted number')
+        # NOTE(review): BASE is assumed to live on EncodedNumber - confirm.
+        return mantissa * pow(EncodedNumber.BASE, exponent)
diff --git a/phe/paillier.py b/phe/paillier.py
index 6f698b5..0a40012 100644
--- a/phe/paillier.py
+++ b/phe/paillier.py
@@ -22,6 +22,9 @@
 import hashlib
 import math
 import sys
+
+from phe.encoding import SharedExponentEncoding
+
 try:
     from collections.abc import Mapping
 except ImportError:
@@ -83,15 +86,13 @@ class PaillierPublicKey(object):
       g (int): part of the public key - see Paillier's paper.
       n (int): part of the public key - see Paillier's paper.
       nsquare (int): :attr:`n` ** 2, stored for frequent use.
-      max_int (int): Maximum int that may safely be stored. This can be
-        increased, if you are happy to redefine "safely" and lower the
-        chance of detecting an integer overflow.
+      max_int (int): Maximum int that may be stored.
     """
     def __init__(self, g, n):
         self.g = g
         self.n = n
         self.nsquare = n * n
-        self.max_int = n // 3 - 1
+        self.max_int = n  # NOTE(review): plaintexts must be < n; should this be n - 1? Confirm.
 
     def __repr__(self):
         nsquare = self.nsquare.to_bytes(1024, 'big')
@@ -145,23 +146,24 @@ def get_random_lt_n(self):
         """Return a cryptographically random number less than :attr:`n`"""
         return random.SystemRandom().randrange(1, self.n)
 
-    def encrypt(self, value, precision=None, r_value=None):
+    def encrypt(self, value, encoding_scheme=None, r_value=None, **encoding_args):
         """Encode and Paillier encrypt a real number *value*.
 
         Args:
-          value: an int or float to be encrypted.
-            If int, it must satisfy abs(*value*) < :attr:`n`/3.
-            If float, it must satisfy abs(*value* / *precision*) <<
-            :attr:`n`/3
-            (i.e. if a float is near the limit then detectable
-            overflow may still occur)
-          precision (float): Passed to :meth:`EncodedNumber.encode`.
-            If *value* is a float then *precision* is the maximum
-            **absolute** error allowed when encoding *value*. Defaults
-            to encoding *value* exactly.
+          value: The value to be encrypted. The type and size must be supported
+            by the encoding scheme.
+
+          encoding_scheme (BaseEncodingScheme): The encoding scheme to use
+            to encode *value*. The default, :class:`SharedExponentEncoding`,
+            encodes int and float but requires the sharing of an exponent.
+
           r_value (int): obfuscator for the ciphertext; by default (i.e.
             if *r_value* is None), a random value is used.
 
+          encoding_args: Any other arguments which are directly passed
+            to :meth:`encoding_scheme.encode`. For example `precision` is
+            supported by the default encoding scheme :class:`SharedExponentEncoding`.
+
         Returns:
           EncryptedNumber: An encryption of *value*.
 
@@ -173,7 +175,13 @@ def encrypt(self, value, precision=None, r_value=None):
         if isinstance(value, EncodedNumber):
             encoding = value
         else:
-            encoding = EncodedNumber.encode(self, value, precision)
+            if encoding_scheme is None:
+                encoding_scheme = SharedExponentEncoding()
+            if not encoding_scheme.valid_type(type(value)):
+                raise TypeError('Cannot encode value of type %s' % type(value))
+            int_repr, public_data = encoding_scheme.encode(self, value, **encoding_args)
+            # TODO remove exponent from EncodedNumber; 0 means "unscaled" meanwhile
+            encoding = EncodedNumber(self, int_repr, public_data.get('exponent', 0))
 
         return self.encrypt_encoded(encoding, r_value)
 
@@ -364,63 +372,25 @@ def decrypt(self, encrypted_number):
 
 
 class EncodedNumber(object):
-    """Represents a float or int encoded for Paillier encryption.
+    """Represents some value encoded for Paillier encryption.
 
-    For end users, this class is mainly useful for specifying precision
-    when adding/multiplying an :class:`EncryptedNumber` by a scalar.
+    For end users, this class is mainly useful as an intermediate
+    representation ready to be encrypted using the Paillier
+    cryptosystem.
 
     If you want to manually encode a number for Paillier encryption,
-    then use :meth:`encode`, if de-serializing then use
-    :meth:`__init__`.
+    then use one of the :class:`BaseEncodingScheme` implementations,
+    if de-serializing an EncodedNumber then use :meth:`__init__`.
 
 
     .. note::
         If working with other Paillier libraries you will have to agree on
-        a specific :attr:`BASE` and :attr:`LOG2_BASE` - inheriting from this
-        class and overriding those two attributes will enable this.
-
-    Notes:
-      Paillier encryption is only defined for non-negative integers less
-      than :attr:`PaillierPublicKey.n`. Since we frequently want to use
-      signed integers and/or floating point numbers (luxury!), values
-      should be encoded as a valid integer before encryption.
-
-      The operations of addition and multiplication [1]_ must be
-      preserved under this encoding. Namely:
-
-      1. Decode(Encode(a) + Encode(b)) = a + b
-      2. Decode(Encode(a) * Encode(b)) = a * b
-
-      for any real numbers a and b.
-
-      Representing signed integers is relatively easy: we exploit the
-      modular arithmetic properties of the Paillier scheme. We choose to
-      represent only integers between
-      +/-:attr:`~PaillierPublicKey.max_int`, where `max_int` is
-      approximately :attr:`~PaillierPublicKey.n`/3 (larger integers may
-      be treated as floats). The range of values between `max_int` and
-      `n` - `max_int` is reserved for detecting overflows. This encoding
-      scheme supports properties #1 and #2 above.
-
-      Representing floating point numbers as integers is a harder task.
-      Here we use a variant of fixed-precision arithmetic. In fixed
-      precision, you encode by multiplying every float by a large number
-      (e.g. 1e6) and rounding the resulting product. You decode by
-      dividing by that number. However, this encoding scheme does not
-      satisfy property #2 above: upon every multiplication, you must
-      divide by the large number. In a Paillier scheme, this is not
-      possible to do without decrypting. For some tasks, this is
-      acceptable or can be worked around, but for other tasks this can't
-      be worked around.
-
-      In our scheme, the "large number" is allowed to vary, and we keep
-      track of it. It is:
-
-        :attr:`BASE` ** :attr:`exponent`
-
-      One number has many possible encodings; this property can be used
-      to mitigate the leak of information due to the fact that
-      :attr:`exponent` is never encrypted.
+        a specific encoding scheme. Some encoding schemes include non-secret
+        information required for encoding and decoding. For example,
+        fixed-point encoding schemes usually scale the value by
+        `base` ** `exponent`.
+
+
 
       For more details, see :meth:`encode`.
 
@@ -442,6 +412,9 @@ class and overriding those two attributes will enable this.
         varies)
       encoding (int): The encoded number to store. Must be positive and
         less than :attr:`~PaillierPublicKey.max_int`.
+      scheme (EncodingScheme): The encoding scheme instance used to
+        encode and decode this value.
+
       exponent (int): Together with :attr:`BASE`, determines the level
         of fixed-precision used in encoding the number.
 
@@ -451,6 +424,7 @@ class and overriding those two attributes will enable this.
         varies)
       encoding (int): The encoded number to store. Must be positive and
         less than :attr:`~PaillierPublicKey.max_int`.
+
       exponent (int): Together with :attr:`BASE`, determines the level
         of fixed-precision used in encoding the number.
     """