From 5ef146eca6af009a4ab138b1904777f4bea86e57 Mon Sep 17 00:00:00 2001 From: Brian Thorne Date: Thu, 20 Oct 2016 09:19:40 +1100 Subject: [PATCH] WIP #26 Allow alternative encoding schemes. --- phe/encoding.py | 248 ++++++++++++++++++++++++++++++++++++++++++++++++ phe/paillier.py | 106 ++++++++------------- 2 files changed, 288 insertions(+), 66 deletions(-) create mode 100644 phe/encoding.py diff --git a/phe/encoding.py b/phe/encoding.py new file mode 100644 index 0000000..fc270f1 --- /dev/null +++ b/phe/encoding.py @@ -0,0 +1,248 @@ +import abc +from .paillier import PaillierPublicKey, EncodedNumber + + +class BaseEncodingScheme(abc.ABC): + """ + Abstract Base Class for all Encoding Schemes. + + Notes: + Paillier encryption is only defined for non-negative integers less + than :attr:`PaillierPublicKey.n`. Since we frequently want to use + signed integers and/or floating point numbers (luxury!), values + must be encoded as a valid integer before encryption. + + The operations of addition and multiplication [1]_ must be + preserved under any encoding. Namely: + + 1. Decode(Encode(a) + Encode(b)) = a + b + 2. Decode(Encode(a) * Encode(b)) = a * b + + for any real numbers a and b. + + Review Questions: + - which properties are required for all possible encoding schemes? + + """ + + @abc.abstractmethod + def max_encoded(self, public_key: PaillierPublicKey): + """Returns the largest number that can be encoded using + this scheme. + + :param public_key: + :return: + """ + + @abc.abstractproperty + def signed(self) -> bool: + """Exposes whether this Encoding Scheme supports signed numbers. + """ + + @abc.abstractmethod + def valid_type(self, typ) -> bool: + """Check whether the given type is supported for Encoding. + """ + + @abc.abstractmethod + def encode(self, public_key: PaillierPublicKey, value) -> (int, dict): + """ + Encode the given value to an integer representation. + + :returns the integer representation, and a dictionary of any public + information. 
For example the number's precision. + """ + + + @abc.abstractmethod + def decode(self, public_key: PaillierPublicKey, encoded: int, **public_data): + """Decode the integer representation. + May require additional arguments containing additional information, + e.g. exponent used to determine the precision. + """ + + +class PositiveIntegerEncoding(BaseEncodingScheme): + """ + A simple encoding scheme that works for positive integers. + """ + + def valid_type(self, typ) -> bool: + return issubclass(typ, int) + + @property + def signed(self): + return False + + def max_encoded(self, public_key: PaillierPublicKey): + return public_key.max_int + + def encode(self, public_key, value): + # the int is unchanged using this scheme + return value, {} + + def decode(self, public_key, encoded, **public_data): + return encoded + + +class SharedExponentEncoding(BaseEncodingScheme): + """ + Encoding scheme that works for floating point numbers. + + Relies on knowing an `exponent` that sets the precision. + + Representing signed integers is relatively easy: we exploit the + modular arithmetic properties of the Paillier scheme. We choose to + represent only integers between + +/-:attr:`~PaillierPublicKey.max_int`, where `max_int` is + approximately :attr:`~PaillierPublicKey.n`/3 (larger integers may + be treated as floats). The range of values between `max_int` and + `n` - `max_int` is reserved for detecting overflows. This encoding + scheme supports properties #1 and #2 above. + + Representing floating point numbers as integers is a harder task. + Here we use a variant of fixed-precision arithmetic. In fixed + precision, you encode by multiplying every float by a large number + (e.g. 1e6) and rounding the resulting product. You decode by + dividing by that number. However, this encoding scheme does not + satisfy property #2 above: upon every multiplication, you must + divide by the large number. In a Paillier scheme, this is not + possible to do without decrypting. 
For some tasks, this is + acceptable or can be worked around, but for other tasks this can't + be worked around. + + In our scheme, the "large number" is allowed to vary, and we keep + track of it. It is: + + :attr:`BASE` ** :attr:`exponent` + + One number has many possible encodings; this property can be used + to mitigate the leak of information due to the fact that + :attr:`exponent` is never encrypted. + + """ + + def valid_type(self, typ) -> bool: + return issubclass(typ, (float, int)) + + @property + def signed(self): + return True + + def max_encoded(self, public_key: PaillierPublicKey): + """Maximum int that may safely be stored. + + This can be increased, if you are happy to redefine "safely" and lower + the chance of detecting an integer overflow. + """ + return public_key.max_int // 3 - 1 + + def encode(self, public_key, value, **kwargs): + """ + + Supported Optional Keyword Arguments: + + precision (float): + If *value* is a float then *precision* is the maximum + **absolute** error allowed when encoding *value*. Defaults + to encoding *value* exactly. + + :param public_key: + :param value: + :param kwargs: + :return: + """ + + def encode(cls, public_key, scalar, precision=None, max_exponent=None): + """Return an encoding of an int or float. + + This encoding is carefully chosen so that it supports the same + operations as the Paillier cryptosystem. + + If *scalar* is a float, first approximate it as an int, `int_rep`: + + scalar = int_rep * (:attr:`BASE` ** :attr:`exponent`), + + for some (typically negative) integer exponent, which can be + tuned using *precision* and *max_exponent*. Specifically, + :attr:`exponent` is chosen to be equal to or less than + *max_exponent*, and such that the number *precision* is not + rounded to zero. + + Having found an integer representation for the float (or having + been given an int `scalar`), we then represent this integer as + a non-negative integer < :attr:`~PaillierPublicKey.n`. 
+ + Paillier homomorphic arithmetic works modulo + :attr:`~PaillierPublicKey.n`. We take the convention that a + number x < n/3 is positive, and that a number x > 2n/3 is + negative. The range n/3 < x < 2n/3 allows for overflow + detection. + + Args: + public_key (PaillierPublicKey): public key for which to encode + (this is necessary because :attr:`~PaillierPublicKey.n` + varies). + scalar: an int or float to be encrypted. + If int, it must satisfy abs(*value*) < + :attr:`~PaillierPublicKey.n`/3. + If float, it must satisfy abs(*value* / *precision*) << + :attr:`~PaillierPublicKey.n`/3 + (i.e. if a float is near the limit then detectable + overflow may still occur) + precision (float): Choose exponent (i.e. fix the precision) so + that this number is distinguishable from zero. If `scalar` + is a float, then this is set so that minimal precision is + lost. Lower precision leads to smaller encodings, which + might yield faster computation. + max_exponent (int): Ensure that the exponent of the returned + `EncodedNumber` is at most this. + + Returns: + EncodedNumber: Encoded form of *scalar*, ready for encryption + against *public_key*. + """ + # Calculate the maximum exponent for desired precision + if precision is None: + if isinstance(scalar, int): + prec_exponent = 0 + elif isinstance(scalar, float): + # Encode with *at least* as much precision as the python float + # What's the base-2 exponent on the float? + bin_flt_exponent = math.frexp(scalar)[1] + + # What's the base-2 exponent of the least significant bit? + # The least significant bit has value 2 ** bin_lsb_exponent + bin_lsb_exponent = bin_flt_exponent - cls.FLOAT_MANTISSA_BITS + + # What's the corresponding base BASE exponent? Round that down. + prec_exponent = math.floor(bin_lsb_exponent / cls.LOG2_BASE) + else: + raise TypeError("Don't know the precision of type %s." 
+ % type(scalar)) + else: + prec_exponent = math.floor(math.log(precision, cls.BASE)) + + # Remember exponents are negative for numbers < 1. + # If we're going to store numbers with a more negative + # exponent than demanded by the precision, then we may + # as well bump up the actual precision. + if max_exponent is None: + exponent = prec_exponent + else: + exponent = min(max_exponent, prec_exponent) + + int_rep = int(round(scalar * pow(cls.BASE, -exponent))) + + if abs(int_rep) > public_key.max_int: + raise ValueError('Integer needs to be within +/- %d but got %d' + % (public_key.max_int, int_rep)) + + # Wrap negative numbers by adding n + return cls(public_key, int_rep % public_key.n, exponent) + + + return int_repr, {} + + def decode(self, public_key, encoded, **public_data): + return encoded diff --git a/phe/paillier.py b/phe/paillier.py index 6f698b5..0a40012 100644 --- a/phe/paillier.py +++ b/phe/paillier.py @@ -22,6 +22,9 @@ import hashlib import math import sys + +from phe.encoding import SharedExponentEncoding + try: from collections.abc import Mapping except ImportError: @@ -83,15 +86,13 @@ class PaillierPublicKey(object): g (int): part of the public key - see Paillier's paper. n (int): part of the public key - see Paillier's paper. nsquare (int): :attr:`n` ** 2, stored for frequent use. - max_int (int): Maximum int that may safely be stored. This can be - increased, if you are happy to redefine "safely" and lower the - chance of detecting an integer overflow. + max_int (int): Maximum int that may be stored. 
""" def __init__(self, g, n): self.g = g self.n = n self.nsquare = n * n - self.max_int = n // 3 - 1 + self.max_int = n def __repr__(self): nsquare = self.nsquare.to_bytes(1024, 'big') @@ -145,23 +146,24 @@ def get_random_lt_n(self): """Return a cryptographically random number less than :attr:`n`""" return random.SystemRandom().randrange(1, self.n) - def encrypt(self, value, precision=None, r_value=None): + def encrypt(self, value, encoding_scheme=None, r_value=None, **encoding_args): """Encode and Paillier encrypt a real number *value*. Args: - value: an int or float to be encrypted. - If int, it must satisfy abs(*value*) < :attr:`n`/3. - If float, it must satisfy abs(*value* / *precision*) << - :attr:`n`/3 - (i.e. if a float is near the limit then detectable - overflow may still occur) - precision (float): Passed to :meth:`EncodedNumber.encode`. - If *value* is a float then *precision* is the maximum - **absolute** error allowed when encoding *value*. Defaults - to encoding *value* exactly. + value: The value to be encrypted. The type and size must be supported + by the encoding scheme. + + encoding_scheme (EncodingScheme): The encoding scheme to use + to encode *value*. The default scheme will encode int and + float but requires the sharing of an exponent. + r_value (int): obfuscator for the ciphertext; by default (i.e. if *r_value* is None), a random value is used. + encoding_args: Any other arguments which are directly passed + to :meth:`encoding_scheme.encode`. For example `precision` is + supported by the default encoding scheme :class:`SharedExponentEncoding`. + Returns: EncryptedNumber: An encryption of *value*. 
@@ -173,7 +175,13 @@ def encrypt(self, value, precision=None, r_value=None): if isinstance(value, EncodedNumber): encoding = value else: - encoding = EncodedNumber.encode(self, value, precision) + if encoding_scheme is None: + encoding_scheme = SharedExponentEncoding() + + if encoding_scheme.valid_type(type(value)): + int_repr, public_data = encoding_scheme.encode(self, value, **encoding_args) + # TODO remove exponent from EncodedNumber + encoding = EncodedNumber(self, int_repr, public_data['exponent']) return self.encrypt_encoded(encoding, r_value) @@ -364,63 +372,25 @@ def decrypt(self, encrypted_number): class EncodedNumber(object): - """Represents a float or int encoded for Paillier encryption. + """Represents some value encoded for Paillier encryption. - For end users, this class is mainly useful for specifying precision - when adding/multiplying an :class:`EncryptedNumber` by a scalar. + For end users, this class is mainly useful as an intermediate + representation ready to be encrypted using the Paillier + cryptosystem. If you want to manually encode a number for Paillier encryption, - then use :meth:`encode`, if de-serializing then use - :meth:`__init__`. + then use one of the :class:`BaseEncodingScheme` implementations, + if de-serializing an EncodedNumber then use :meth:`__init__`. .. note:: If working with other Paillier libraries you will have to agree on - a specific :attr:`BASE` and :attr:`LOG2_BASE` - inheriting from this - class and overriding those two attributes will enable this. - - Notes: - Paillier encryption is only defined for non-negative integers less - than :attr:`PaillierPublicKey.n`. Since we frequently want to use - signed integers and/or floating point numbers (luxury!), values - should be encoded as a valid integer before encryption. - - The operations of addition and multiplication [1]_ must be - preserved under this encoding. Namely: - - 1. Decode(Encode(a) + Encode(b)) = a + b - 2. 
Decode(Encode(a) * Encode(b)) = a * b - - for any real numbers a and b. - - Representing signed integers is relatively easy: we exploit the - modular arithmetic properties of the Paillier scheme. We choose to - represent only integers between - +/-:attr:`~PaillierPublicKey.max_int`, where `max_int` is - approximately :attr:`~PaillierPublicKey.n`/3 (larger integers may - be treated as floats). The range of values between `max_int` and - `n` - `max_int` is reserved for detecting overflows. This encoding - scheme supports properties #1 and #2 above. - - Representing floating point numbers as integers is a harder task. - Here we use a variant of fixed-precision arithmetic. In fixed - precision, you encode by multiplying every float by a large number - (e.g. 1e6) and rounding the resulting product. You decode by - dividing by that number. However, this encoding scheme does not - satisfy property #2 above: upon every multiplication, you must - divide by the large number. In a Paillier scheme, this is not - possible to do without decrypting. For some tasks, this is - acceptable or can be worked around, but for other tasks this can't - be worked around. - - In our scheme, the "large number" is allowed to vary, and we keep - track of it. It is: - - :attr:`BASE` ** :attr:`exponent` - - One number has many possible encodings; this property can be used - to mitigate the leak of information due to the fact that - :attr:`exponent` is never encrypted. + a specific EncodingScheme. Some encoding schemes include non-secret + information required for encoding and decoding. For example fixed + point encoding schemes usually raise the value by some `base` and + `exponent`. + + For more details, see :meth:`encode`. @@ -442,6 +412,9 @@ class and overriding those two attributes will enable this. varies) encoding (int): The encoded number to store. Must be positive and less than :attr:`~PaillierPublicKey.max_int`. 
+ scheme (EncodingScheme): The encoding scheme instance used to + encode and decode this value. + exponent (int): Together with :attr:`BASE`, determines the level of fixed-precision used in encoding the number. @@ -451,6 +424,7 @@ class and overriding those two attributes will enable this. varies) encoding (int): The encoded number to store. Must be positive and less than :attr:`~PaillierPublicKey.max_int`. + exponent (int): Together with :attr:`BASE`, determines the level of fixed-precision used in encoding the number. """