From 5153c63d2f3c311bd8272ba9848b296f88b409b4 Mon Sep 17 00:00:00 2001 From: Marcel Keller Date: Thu, 16 Feb 2023 12:34:22 +1100 Subject: [PATCH] More accessible machine learning functionality. --- BMR/Register.h | 2 + CHANGELOG.md | 12 + CONFIG | 2 +- Compiler/GC/types.py | 35 +- Compiler/allocator.py | 39 +- Compiler/compilerLib.py | 180 ++++- Compiler/decision_tree.py | 233 ++++-- Compiler/floatingpoint.py | 4 +- Compiler/instructions.py | 26 +- Compiler/instructions_base.py | 39 +- Compiler/library.py | 355 ++------ Compiler/ml.py | 760 +++++++++++++++--- Compiler/mpc_math.py | 2 +- Compiler/program.py | 74 +- Compiler/sorting.py | 18 + Compiler/sqrt_oram.py | 3 +- Compiler/types.py | 572 +++++++++---- ExternalIO/README.md | 11 +- GC/FakeSecret.cpp | 6 + GC/FakeSecret.h | 2 + GC/Program.hpp | 2 +- GC/RuntimeBranching.h | 3 + GC/Secret.h | 2 + GC/SemiSecret.h | 2 + GC/ShareSecret.h | 5 +- GC/ThreadMaster.hpp | 3 +- GC/instructions.h | 2 +- License.txt | 2 +- Machines/TripleMachine.cpp | 4 + Makefile | 42 +- Math/bigint.h | 12 + Math/gf2n.cpp | 27 +- Math/gf2n.h | 4 +- Math/gf2nlong.cpp | 28 +- Math/gf2nlong.h | 15 - Math/gfp.h | 1 + Networking/Exchanger.h | 3 + Networking/ServerSocket.cpp | 25 +- Networking/sockets.cpp | 2 +- Networking/sockets.h | 21 +- Networking/ssl_sockets.h | 2 +- OT/BitMatrix.h | 3 - OT/OTVole.hpp | 6 +- Processor/Data_Files.hpp | 5 +- Processor/ExternalClients.cpp | 4 + Processor/ExternalClients.h | 3 + Processor/Instruction.h | 1 + Processor/Instruction.hpp | 45 +- Processor/Machine.h | 5 + Processor/Machine.hpp | 62 +- Processor/OnlineOptions.cpp | 7 +- Processor/PrepBase.cpp | 11 +- Processor/Processor.h | 25 +- Processor/Processor.hpp | 96 ++- Processor/Program.cpp | 12 + Processor/Program.h | 5 + Processor/instructions.h | 2 +- Programs/Source/bankers_bonus.mpc | 3 +- Programs/Source/breast_logistic.mpc | 54 ++ Programs/Source/breast_tree.mpc | 33 + Programs/Source/diabetes.mpc | 32 + Programs/Source/easy_adult.mpc | 38 + Programs/Source/keras_cifar_lenet.mpc | 41 +- Programs/Source/keras_mnist_dense.mpc | 34 +- Programs/Source/keras_mnist_lenet.mpc | 34 +- Programs/Source/keras_mnist_lenet_predict.mpc | 2 +- Programs/Source/test_sbitfix.mpc | 3 +- Programs/Source/torch_alex_test.mpc | 92 +++ Programs/Source/torch_cifar_alex.mpc | 70 ++ Programs/Source/torch_cifar_lenet.mpc | 57 ++ .../Source/torch_cifar_lenet_pretrain.mpc | 81 ++ Programs/Source/torch_mnist_dense.mpc | 57 ++ .../Source/torch_mnist_dense_pretrain.mpc | 72 ++ Programs/Source/torch_mnist_dense_test.mpc | 40 + Programs/Source/torch_mnist_lenet.mpc | 49 ++ Programs/Source/torch_mnist_lenet_predict.mpc | 74 ++ Protocols/FakeShare.h | 1 + Protocols/Hemi.hpp | 4 +- Protocols/SemiInput.hpp | 2 +- Protocols/ShareInterface.h | 2 + README.md | 101 ++- Scripts/build.sh | 7 +- Scripts/compile-emulate.py | 17 + Scripts/compile-run.py | 23 + Scripts/memory-usage.py | 15 +- Scripts/setup-clients.sh | 3 + Scripts/setup-ssl.sh | 2 +- Scripts/test_tutorial.sh | 10 +- Scripts/tldr.sh | 3 +- Scripts/torch_cifar_alex_import.py | 61 ++ Scripts/torch_mnist_dense_import.py | 46 ++ Scripts/torch_mnist_lenet_import.py | 51 ++ Tools/FlexBuffer.cpp | 5 +- Tools/Hash.cpp | 5 + Tools/Hash.h | 1 + Tools/Lock.h | 17 + Tools/ezOptionParser.h | 20 +- Yao/YaoEvalWire.cpp | 9 + Yao/YaoEvalWire.h | 2 + Yao/YaoEvaluator.cpp | 8 +- Yao/YaoEvaluator.h | 2 +- Yao/YaoGarbleWire.cpp | 9 + Yao/YaoGarbleWire.h | 2 + Yao/YaoGarbler.cpp | 5 +- Yao/YaoGarbler.h | 2 +- Yao/YaoPlayer.cpp | 2 +- deps/libOTe | 2 +- doc/Compiler.rst | 8 + 
doc/Doxyfile | 2 +- doc/add-protocol.rst | 12 +- doc/compilation.rst | 8 +- doc/gen-readme.sh | 7 +- doc/index.rst | 14 +- doc/io.rst | 4 +- doc/low-level.rst | 6 + doc/machine-learning.rst | 472 ++++++++++- doc/ml-quickstart.rst | 92 +++ doc/networking.rst | 2 + doc/troubleshooting.rst | 20 + 119 files changed, 3857 insertions(+), 969 deletions(-) create mode 100644 Programs/Source/breast_logistic.mpc create mode 100644 Programs/Source/breast_tree.mpc create mode 100644 Programs/Source/diabetes.mpc create mode 100644 Programs/Source/easy_adult.mpc create mode 100644 Programs/Source/torch_alex_test.mpc create mode 100644 Programs/Source/torch_cifar_alex.mpc create mode 100644 Programs/Source/torch_cifar_lenet.mpc create mode 100644 Programs/Source/torch_cifar_lenet_pretrain.mpc create mode 100644 Programs/Source/torch_mnist_dense.mpc create mode 100644 Programs/Source/torch_mnist_dense_pretrain.mpc create mode 100644 Programs/Source/torch_mnist_dense_test.mpc create mode 100644 Programs/Source/torch_mnist_lenet.mpc create mode 100644 Programs/Source/torch_mnist_lenet_predict.mpc create mode 100755 Scripts/compile-emulate.py create mode 100755 Scripts/compile-run.py create mode 100755 Scripts/torch_cifar_alex_import.py create mode 100755 Scripts/torch_mnist_dense_import.py create mode 100755 Scripts/torch_mnist_lenet_import.py create mode 100644 doc/ml-quickstart.rst diff --git a/BMR/Register.h b/BMR/Register.h index 4def65901..2085eb25a 100644 --- a/BMR/Register.h +++ b/BMR/Register.h @@ -296,6 +296,8 @@ class ProgramRegister : public Phase, public Register static void andm(GC::Processor&, const BaseInstruction&) { throw runtime_error("andm not implemented"); } + static void run_tapes(const vector&) { throw not_implemented(); } + // most BMR phases don't need actual input template static T get_input(GC::Processor& processor, const InputArgs& args) diff --git a/CHANGELOG.md b/CHANGELOG.md index f201d4640..9a3a276d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ The changelog explains changes pulled through from the private development repository. Bug fixes and small enhancements are committed between releases and not documented here. +## 0.3.5 (Feb 16, 2023) + +- Easier-to-use machine learning interface +- Integrated compilation-execution facility +- Import/export sequential models and parameters from/to PyTorch +- Binary-format input files +- Less aggressive round optimization for faster compilation by default +- Multithreading with client interface +- Functionality to protect order of specific memory accesses +- Oblivious transfer works again on older (pre-2011) x86 CPUs +- clang is used by default + ## 0.3.4 (Nov 9, 2022) - Decision tree learning diff --git a/CONFIG b/CONFIG index 0d41c9ef7..6d5f0f170 100644 --- a/CONFIG +++ b/CONFIG @@ -47,7 +47,7 @@ endif USE_KOS = 0 # allow to set compiler in CONFIG.mine -CXX = g++ +CXX = clang++ # use CONFIG.mine to overwrite DIR settings -include CONFIG.mine diff --git a/Compiler/GC/types.py b/Compiler/GC/types.py index f70ee6417..1c024c7e3 100644 --- a/Compiler/GC/types.py +++ b/Compiler/GC/types.py @@ -711,16 +711,23 @@ def n_elements(): def mem_size(): return n @classmethod - def get_input_from(cls, player): + def get_input_from(cls, player, size=1, f=0): """ Secret input from :py:obj:`player`. The input is decomposed into bits. 
:param: player (int) """ + v = [0] * n sbits._check_input_player(player) - res = cls.from_vec(sbit() for i in range(n)) - inst.inputbvec(n + 3, 0, player, *res.v) - return res + instructions_base.check_vector_size(size) + for i in range(size): + vv = [sbit() for i in range(n)] + inst.inputbvec(n + 3, f, player, *vv) + for j in range(n): + tmp = vv[j] << i + v[j] = tmp ^ v[j] + sbits._check_input_player(player) + return cls.from_vec(v) get_raw_input_from = get_input_from @classmethod def from_vec(cls, vector): @@ -728,6 +735,7 @@ def from_vec(cls, vector): res.v = _complement_two_extend(list(vector), n)[:n] return res def __init__(self, other=None, size=None): + instructions_base.check_vector_size(size) if other is not None: if util.is_constant(other): t = sbits.get_type(size or 1) @@ -1148,6 +1156,9 @@ class sbitint(_bitint, _number, sbits, _sbitintbase): mul: 15 lt: 0 + This class is retained for compatibility, but development now + focuses on :py:class:`sbitintvec`. + """ n_bits = None bin_type = None @@ -1347,9 +1358,12 @@ def output(self): cbits(0), cbits(0)) class sbitfix(_fix): - """ Secret signed integer in one binary register. + """ Secret signed fixed-point number in one binary register. Use :py:obj:`set_precision()` to change the precision. + This class is retained for compatibility, but development now + focuses on :py:class:`sbitfixvec`. + Example:: print_ln('add: %s', (sbitfix(0.5) + sbitfix(0.3)).reveal()) @@ -1453,15 +1467,8 @@ def get_input_from(cls, player, size=1): :param: player (int) """ - v = [0] * sbitfix.k - sbits._check_input_player(player) - for i in range(size): - vv = [sbit() for i in range(sbitfix.k)] - inst.inputbvec(len(v) + 3, sbitfix.f, player, *vv) - for j in range(sbitfix.k): - tmp = vv[j] << i - v[j] = tmp ^ v[j] - return cls._new(cls.int_type.from_vec(v)) + return cls._new(cls.int_type.get_input_from(player, size=size, + f=sbitfix.f)) def __init__(self, value=None, *args, **kwargs): if isinstance(value, (list, tuple)): self.v = self.int_type.from_vec(sbitvec([x.v for x in value])) diff --git a/Compiler/allocator.py b/Compiler/allocator.py index b68160434..980a189a4 100644 --- a/Compiler/allocator.py +++ b/Compiler/allocator.py @@ -315,7 +315,6 @@ def dependency_graph(self, merge_classes): last_def = defaultdict_by_id(lambda: -1) last_mem_write = [] last_mem_read = [] - warned_about_mem = [] last_mem_write_of = defaultdict(list) last_mem_read_of = defaultdict(list) last_print_str = None @@ -364,20 +363,22 @@ def mem_access(n, instr, last_access_this_kind, last_access_other_kind): addr_i = addr + i handle_mem_access(addr_i, reg_type, last_access_this_kind, last_access_other_kind) - if block.warn_about_mem and not warned_about_mem and \ - (instr.get_size() > 100): + if block.warn_about_mem and \ + not block.parent.warned_about_mem and \ + (instr.get_size() > 100) and not instr._protect: print('WARNING: Order of memory instructions ' \ 'not preserved due to long vector, errors possible') - warned_about_mem.append(True) + block.parent.warned_about_mem = True else: handle_mem_access(addr, reg_type, last_access_this_kind, last_access_other_kind) - if block.warn_about_mem and not warned_about_mem and \ - not isinstance(instr, DirectMemoryInstruction): + if block.warn_about_mem and \ + not block.parent.warned_about_mem and \ + not isinstance(instr, DirectMemoryInstruction) and \ + not instr._protect: print('WARNING: Order of memory instructions ' \ 'not preserved, errors possible') - # hack - warned_about_mem.append(True) + block.parent.warned_about_mem = True 
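The `_protect` attribute consulted above is set per instruction from a program-level switch (see the new `MemoryInstruction` base class in Compiler/instructions_base.py further down in this patch). The MaxPool backward pass in Compiler/ml.py uses that switch to keep concurrent read-modify-write accesses in order; a minimal sketch of the pattern, with names as in that code::

    get_program().protect_memory(True)    # accesses below keep their relative order
    self.nabla_X[bi][hh][ww][k] += res    # read-modify-write on shared memory
    get_program().protect_memory(False)   # re-enable reordering optimization
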
def strict_mem_access(n, last_this_kind, last_other_kind): if last_other_kind and last_this_kind and \ @@ -473,14 +474,14 @@ def keep_text_order(inst, n): depths[n] = depth if isinstance(instr, ReadMemoryInstruction): - if options.preserve_mem_order: + if options.preserve_mem_order or instr._protect: strict_mem_access(n, last_mem_read, last_mem_write) - else: + elif not options.preserve_mem_order: mem_access(n, instr, last_mem_read_of, last_mem_write_of) elif isinstance(instr, WriteMemoryInstruction): - if options.preserve_mem_order: + if options.preserve_mem_order or instr._protect: strict_mem_access(n, last_mem_write, last_mem_read) - else: + elif not options.preserve_mem_order: mem_access(n, instr, last_mem_write_of, last_mem_read_of) elif isinstance(instr, matmulsm): if options.preserve_mem_order: @@ -495,7 +496,7 @@ def keep_text_order(inst, n): add_edge(last_print_str, n) last_print_str = n elif isinstance(instr, PublicFileIOInstruction): - keep_order(instr, n, instr.__class__) + keep_order(instr, n, PublicFileIOInstruction) elif isinstance(instr, prep_class): keep_order(instr, n, instr.args[0]) elif isinstance(instr, StackInstruction): @@ -586,7 +587,7 @@ class RegintOptimizer: def __init__(self): self.cache = util.dict_by_id() - def run(self, instructions): + def run(self, instructions, program): for i, inst in enumerate(instructions): if isinstance(inst, ldint_class): self.cache[inst.args[0]] = inst.args[1] @@ -601,6 +602,7 @@ def run(self, instructions): elif isinstance(inst, IndirectMemoryInstruction): if inst.args[1] in self.cache: instructions[i] = inst.get_direct(self.cache[inst.args[1]]) + instructions[i]._protect = inst._protect elif type(inst) == convint_class: if inst.args[1] in self.cache: res = self.cache[inst.args[1]] @@ -614,4 +616,13 @@ def run(self, instructions): if op == 0: instructions[i] = ldsi(inst.args[0], 0, add_to_prog=False) + elif isinstance(inst, (crash, cond_print_str, cond_print_plain)): + if inst.args[0] in self.cache: + cond = self.cache[inst.args[0]] + if not cond: + instructions[i] = None + pre = len(instructions) instructions[:] = list(filter(lambda x: x is not None, instructions)) + post = len(instructions) + if pre != post and program.options.verbose: + print('regint optimizer removed %d instructions' % (pre - post)) diff --git a/Compiler/compilerLib.py b/Compiler/compilerLib.py index 462a5d108..bb80dc344 100644 --- a/Compiler/compilerLib.py +++ b/Compiler/compilerLib.py @@ -3,6 +3,7 @@ import re import sys import tempfile +import subprocess from optparse import OptionParser from Compiler.exceptions import CompilerError @@ -12,11 +13,12 @@ class Compiler: - def __init__(self, custom_args=None, usage=None): + def __init__(self, custom_args=None, usage=None, execute=False): if usage: self.usage = usage else: self.usage = "usage: %prog [options] filename [args]" + self.execute = execute self.custom_args = custom_args self.build_option_parser() self.VARS = {} @@ -72,7 +74,8 @@ def build_option_parser(self): "--optimize-hard", action="store_true", dest="optimize_hard", - help="currently not in use", + help="lower number of rounds at higher compilation cost " + "(disables -C and increases the budget to 100000)", ) parser.add_option( "-u", @@ -157,8 +160,8 @@ def build_option_parser(self): "-b", "--budget", dest="budget", - default=defaults.budget, - help="set budget for optimized loop unrolling " "(default: 100000)", + help="set budget for optimized loop unrolling (default: %d)" % \ + defaults.budget, ) parser.add_option( "-X", @@ -195,7 +198,8 @@ def 
build_option_parser(self): "--CISC", action="store_true", dest="cisc", - help="faster CISC compilation mode", + help="faster CISC compilation mode " + "(used by default unless -O is given)", ) parser.add_option( "-K", @@ -217,15 +221,62 @@ def build_option_parser(self): dest="verbose", help="more verbose output", ) + if self.execute: + parser.add_option( + "-E", + "--execute", + dest="execute", + help="protocol to execute with", + ) + parser.add_option( + "-H", + "--hostfile", + dest="hostfile", + help="hosts to execute with", + ) self.parser = parser def parse_args(self): self.options, self.args = self.parser.parse_args(self.custom_args) - if self.options.optimize_hard: - print("Note that -O/--optimize-hard currently has no effect") + if self.execute: + if not self.options.execute: + raise CompilerError("must give name of protocol with '-E'") + protocol = self.options.execute + if protocol.find("ring") >= 0 or protocol.find("2k") >= 0 or \ + protocol.find("brain") >= 0 or protocol == "emulate": + if not (self.options.ring or self.options.binary): + self.options.ring = "64" + if self.options.field: + raise CompilerError( + "field option not compatible with %s" % protocol) + else: + if protocol.find("bin") >= 0 or protocol.find("ccd") >= 0 or \ + protocol.find("bmr") >= 0 or \ + protocol in ("replicated", "tinier", "tiny", "yao"): + if not self.options.binary: + self.options.binary = "32" + if self.options.ring or self.options.field: + raise CompilerError( + "ring/field options not compatible with %s" % + protocol) + if self.options.ring: + raise CompilerError( + "ring option not compatible with %s" % protocol) + if protocol == "emulate": + self.options.keep_cisc = '' def build_program(self, name=None): self.prog = Program(self.args, self.options, name=name) + if self.execute: + if self.options.execute in \ + ("emulate", "ring", "rep-field", "semi2k"): + self.prog.use_trunc_pr = True + if self.options.execute in ("ring",): + self.prog.use_split(3) + if self.options.execute in ("semi2k",): + self.prog.use_split(2) + if self.options.execute in ("rep4-ring",): + self.prog.use_split(4) def build_vars(self): from . 
import comparison, floatingpoint, instructions, library, types @@ -283,11 +334,15 @@ def build_vars(self): ]: del self.VARS[i] - def prep_compile(self, name=None): + def prep_compile(self, name=None, build=True): self.parse_args() if len(self.args) < 1 and name is None: self.parser.print_help() exit(1) + if build: + self.build(name=name) + + def build(self, name=None): self.build_program(name=name) self.build_vars() @@ -307,7 +362,7 @@ def compile_file(self): if if_stack and not re.match(if_stack[-1][0], line): if_stack.pop() m = re.match( - r"(\s*)for +([a-zA-Z_]+) +in " r"+range\(([0-9a-zA-Z_]+)\):", + r"(\s*)for +([a-zA-Z_]+) +in " r"+range\(([0-9a-zA-Z_.]+)\):", line, ) if m: @@ -403,3 +458,110 @@ def finalize_compile(self): print("Memory size:", dict(self.prog.allocated_mem)) return self.prog + + @staticmethod + def executable_from_protocol(protocol): + match = { + "ring": "replicated-ring", + "rep-field": "replicated-field", + "replicated": "replicated-bin" + } + if protocol in match: + protocol = match[protocol] + if protocol.find("bmr") == -1: + protocol = re.sub("^mal-", "malicious-", protocol) + if protocol == "emulate": + return protocol + ".x" + else: + return protocol + "-party.x" + + def local_execution(self, args=[]): + executable = self.executable_from_protocol(self.options.execute) + if not os.path.exists(executable): + print("Creating binary for virtual machine...") + try: + subprocess.run(["make", executable], check=True) + except: + raise CompilerError( + "Cannot produce %s. " % executable + \ + "Note that compilation requires a few GB of RAM.") + vm = 'Scripts/%s.sh' % self.options.execute + os.execl(vm, vm, self.prog.name, *args) + + def remote_execution(self, args=[]): + vm = self.executable_from_protocol(self.options.execute) + hosts = list(x.strip() + for x in filter(None, open(self.options.hostfile))) + # test availability before compilation + from fabric import Connection + import subprocess + print("Creating static binary for virtual machine...") + subprocess.run(["make", "static/%s" % vm], check=True) + + # transfer files + import glob + hostnames = [] + destinations = [] + for host in hosts: + split = host.split('/', maxsplit=1) + hostnames.append(split[0]) + if len(split) > 1: + destinations.append(split[1]) + else: + destinations.append('.') + connections = [Connection(hostname) for hostname in hostnames] + print("Setting up players...") + + def run(i): + dest = destinations[i] + connection = connections[i] + connection.run( + "mkdir -p %s/{Player-Data,Programs/{Bytecode,Schedules}} " % \ + dest) + # executable + connection.put("static/%s" % vm, dest) + # program + dest += "/" + connection.put("Programs/Schedules/%s.sch" % self.prog.name, + dest + "Programs/Schedules") + for filename in glob.glob( + "Programs/Bytecode/%s-*.bc" % self.prog.name): + connection.put(filename, dest + "Programs/Bytecode") + # inputs + for filename in glob.glob("Player-Data/Input*-P%d-*" % i): + connection.put(filename, dest + "Player-Data") + # key and certificates + for suffix in ('key', 'pem'): + connection.put("Player-Data/P%d.%s" % (i, suffix), + dest + "Player-Data") + for filename in glob.glob("Player-Data/*.0"): + connection.put(filename, dest + "Player-Data") + + import threading + import random + threads = [] + for i in range(len(hosts)): + threads.append(threading.Thread(target=run, args=(i,))) + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + # execution + threads = [] + # random port numbers to avoid conflict + port = 10000 + 
random.randrange(40000) + if '@' in hostnames[0]: + party0 = hostnames[0].split('@')[1] + else: + party0 = hostnames[0] + for i in range(len(connections)): + run = lambda i: connections[i].run( + "cd %s; ./%s -p %d %s -h %s -pn %d %s" % \ + (destinations[i], vm, i, self.prog.name, party0, port, + ' '.join(args))) + threads.append(threading.Thread(target=run, args=(i,))) + for thread in threads: + thread.start() + for thread in threads: + thread.join() diff --git a/Compiler/decision_tree.py b/Compiler/decision_tree.py index 89e3fe5c7..7e25f1591 100644 --- a/Compiler/decision_tree.py +++ b/Compiler/decision_tree.py @@ -8,7 +8,6 @@ debug = False debug_split = False -debug_layers = False max_leaves = None def get_type(x): @@ -70,26 +69,35 @@ def Sort(keys, *to_sort, n_bits=None, time=False): bs = Matrix.create_from( sum([k.get_vector().bit_decompose(nb) for k, nb in reversed(list(zip(keys, n_bits)))], [])) - res = Matrix.create_from(to_sort) + get_vec = lambda x: x[:] if isinstance(x, Array) else x + res = Matrix.create_from(get_vec(x).v if isinstance(get_vec(x), sfix) else x + for x in to_sort) res = res.transpose() if time: start_timer(11) - print_ln('sort') radix_sort_from_matrix(bs, res) if time: stop_timer(11) stop_timer(1) - return res.transpose() + res = res.transpose() + return [sfix._new(get_vec(x), k=get_vec(y).k, f=get_vec(y).f) + if isinstance(get_vec(y), sfix) + else x for (x, y) in zip(res, to_sort)] -def VectMax(key, *data): +def VectMax(key, *data, debug=False): def reducer(x, y): b = x[0] > y[0] + if debug: + print_ln('max b=%s', b.reveal()) return [b.if_else(xx, yy) for xx, yy in zip(x, y)] if debug: key = list(key) data = [list(x) for x in data] print_ln('vect max key=%s data=%s', util.reveal(key), util.reveal(data)) - return util.tree_reduce(reducer, zip(key, *data))[1:] + res = util.tree_reduce(reducer, zip(key, *data))[1:] + if debug: + print_ln('vect max res=%s', util.reveal(res)) + return res def GroupSum(g, x): assert len(g) == len(x) @@ -161,19 +169,19 @@ def ModifiedGini(g, y, debug=False): wqs = w[0] ** 2 + w[1] ** 2 res = sfix(uqs) / us + sfix(wqs) / ws if debug: + print_ln('g=%s y=%s s=%s', + util.reveal(g), util.reveal(y), + util.reveal(s)) print_ln('u0=%s', util.reveal(u[0])) print_ln('u0=%s', util.reveal(u[1])) print_ln('us=%s', util.reveal(us)) print_ln('w0=%s', util.reveal(w[0])) print_ln('w1=%s', util.reveal(w[1])) print_ln('ws=%s', util.reveal(ws)) - print_ln('p=%s', util.reveal(p)) - print_ln('q=%s', util.reveal(q)) - print_ln('g=%s y=%s s=%s', - util.reveal(g), util.reveal(y), - util.reveal(s)) + print_ln('uqs=%s', util.reveal(uqs)) + print_ln('wqs=%s', util.reveal(wqs)) if debug: - print_ln('gini %s %s', str(res), util.reveal(res)) + print_ln('gini %s %s', type(res), util.reveal(res)) return res MIN_VALUE = -10000 @@ -181,11 +189,16 @@ def ModifiedGini(g, y, debug=False): def FormatLayer(h, g, *a): return CropLayer(h, *FormatLayer_without_crop(g, *a)) -def FormatLayer_without_crop(g, *a): +def FormatLayer_without_crop(g, *a, debug=False): for x in a: assert len(x) == len(g) v = [g.if_else(aa, 0) for aa in a] + if debug: + print_ln('format in %s', util.reveal(a)) + print_ln('format mux %s', util.reveal(v)) v = Sort([g.bit_not()], *v, n_bits=[1]) + if debug: + print_ln('format sort %s', util.reveal(v)) return v def CropLayer(k, *v): @@ -243,8 +256,9 @@ def ApplyTests(self, x, AID, Threshold): def _(j): e[j][:] = AID[:] == j xx = sum(x[j] * e[j] for j in range(m)) - if debug: + if self.debug > 1: print_ln('apply e=%s xx=%s', util.reveal(e), util.reveal(xx)) 
+ print_ln('threshold %s', util.reveal(Threshold)) return 2 * xx < Threshold def AttributeWiseTestSelection(self, g, x, y, time=False, debug=False): @@ -252,10 +266,10 @@ def AttributeWiseTestSelection(self, g, x, y, time=False, debug=False): assert len(g) == len(y) if time: start_timer(2) - s = ModifiedGini(g, y, debug=debug) + s = ModifiedGini(g, y, debug=debug or self.debug > 2) if time: stop_timer(2) - if debug: + if debug or self.debug > 1: print_ln('gini %s', s.reveal()) xx = x t = get_type(x).Array(len(x)) @@ -296,35 +310,46 @@ def GlobalTestSelection(self, x, y, g): @for_range_multithread(self.n_threads, 1, m) def _(j): single = not self.n_threads or self.n_threads == 1 - print_ln('run %s', j) + time = self.time and single + if debug: + print_ln('run %s', j) @if_e(self.attr_lengths[j]) def _(): u[j][:], v[j][:] = Sort((PrefixSum(g), x[j]), x[j], y, - n_bits=[util.log2(n), 1], time=single) + n_bits=[util.log2(n), 1], time=time) @else_ def _(): u[j][:], v[j][:] = Sort((PrefixSum(g), x[j]), x[j], y, n_bits=[util.log2(n), None], - time=single) + time=time) if self.debug_threading: print_ln('global sort %s %s %s', j, util.reveal(u[j]), util.reveal(v[j])) t[j][:], s[j][:] = self.AttributeWiseTestSelection( - g, u[j], v[j], time=single, debug=self.debug_selection) + g, u[j], v[j], time=time, debug=self.debug_selection) if self.debug_threading: print_ln('global attribute %s %s %s', j, util.reveal(t[j]), util.reveal(s[j])) n = len(g) - a, tt = [sint.Array(n) for i in range(2)] + a = sint.Array(n) if self.debug_threading: print_ln('global s=%s', util.reveal(s)) if self.debug_gini: print_ln('Gini indices ' + ' '.join(str(i) + ':%s' for i in range(m)), *(ss[0].reveal() for ss in s)) - start_timer(4) - a[:], tt[:] = VectMax((s[j][:] for j in range(m)), range(m), - (t[j][:] for j in range(m))) - stop_timer(4) + if self.time: + start_timer(4) + if self.debug > 1: + print_ln('s=%s', s.reveal_nested()) + print_ln('t=%s', t.reveal_nested()) + a[:], tt = VectMax((s[j][:] for j in range(m)), range(m), + (t[j][:] for j in range(m)), debug=self.debug > 1) + tt = Array.create_from(tt) + if self.time: + stop_timer(4) + if self.debug > 1: + print_ln('a=%s', util.reveal(a)) + print_ln('tt=%s', util.reveal(tt)) return a[:], tt[:] def TrainInternalNodes(self, k, x, y, g, NID): @@ -333,13 +358,18 @@ def TrainInternalNodes(self, k, x, y, g, NID): assert len(xx) == len(g) AID, Threshold = self.GlobalTestSelection(x, y, g) s = GroupSame(g[:], y[:]) - if debug or debug_split: + if self.debug > 1 or debug_split: print_ln('AID=%s', util.reveal(AID)) print_ln('Threshold=%s', util.reveal(Threshold)) print_ln('GroupSame=%s', util.reveal(s)) AID, Threshold = s.if_else(0, AID), s.if_else(MIN_VALUE, Threshold) + if self.debug > 1 or debug_split: + print_ln('AID=%s', util.reveal(AID)) + print_ln('Threshold=%s', util.reveal(Threshold)) b = self.ApplyTests(x, AID, Threshold) - return FormatLayer_without_crop(g[:], NID, AID, Threshold), b + layer = FormatLayer_without_crop(g[:], NID, AID, Threshold, + debug=self.debug > 1) + return *layer, b @method_block def train_layer(self, k): @@ -347,19 +377,21 @@ def train_layer(self, k): y = self.y g = self.g NID = self.NID - layer_matrix = self.layer_matrix - self.layer_matrix[k], b = \ + if self.debug > 1: + print_ln('g=%s', g.reveal()) + print_ln('y=%s', y.reveal()) + print_ln('x=%s', x.reveal_nested()) + self.nids[k], self.aids[k], self.thresholds[k], b = \ self.TrainInternalNodes(k, x, y, g, NID) - if debug: - print_ln('internal %s %s', - util.reveal(layer_matrix[k]), 
util.reveal(b)) - if debug_layers: + if self.debug > 1: print_ln('layer %s:', k) - for name, data in zip(('NID', 'AID', 'Thr'), layer_matrix[k]): + for name, data in zip(('NID', 'AID', 'Thr'), + (self.nids[k], self.aids[k], + self.thresholds[k])): print_ln(' %s: %s', name, data.reveal()) NID[:] = 2 ** k * b + NID b_not = b.bit_not() - if debug: + if self.debug > 1: print_ln('b_not=%s', b_not.reveal()) g[:] = GroupFirstOne(g, b_not) + GroupFirstOne(g, b) y[:], g[:], NID[:], *xx = Sort([b], y, g, NID, *x, n_bits=[1]) @@ -388,33 +420,38 @@ def __init__(self, x, y, h, binary=False, attr_lengths=None, self.NID.assign_all(1) self.y = Array.create_from(y) self.x = Matrix.create_from(x) - self.layer_matrix = sint.Tensor([h, 3, n]) + self.nids, self.aids = [sint.Matrix(h, n) for i in range(2)] + self.thresholds = self.x.value_type.Matrix(h, n) self.n_threads = n_threads self.debug_selection = False self.debug_threading = False - self.debug_gini = True + self.debug_gini = False + self.debug = False + self.time = False def train(self): """ Train and return decision tree. """ - h = len(self.layer_matrix) + h = len(self.nids) @for_range(h) def _(k): self.train_layer(k) return self.get_tree(h) - def train_with_testing(self, *test_set): + def train_with_testing(self, *test_set, output=False): """ Train decision tree and test against test data. :param y: binary labels (list or sint vector) :param x: sample data (by attribute, list or :py:obj:`~Compiler.types.Matrix`) + :param output: output tree after every level :returns: tree """ - for k in range(len(self.layer_matrix)): + for k in range(len(self.nids)): self.train_layer(k) tree = self.get_tree(k + 1) - output_decision_tree(tree) + if output: + output_decision_tree(tree) test_decision_tree('train', tree, self.y, self.x, n_threads=self.n_threads) if test_set: @@ -425,7 +462,8 @@ def train_with_testing(self, *test_set): def get_tree(self, h): Layer = [None] * (h + 1) for k in range(h): - Layer[k] = CropLayer(k, *self.layer_matrix[k]) + Layer[k] = CropLayer(k, self.nids[k], self.aids[k], + self.thresholds[k]) Layer[h] = TrainLeafNodes(h, self.g[:], self.y[:], self.NID) return Layer @@ -479,8 +517,9 @@ def run_decision_tree(layers, data): bits = layers[h][0].equal(index, h) return pick(bits, layers[h][1]) -def test_decision_tree(name, layers, y, x, n_threads=None): - start_timer(100) +def test_decision_tree(name, layers, y, x, n_threads=None, time=False): + if time: + start_timer(100) n = len(y) x = x.transpose().reveal() y = y.reveal() @@ -488,7 +527,8 @@ def test_decision_tree(name, layers, y, x, n_threads=None): truth = regint.Array(n) correct = regint.Array(2) parts = regint.Array(2) - layers = [Matrix.create_from(util.reveal(layer)) for layer in layers] + layers = [[Array.create_from(util.reveal(x)) for x in layer] + for layer in layers] @for_range_multithread(n_threads, 1, n) def _(i): guess[i] = run_decision_tree([[part[:] for part in layer] @@ -501,4 +541,105 @@ def _(i): correct[truth[i]] += c print_ln('%s for height %s: %s/%s (%s/%s, %s/%s)', name, len(layers) - 1, sum(correct), n, correct[0], parts[0], correct[1], parts[1]) - stop_timer(100) + if time: + stop_timer(100) + +class TreeClassifier: + """ Tree classification with convenient interface. Uses + :py:class:`TreeTrainer` internally. 
+ + :param max_depth: the depth of the decision tree + + """ + def __init__(self, max_depth): + self.max_depth = max_depth + + @staticmethod + def get_attr_lengths(attr_types): + if attr_types == None: + return None + else: + return [1 if x == 'b' else 0 for x in attr_types] + + def fit(self, X, y, attr_types=None): + """ Train tree. + + :param X: sample data with row-wise samples (sint/sfix matrix) + :param y: binary labels (sint list/array) + + """ + self.tree = TreeTrainer( + X.transpose(), y, self.max_depth, + attr_lengths=self.get_attr_lengths(attr_types)).train() + + def fit_with_testing(self, X_train, y_train, X_test, y_test, + attr_types=None, output_trees=False, debug=False): + """ Train tree with accuracy output after every level. + + :param X_train: training data with row-wise samples (sint/sfix matrix) + :param y_train: training binary labels (sint list/array) + :param X_test: testing data with row-wise samples (sint/sfix matrix) + :param y_test: testing binary labels (sint list/array) + :param attr_types: attributes types (list of 'b'/'c' for + binary/continuous; default is all continuous) + :param output_trees: output tree after every level + :param debug: output debugging information + + """ + trainer = TreeTrainer(X_train.transpose(), y_train, self.max_depth, + attr_lengths=self.get_attr_lengths(attr_types)) + trainer.debug = debug + trainer.debug_gini = debug + trainer.debug_threading = debug > 1 + self.tree = trainer.train_with_testing(y_test, X_test.transpose(), + output=output_trees) + + def predict(self, X): + """ Use tree for prediction. + + :param X: sample data with row-wise samples (sint/sfix matrix) + :returns: sint array + + """ + res = sint.Array(len(X)) + @for_range(len(X)) + def _(i): + res[i] = run_decision_tree(self.tree, X[i]) + return res + + def output(self): + """ Output decision tree. """ + output_decision_tree(self.tree) + +def preprocess_pandas(data): + """ Preprocess pandas data frame to suit + :py:class:`TreeClassifier` by expanding non-continuous attributes + to several binary attributes as a unary encoding. + + :returns: a tuple of the processed data and a type list for the + :py:obj:`attr_types` argument. 
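
    A minimal end-to-end sketch combining this helper with
    :py:class:`TreeClassifier` (hypothetical names; assumes a pandas
    data frame :py:obj:`df` holding the attributes and a NumPy vector
    :py:obj:`labels` holding binary labels, both known to player 0,
    input via :py:func:`input_tensor_via` as in the example programs
    added by this patch)::

        data, attr_types = preprocess_pandas(df)
        X = sfix.input_tensor_via(0, data)    # secret-share samples
        y = sint.input_tensor_via(0, labels)  # secret-share labels
        tree = TreeClassifier(max_depth=5)
        tree.fit(X, y, attr_types=attr_types)
        tree.output()
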
+ + """ + import pandas + import numpy + res = [] + types = [] + for i, t in enumerate(data.dtypes): + if pandas.api.types.is_int64_dtype(t): + res.append(data.iloc[:,i].to_numpy()) + types.append('c') + elif pandas.api.types.is_object_dtype(t): + values = data.iloc[:,i].unique() + print('converting the following to unary:', values) + if len(values) == 2: + res.append(data.iloc[:,i].to_numpy() == values[1]) + types.append('b') + else: + for value in values: + res.append(data.iloc[:,i].to_numpy() == value) + types.append('b') + else: + raise CompilerError('unknown pandas type: ' + t) + res = numpy.array(res) + res = numpy.swapaxes(res, 0, 1) + return res, types diff --git a/Compiler/floatingpoint.py b/Compiler/floatingpoint.py index 7786f73c8..f44d95cbe 100644 --- a/Compiler/floatingpoint.py +++ b/Compiler/floatingpoint.py @@ -319,7 +319,7 @@ def Pow2(a, l, kappa): def Pow2_from_bits(bits): m = len(bits) t = list(bits) - pow2k = [types.cint() for i in range(m)] + pow2k = [None for i in range(m)] for i in range(m): pow2k[i] = two_power(2**i) t[i] = t[i]*pow2k[i] + 1 - t[i] @@ -641,7 +641,7 @@ def BitDecFull(a, n_bits=None, maybe_mixed=False): n_bits = n_bits or bit_length assert n_bits <= bit_length logp = int(round(math.log(p, 2))) - if abs(p - 2 ** logp) / p < 2 ** -get_program().security: + if get_program().rabbit_gap(): # inspired by Rabbit (https://eprint.iacr.org/2021/119) # no need for exact randomness generation # if modulo a power of two is close enough diff --git a/Compiler/instructions.py b/Compiler/instructions.py index c51318322..e50bc9fd8 100644 --- a/Compiler/instructions.py +++ b/Compiler/instructions.py @@ -295,6 +295,7 @@ class movint(base.Instruction): @base.vectorize class pushint(base.StackInstruction): """ Pushes clear integer register to the thread-local stack. + Considered obsolete. :param: source (regint) """ @@ -304,6 +305,7 @@ class pushint(base.StackInstruction): @base.vectorize class popint(base.StackInstruction): """ Pops from the thread-local stack to clear integer register. + Considered obsolete. :param: destination (regint) """ @@ -385,7 +387,7 @@ class use(base.Instruction): :param: number (int, -1 for unknown) """ code = base.opcodes['USE'] - arg_format = ['int','int','int'] + arg_format = ['int','int','long'] @classmethod def get_usage(cls, args): @@ -404,7 +406,7 @@ class use_inp(base.Instruction): :param: number (int, -1 for unknown) """ code = base.opcodes['USE_INP'] - arg_format = ['int','int','int'] + arg_format = ['int','int','long'] @classmethod def get_usage(cls, args): @@ -423,7 +425,7 @@ class use_edabit(base.Instruction): :param: number (int, -1 for unknown) """ code = base.opcodes['USE_EDABIT'] - arg_format = ['int','int','int'] + arg_format = ['int','int','long'] @classmethod def get_usage(cls, args): @@ -439,7 +441,7 @@ class use_matmul(base.Instruction): :param: number (int, -1 for unknown) """ code = base.opcodes['USE_MATMUL'] - arg_format = ['int','int','int','int'] + arg_format = ['int','int','int','long'] @classmethod def get_usage(cls, args): @@ -488,7 +490,7 @@ class use_prep(base.Instruction): :param: number of items to use (int, -1 for unknown) """ code = base.opcodes['USE_PREP'] - arg_format = ['str','int'] + arg_format = ['str','long'] @classmethod def get_usage(cls, args): @@ -1873,6 +1875,20 @@ class floatoutput(base.PublicFileIOInstruction): code = base.opcodes['FLOATOUTPUT'] arg_format = ['p','c','c','c','c'] +@base.vectorize +class fixinput(base.PublicFileIOInstruction): + """ Binary fixed-point input. 
+ + :param: player (int) + :param: destination (cint) + :param: exponent (int) + :param: input type (0: 64-bit integer, 1: float, 2: double) + + """ + __slots__ = [] + code = base.opcodes['FIXINPUT'] + arg_format = ['p','cw','int','int'] + @base.vectorize class rand(base.Instruction): """ Store insecure random value of specified length in clear integer diff --git a/Compiler/instructions_base.py b/Compiler/instructions_base.py index f811e47c8..b72079c76 100644 --- a/Compiler/instructions_base.py +++ b/Compiler/instructions_base.py @@ -209,6 +209,7 @@ CONDPRINTPLAIN = 0xE1, INTOUTPUT = 0xE6, FLOATOUTPUT = 0xE7, + FIXINPUT = 0xE8, GBITDEC = 0x18A, GBITCOM = 0x18B, # Secure socket @@ -226,8 +227,13 @@ def int_to_bytes(x): global_vector_size_stack = [] global_instruction_type_stack = ['modp'] +def check_vector_size(size): + if isinstance(size, program.curr_tape.Register): + raise CompilerError('vector size must be known at compile time') + def set_global_vector_size(size): stack = global_vector_size_stack + check_vector_size(size) if size == 1 and not stack: return stack.append(size) @@ -420,6 +426,7 @@ class MergeCISC(Mergeable): def __init__(self, *args, **kwargs): self.args = args self.kwargs = kwargs + self.security = program.security self.calls = [(args, kwargs)] self.params = [] self.used = [] @@ -443,7 +450,7 @@ def is_vec(self): def merge_id(self): return self.function, tuple(self.params), \ - tuple(sorted(self.kwargs.items())) + tuple(sorted(self.kwargs.items())), self.security def merge(self, other): self.calls += other.calls @@ -468,7 +475,10 @@ def new_instructions(self, size, regs): except: args.append(arg) program.options.cisc = False + old_security = program.security + program.security = self.security self.function(*args, **self.kwargs) + program.security = old_security program.options.cisc = True reset_global_vector_size() program.curr_tape = old_tape @@ -579,7 +589,7 @@ def wrapper(*args, **kwargs): same_sizes &= arg.size == args[0].size except: pass - if program.options.cisc and same_sizes: + if program.use_cisc() and same_sizes: return MergeCISC(*args, **kwargs) else: return function(*args, **kwargs) @@ -592,9 +602,9 @@ def instruction(res, *args, **kwargs): instruction = cisc(instruction) def wrapper(*args, **kwargs): - if not program.options.cisc: - return function(*args, **kwargs) from Compiler import types + if not (program.options.cisc and isinstance(args[0], types._register)): + return function(*args, **kwargs) if isinstance(args[0], types._clear): res_type = type(args[1]) else: @@ -671,7 +681,8 @@ def check(cls, arg): raise ArgumentError(arg, 'Invalid register argument') if arg.program != program.curr_tape: raise ArgumentError(arg, 'Register from other tape, trace: %s' % \ - util.format_trace(arg.caller)) + util.format_trace(arg.caller) + + '\nMaybe use MemValue') if arg.reg_type != cls.reg_type: raise ArgumentError(arg, "Wrong register type '%s', expected '%s'" % \ (arg.reg_type, cls.reg_type)) @@ -729,10 +740,10 @@ class LongArgFormat(IntArgFormat): @classmethod def encode(cls, arg): - return list(struct.pack('>Q', arg)) + return list(struct.pack('>q', arg)) def __init__(self, f): - self.i = struct.unpack('>Q', f.read(8))[0] + self.i = struct.unpack('>q', f.read(8))[0] class ImmediateModpAF(IntArgFormat): @classmethod @@ -1075,21 +1086,27 @@ class ClearImmediate(ImmediateBase): ### Memory access instructions ### -class DirectMemoryInstruction(Instruction): +class MemoryInstruction(Instruction): + __slots__ = ['_protect'] + def __init__(self, *args, **kwargs): + 
super(MemoryInstruction, self).__init__(*args, **kwargs) + self._protect = program._protect_memory + +class DirectMemoryInstruction(MemoryInstruction): __slots__ = [] def __init__(self, *args, **kwargs): super(DirectMemoryInstruction, self).__init__(*args, **kwargs) -class IndirectMemoryInstruction(Instruction): +class IndirectMemoryInstruction(MemoryInstruction): __slots__ = [] def get_direct(self, address): return self.direct(self.args[0], address, add_to_prog=False) -class ReadMemoryInstruction(Instruction): +class ReadMemoryInstruction(MemoryInstruction): __slots__ = [] -class WriteMemoryInstruction(Instruction): +class WriteMemoryInstruction(MemoryInstruction): __slots__ = [] class DirectMemoryWriteInstruction(DirectMemoryInstruction, \ diff --git a/Compiler/library.py b/Compiler/library.py index 80c43ca8f..7c0ac10c0 100644 --- a/Compiler/library.py +++ b/Compiler/library.py @@ -117,7 +117,12 @@ def print_ln(s='', *args): print_ln('a is %s.', a.reveal()) """ - print_str(s + '\n', *args) + print_str(str(s) + '\n', *args) + +def print_both(s, end='\n'): + """ Print line during compilation and execution. """ + print(s, end=end) + print_str(s + end) def print_ln_if(cond, ss, *args): """ Print line if :py:obj:`cond` is true. The further arguments @@ -486,6 +491,8 @@ def cond_swap(x,y): return b.cond_swap(y, x) def sort(a): + print("WARNING: you're using bubble sort") + res = a for i in range(len(a)): @@ -524,272 +531,23 @@ def odd_even_merge_sort(a): raise CompilerError('Length of list must be power of two') def chunky_odd_even_merge_sort(a): - tmp = a[0].Array(len(a)) - for i,j in enumerate(a): - tmp[i] = j - l = 1 - while l < len(a): - l *= 2 - k = 1 - while k < l: - k *= 2 - def round(): - for i in range(len(a)): - a[i] = tmp[i] - for i in range(len(a) // l): - for j in range(l // k): - base = i * l + j - step = l // k - if k == 2: - a[base], a[base+step] = cond_swap(a[base], a[base+step]) - else: - b = a[base:base+k*step:step] - for m in range(base + step, base + (k - 1) * step, 2 * step): - a[m], a[m+step] = cond_swap(a[m], a[m+step]) - for i in range(len(a)): - tmp[i] = a[i] - chunk = MPCThread(round, 'sort-%d-%d' % (l,k), single_thread=True) - chunk.start() - chunk.join() - #round() - for i in range(len(a)): - a[i] = tmp[i] + raise CompilerError( + 'This function has been removed, use loopy_odd_even_merge_sort instead') def chunkier_odd_even_merge_sort(a, n=None, max_chunk_size=512, n_threads=7, use_chunk_wraps=False): - if n is None: - n = len(a) - a_base = instructions.program.malloc(n, 's') - for i,j in enumerate(a): - store_in_mem(j, a_base + i) - else: - a_base = a - tmp_base = instructions.program.malloc(n, 's') - chunks = {} - threads = [] - - def run_threads(): - for thread in threads: - thread.start() - for thread in threads: - thread.join() - del threads[:] - - def run_chunk(size, base): - if size not in chunks: - def swap_list(list_base): - for i in range(size // 2): - base = list_base + 2 * i - x, y = cond_swap(sint.load_mem(base), - sint.load_mem(base + 1)) - store_in_mem(x, base) - store_in_mem(y, base + 1) - chunks[size] = FunctionTape(swap_list, 'sort-%d' % size) - return chunks[size](base) - - def run_round(size): - # minimize number of chunk sizes - n_chunks = int(math.ceil(1.0 * size / max_chunk_size)) - lower_size = size // n_chunks // 2 * 2 - n_lower_size = n_chunks - (size - n_chunks * lower_size) // 2 - # print len(to_swap) == lower_size * n_lower_size + \ - # (lower_size + 2) * (n_chunks - n_lower_size), \ - # len(to_swap), n_chunks, lower_size, n_lower_size 
- base = 0 - round_threads = [] - for i in range(n_lower_size): - round_threads.append(run_chunk(lower_size, tmp_base + base)) - base += lower_size - for i in range(n_chunks - n_lower_size): - round_threads.append(run_chunk(lower_size + 2, tmp_base + base)) - base += lower_size + 2 - run_threads_in_rounds(round_threads) - - postproc_chunks = [] - wrap_chunks = {} - post_threads = [] - pre_threads = [] - - def load_and_store(x, y, to_right): - if to_right: - store_in_mem(sint.load_mem(x), y) - else: - store_in_mem(sint.load_mem(y), x) - - def run_setup(k, a_addr, step, tmp_addr): - if k == 2: - def mem_op(preproc, a_addr, step, tmp_addr): - load_and_store(a_addr, tmp_addr, preproc) - load_and_store(a_addr + step, tmp_addr + 1, preproc) - res = 2 - else: - def mem_op(preproc, a_addr, step, tmp_addr): - instructions.program.curr_tape.merge_opens = False -# for i,m in enumerate(range(a_addr + step, a_addr + (k - 1) * step, step)): - for i in range(k - 2): - m = a_addr + step + i * step - load_and_store(m, tmp_addr + i, preproc) - res = k - 2 - if not use_chunk_wraps or k <= 4: - mem_op(True, a_addr, step, tmp_addr) - postproc_chunks.append((mem_op, (a_addr, step, tmp_addr))) - else: - if k not in wrap_chunks: - pre_chunk = FunctionTape(mem_op, 'pre-%d' % k, - compile_args=[True]) - post_chunk = FunctionTape(mem_op, 'post-%d' % k, - compile_args=[False]) - wrap_chunks[k] = (pre_chunk, post_chunk) - pre_chunk, post_chunk = wrap_chunks[k] - pre_threads.append(pre_chunk(a_addr, step, tmp_addr)) - post_threads.append(post_chunk(a_addr, step, tmp_addr)) - return res - - def run_threads_in_rounds(all_threads): - for thread in all_threads: - if len(threads) == n_threads: - run_threads() - threads.append(thread) - run_threads() - del all_threads[:] - - def run_postproc(): - run_threads_in_rounds(post_threads) - for chunk,args in postproc_chunks: - chunk(False, *args) - postproc_chunks[:] = [] - - l = 1 - while l < n: - l *= 2 - k = 1 - while k < l: - k *= 2 - size = 0 - instructions.program.curr_tape.merge_opens = False - for i in range(n // l): - for j in range(l // k): - base = i * l + j - step = l // k - size += run_setup(k, a_base + base, step, tmp_base + size) - run_threads_in_rounds(pre_threads) - run_round(size) - run_postproc() - - if isinstance(a, list): - for i in range(n): - a[i] = sint.load_mem(a_base + i) - instructions.program.free(a_base, 's') - instructions.program.free(tmp_base, 's') + raise CompilerError( + 'This function has been removed, use loopy_odd_even_merge_sort instead') def loopy_chunkier_odd_even_merge_sort(a, n=None, max_chunk_size=512, n_threads=7): - if n is None: - n = len(a) - a_base = instructions.program.malloc(n, 's') - for i,j in enumerate(a): - store_in_mem(j, a_base + i) - else: - a_base = a - tmp_base = instructions.program.malloc(n, 's') - tmp_i = instructions.program.malloc(1, 'ci') - chunks = {} - threads = [] - - def run_threads(): - for thread in threads: - thread.start() - for thread in threads: - thread.join() - del threads[:] - - def run_threads_in_rounds(all_threads): - for thread in all_threads: - if len(threads) == n_threads: - run_threads() - threads.append(thread) - run_threads() - del all_threads[:] - - def run_chunk(size, base): - if size not in chunks: - def swap_list(list_base): - for i in range(size // 2): - base = list_base + 2 * i - x, y = cond_swap(sint.load_mem(base), - sint.load_mem(base + 1)) - store_in_mem(x, base) - store_in_mem(y, base + 1) - chunks[size] = FunctionTape(swap_list, 'sort-%d' % size) - return chunks[size](base) - - def 
run_round(size): - # minimize number of chunk sizes - n_chunks = int(math.ceil(1.0 * size / max_chunk_size)) - lower_size = size // n_chunks // 2 * 2 - n_lower_size = n_chunks - (size - n_chunks * lower_size) // 2 - # print len(to_swap) == lower_size * n_lower_size + \ - # (lower_size + 2) * (n_chunks - n_lower_size), \ - # len(to_swap), n_chunks, lower_size, n_lower_size - base = 0 - round_threads = [] - for i in range(n_lower_size): - round_threads.append(run_chunk(lower_size, tmp_base + base)) - base += lower_size - for i in range(n_chunks - n_lower_size): - round_threads.append(run_chunk(lower_size + 2, tmp_base + base)) - base += lower_size + 2 - run_threads_in_rounds(round_threads) - - l = 1 - while l < n: - l *= 2 - k = 1 - while k < l: - k *= 2 - def load_and_store(x, y): - if to_tmp: - store_in_mem(sint.load_mem(x), y) - else: - store_in_mem(sint.load_mem(y), x) - def outer(i): - def inner(j): - base = j + a_base + i * l - step = l // k - if k == 2: - tmp_addr = regint.load_mem(tmp_i) - load_and_store(base, tmp_addr) - load_and_store(base + step, tmp_addr + 1) - store_in_mem(tmp_addr + 2, tmp_i) - else: - def inner2(m): - m += base - tmp_addr = regint.load_mem(tmp_i) - load_and_store(m, tmp_addr) - store_in_mem(tmp_addr + 1, tmp_i) - range_loop(inner2, step, (k - 1) * step, step) - range_loop(inner, l // k) - instructions.program.curr_tape.merge_opens = False - to_tmp = True - store_in_mem(tmp_base, tmp_i) - range_loop(outer, n // l) - if k == 2: - run_round(n) - else: - run_round(n // k * (k - 2)) - instructions.program.curr_tape.merge_opens = False - to_tmp = False - store_in_mem(tmp_base, tmp_i) - range_loop(outer, n // l) - - if isinstance(a, list): - for i in range(n): - a[i] = sint.load_mem(a_base + i) - instructions.program.free(a_base, 's') - instructions.program.free(tmp_base, 's') - instructions.program.free(tmp_i, 'ci') + raise CompilerError( + 'This function has been removed, use loopy_odd_even_merge_sort instead') def loopy_odd_even_merge_sort(a, sorted_length=1, n_parallel=32, n_threads=None): + a_in = a + if isinstance(a_in, list): + a = Array.create_from(a) steps = {} l = sorted_length while l < len(a): @@ -833,8 +591,14 @@ def f(i): swap(m2, step) steps[key] = step steps[key](l) + if isinstance(a_in, list): + a_in[:] = list(a) def mergesort(A): + if not get_program().options.insecure: + raise CompilerError('mergesort reveals the order of elements, ' + 'use --insecure to activate it') + B = Array(len(A), sint) def merge(i_left, i_right, i_end): @@ -901,16 +665,16 @@ def for_range(start, stop=None, step=None): :param start/stop/step: regint/cint/int - Example: - - .. code:: + The following should output 10:: + n = 10 a = sint.Array(n) x = sint(0) @for_range(n) def _(i): a[i] = i x.update(x + 1) + print_ln('%s', x.reveal()) Note that you cannot overwrite data structures such as :py:class:`~Compiler.types.Array` in a loop. Use @@ -924,11 +688,13 @@ def decorator(loop_body): def for_range_parallel(n_parallel, n_loops): """ Decorator to execute a loop :py:obj:`n_loops` up to - :py:obj:`n_parallel` loop bodies in parallel. + :py:obj:`n_parallel` loop bodies with optimized communication in a + single thread. + In most cases, it is easier to use :py:func:`for_range_opt`. Using any other control flow instruction inside the loop breaks the optimization. 
- :param n_parallel: compile-time (int) + :param n_parallel: optimization parameter (int) :param n_loops: regint/cint/int or list of int Example: @@ -1084,7 +850,7 @@ def exit_elimination(block): del blocks[-n_to_merge + 1:] del get_tape().req_node.children[-1] merged.children = [] - RegintOptimizer().run(merged.instructions) + RegintOptimizer().run(merged.instructions, get_program()) get_tape().active_basicblock = merged else: req_node = get_tape().req_node.children[-1].nodes[0] @@ -1151,6 +917,15 @@ def _(i): @for_range_opt_multithread(2, [5, 3]) def f(i, j): ... + + Note that you cannot use registers across threads. Use + :py:class:`MemValue` instead:: + + a = MemValue(sint(0)) + @for_range_opt_multithread(8, 80) + def _(i): + b = a + 1 + """ return for_range_multithread(n_threads, None, n_loops) @@ -1179,6 +954,7 @@ def f(base, size): return map_reduce(n_threads, None, n_items, initializer=lambda: [], reducer=None, looping=False) else: + max_size = max(1, max_size) def wrapper(function): @multithread(n_threads, n_items) def new_function(base, size): @@ -1419,57 +1195,50 @@ def f(i): return f return decorator -def while_loop(loop_body, condition, arg, g=None): +def while_loop(loop_body, condition, arg=None, g=None): if not callable(condition): raise CompilerError('Condition must be callable') - # store arg in stack - pre_condition = condition(arg) - if not isinstance(pre_condition, (bool,int)) or pre_condition: + if arg is None: + pre_condition = condition() + else: + pre_condition = condition(arg) arg = regint(arg) - def loop_fn(): - result = loop_body(arg) + cond = condition + condition = lambda: cond(arg) + tmp = loop_body + def loop_body(): + result = tmp(arg) if isinstance(result, MemValue): result = result.read() result.link(arg) - cont = condition(result) - return cont + if not isinstance(pre_condition, (bool,int)) or pre_condition: + def loop_fn(): + loop_body() + return condition() if_statement(pre_condition, lambda: do_while(loop_fn, g=g)) def while_do(condition, *args): - """ While-do loop. The decorator requires an initialization, and - the loop body function must return a suitable input for - :py:obj:`condition`. + """ While-do loop. :param condition: function returning public integer (regint/cint/int) - :param args: arguments given to :py:obj:`condition` and loop body The following executes an ten-fold loop: .. code:: - @while_do(lambda x: x < 10, regint(0)) - def f(i): + i = regint(0) + @while_do(lambda: i < 10) + def f(): ... - return i + 1 + i.update(i + 1) + ... 
+ """ def decorator(loop_body): while_loop(loop_body, condition, *args) return loop_body return decorator -def do_loop(condition, loop_fn): - # store initial condition to stack - pushint(condition if isinstance(condition,regint) else regint(condition)) - def wrapped_loop(): - # save condition to stack - new_cond = regint.pop() - # run the loop - condition = loop_fn(new_cond) - pushint(condition) - return condition - do_while(wrapped_loop) - regint.pop() - def _run_and_link(function, g=None): if g is None: g = function.__globals__ diff --git a/Compiler/ml.py b/Compiler/ml.py index c667e1d64..f5c9a9eba 100644 --- a/Compiler/ml.py +++ b/Compiler/ml.py @@ -216,9 +216,13 @@ def __getitem__(self, *args): self.alloc() return super(Tensor, self).__getitem__(*args) - def assign_vector(self, *args): + def assign_all(self, *args): self.alloc() - return super(Tensor, self).assign_vector(*args) + return super(Tensor, self).assign_all(*args) + + def assign_vector(self, *args, **kwargs): + self.alloc() + return super(Tensor, self).assign_vector(*args, **kwargs) def assign_vector_by_indices(self, *args): self.alloc() @@ -261,14 +265,15 @@ def forward(self, batch=None, training=None): self._forward(batch) def __str__(self): - return type(self).__name__ + str(self._Y.sizes) + return type(self).__name__ + str(self._Y.shape) def __repr__(self): - return '%s(%s)' % (type(self).__name__, self.Y.sizes) + return '%s(%s)' % (type(self).__name__, self.Y.shape) class NoVariableLayer(Layer): input_from = lambda *args, **kwargs: None output_weights = lambda *args: None + reveal_parameters_to_binary = lambda *args, **kwargs: None nablas = lambda self: () reset = lambda self: None @@ -300,7 +305,8 @@ def __init__(self, N, debug=False, approx=False): self.compute_loss = True self.d_out = 1 - def divisor(self, divisor, size): + @staticmethod + def divisor(divisor, size=1): return cfix(1.0 / divisor, size=size) def _forward(self, batch): @@ -325,7 +331,8 @@ def _(base, size): self.divisor(N, 1)) def eval(self, size, base=0, top=False): - assert not top + if top: + return self.X.get_vector(base, size) > 0 if self.approx: return approx_sigmoid(self.X.get_vector(base, size), self.approx) else: @@ -383,6 +390,36 @@ def _(i): i, truth, guess, b, nabla) return n_correct +class LinearOutput(NoVariableLayer): + n_outputs = -1 + + def __init__(self, N): + self.X = sfix.Array(N) + self.Y = sfix.Array(N) + self.nabla_X = sfix.Array(N) + self.l = MemValue(sfix(0)) + + def _forward(self, batch): + N = len(batch) + guess = self.X.get_vector(0, N) + truth = self.Y.get(batch.get_vector(0, N)) + diff = guess - truth + self.nabla_X.assign_vector(diff) + #print_ln('%s %s %s', diff.reveal(), truth.reveal(), guess.reveal()) + self.l.write(sum((diff) ** 2) * Output.divisor(N)) + + def backward(self, batch): + pass + + def reveal_correctness(*args): + return 0 + + def average_loss(self, N): + return self.l.reveal() + + def eval(self, size, base=0, top=False): + return self.X.get_vector(base, size) + class MultiOutputBase(NoVariableLayer): def __init__(self, N, d_out, approx=False, debug=False): self.X = sfix.Matrix(N, d_out) @@ -621,6 +658,25 @@ def output_weights(self): self.W.print_reveal_nested() print_ln('%s', self.b.reveal_nested()) + def reveal_parameters_to_binary(self, reshape=None): + if reshape: + trans = self.W.transpose() + O = trans.sizes[0] + tmp = MultiArray([O] + reshape, + value_type=self.W.value_type, + address=trans.address) + X, Y, C = reshape + @for_range(O) + def _(i): + @for_range(C) + def _(j): + part = 
tmp.get_vector_by_indices(i, None, None, j) + part.reveal().binary_output() + else: + self.W.transpose().reveal_to_binary_output() + if self.input_bias: + self.b.reveal_to_binary_output() + def backward_params(self, f_schur_Y, batch): N = len(batch) tmp = Matrix(self.d_in, self.d_out, unreduced_sfix) @@ -726,14 +782,14 @@ def __init__(self, N, d_in, d_out, d=1, activation='id', debug=False): self.d = d self.activation = activation - self.X = MultiArray([N, d, d_in], sfix) - self.Y = MultiArray([N, d, d_out], sfix) + self.X = Tensor([N, d, d_in], sfix) + self.Y = Tensor([N, d, d_out], sfix) self.W = Tensor([d_in, d_out], sfix) self.b = sfix.Array(d_out) back_N = min(N, self.back_batch_size) - self.nabla_Y = MultiArray([back_N, d, d_out], sfix) - self.nabla_X = MultiArray([back_N, d, d_in], sfix) + self.nabla_Y = Tensor([back_N, d, d_out], sfix) + self.nabla_X = Tensor([back_N, d, d_in], sfix) self.nabla_W = sfix.Matrix(d_in, d_out) self.nabla_b = sfix.Array(d_out) @@ -757,7 +813,7 @@ def reset(self): d_out = self.d_out r = math.sqrt(6.0 / (d_in + d_out)) print('Initializing dense weights in [%f,%f]' % (-r, r)) - self.W.randomize(-r, r) + self.W.randomize(-r, r, n_threads=self.n_threads) self.b.assign_all(0) def input_from(self, player, raw=False): @@ -841,6 +897,7 @@ def backward(self, compute_nabla_X=True, batch=None): f_schur_Y = nabla_Y if compute_nabla_X: + nabla_X.alloc() @multithread(self.n_threads, N) def _(base, size): B = sfix.Matrix(N, d_out, address=f_schur_Y.address) @@ -875,8 +932,8 @@ def __init__(self, N, d_in, d_out): self.b = sfix.Array(d_out) self.nabla_b = self.b.same_shape() - self.X = MultiArray([N, 1, d_in], sfix) - self.Y = MultiArray([N, 1, d_out], sfix) + self.X = Tensor([N, 1, d_in], sfix) + self.Y = Tensor([N, 1, d_out], sfix) self.nabla_Y = self.Y.same_shape() def reset(self): @@ -920,10 +977,10 @@ def __init__(self, N, d1, d2=1, alpha=0.5): self.N = N self.d1 = d1 self.d2 = d2 - self.X = MultiArray([N, d1, d2], sfix) - self.Y = MultiArray([N, d1, d2], sfix) - self.nabla_Y = MultiArray([N, d1, d2], sfix) - self.nabla_X = MultiArray([N, d1, d2], sfix) + self.X = Tensor([N, d1, d2], sfix) + self.Y = Tensor([N, d1, d2], sfix) + self.nabla_Y = Tensor([N, d1, d2], sfix) + self.nabla_X = Tensor([N, d1, d2], sfix) self.alpha = alpha self.B = MultiArray([N, d1, d2], sint) @@ -1070,8 +1127,15 @@ def __init__(self, shape, strides=(1, 2, 2, 1), ksize=(1, 2, 2, 1), self.X = Tensor(shape, sfix) if padding == 'SAME': output_shape = [int(math.ceil(shape[i] / strides[i])) for i in range(4)] + padding = [0, 0] else: - output_shape = [(shape[i] - ksize[i]) // strides[i] + 1 for i in range(4)] + if padding == 'VALID': + padding = 0 + if isinstance(padding, int): + padding = [padding, padding] + output_shape = [shape[0]] + [ + (shape[i + 1] + 2 * padding[i] - ksize[i + 1]) // \ + strides [i + 1] + 1 for i in range(2)] + [shape[3]] self.Y = Tensor(output_shape, sfix) self.strides = strides self.ksize = ksize @@ -1108,48 +1172,53 @@ def backward(self, compute_nabla_X=True, batch=None): if compute_nabla_X: self.nabla_X.alloc() self.nabla_X.assign_all(0) + break_point() def process(pool, bi, k, i, j): for (x, h_in, w_in, h, w), c \ in zip(pool, self.comparisons[bi][k][i][j]): hh = h * h_in ww = w * w_in res = h_in * w_in * c * self.nabla_Y[bi][i][j][k] + get_program().protect_memory(True) self.nabla_X[bi][hh][ww][k] += res + get_program().protect_memory(False) self.traverse(batch, process) def traverse(self, batch, process): need_padding = [self.strides[i] * (self.Y.sizes[i] - 1) + 
self.ksize[i] > self.X.sizes[i] for i in range(4)] - overlap = reduce(operator.or_, - (x < y for x, y in zip(self.strides, self.ksize))) @for_range_opt_multithread(self.n_threads, [len(batch), self.X.sizes[3]]) def _(l, k): bi = batch[l] + XX = self.X[bi] @for_range_opt(self.Y.sizes[1]) def _(i): - h_base = self.strides[1] * i + h_base = self.strides[1] * i - self.padding[1] + hs = [h_base + jj for jj in range(self.ksize[1])] + if need_padding[1]: + h_ins = [(h < self.X.sizes[1]) * (h >= 0) for h in hs] + else: + h_ins = [True] * self.ksize[1] @for_range_opt(self.Y.sizes[2]) def _(j): - if overlap: - break_point() - w_base = self.strides[2] * j + w_base = self.strides[2] * j - self.padding[1] pool = [] + ws = [w_base + jj for jj in range(self.ksize[2])] + if need_padding[2]: + w_ins = [(w < self.X.sizes[2]) * (w >= 0) for w in ws] + else: + w_ins = [True] * self.ksize[2] for ii in range(self.ksize[1]): - h = h_base + ii - if need_padding[1]: - h_in = h < self.X.sizes[1] - else: - h_in = True + h = hs[ii] + h_in = h_ins[ii] + XXX = XX[h_in * h] for jj in range(self.ksize[2]): - w = w_base + jj - if need_padding[2]: - w_in = w < self.X.sizes[2] - else: - w_in = True + w = ws[jj] + w_in = w_ins[jj] if not is_zero(h_in * w_in): - pool.append([h_in * w_in * self.X[bi][h_in * h] - [w_in * w][k], h_in, w_in, h, w]) + pool.append([h_in * w_in * XXX[w_in * w][k], + h_in, w_in, h, w]) process(pool, bi, k, i, j) @@ -1160,7 +1229,7 @@ class Argmax(NoVariableLayer): """ def __init__(self, shape): assert len(shape) == 2 - self.X = MultiArray(shape, sfix) + self.X = Tensor(shape, sfix) self.Y = Array(shape[0], sint) def _forward(self, batch=[0]): @@ -1270,7 +1339,7 @@ def __init__(self, shape, approx=True, args=None): self.var, self.mu, self.weights, self.bias = arrays arrays = (sfix.Array(shape[2]) for i in range(4)) self.mu_hat, self.var_hat, self.nabla_weights, self.nabla_bias = arrays - self.epsilon = 2 ** (-sfix.f + 1) + self.epsilon = 2 ** (-sfix.f * 2 // 3 + 1) self.momentum = 0.1 if args != None: approx = 'precisebn' not in args @@ -1449,8 +1518,8 @@ def __init__(self, input_shape, output_shape, inputs=None): for x in back_shapes: x[0] = min(x[0], self.back_batch_size) - self.nabla_X = MultiArray(back_shapes[0], self.input_squant) - self.nabla_Y = MultiArray(back_shapes[1], self.output_squant) + self.nabla_X = Tensor(back_shapes[0], self.input_squant) + self.nabla_Y = Tensor(back_shapes[1], self.output_squant) self.inputs = inputs def temp_shape(self): @@ -1544,6 +1613,18 @@ def output_weights(self): self.weights.print_reveal_nested() print_ln('%s', self.bias.reveal_nested()) + def reveal_parameters_to_binary(self): + assert not self.tf_weight_format + n_filters = self.weights.shape[0] + n_channels = self.weights.shape[3] + @for_range(n_filters) + def _(i): + @for_range(n_channels) + def _(j): + part = self.weights.get_vector_by_indices(i, None, None, j) + part.reveal().binary_output() + self.bias.reveal_to_binary_output() + def dot_product(self, iv, wv, out_y, out_x, out_c): bias = self.bias[out_c] acc = self.output_squant.unreduced_dot_product(iv, wv) @@ -1704,11 +1785,10 @@ class FixConv2d(Conv2d, FixBase): def reset(self): assert not self.tf_weight_format - kernel_size = self.weight_shape[1] * self.weight_shape[2] - r = math.sqrt(6.0 / (kernel_size * sum(self.weight_shape[::3]))) + n_in = reduce(operator.mul, self.weight_shape[1:]) + r = math.sqrt(6.0 / (n_in + self.weight_shape[0])) print('Initializing convolution weights in [%f,%f]' % (-r, r)) - self.weights.assign_vector( - 
sfix.get_random(-r, r, size=self.weights.total_size()))
+        self.weights.randomize(-r, r, n_threads=self.n_threads)
         self.bias.assign_all(0)
 
     def backward(self, compute_nabla_X=True, batch=None):
@@ -1944,6 +2024,51 @@ def _(out_y, out_x, c):
                 acc = self.const_div(acc, n)
                 self.Y[0][out_y][out_x][c] = self.output_squant._new(acc)
 
+def easyConv2d(input_shape, batch_size, out_channels, kernel_size, stride=1,
+               padding=0):
+    """ More convenient interface to :py:class:`FixConv2d`.
+
+    :param input_shape: input shape (tuple/list of four int)
+    :param batch_size: batch size (int)
+    :param out_channels: output channels (int)
+    :param kernel_size: kernel size (int or tuple/list of two int)
+    :param stride: stride (int or tuple/list of two int)
+    :param padding: :py:obj:`'SAME'`, :py:obj:`'VALID'`, int, or tuple/list of two int
+
+    """
+    if isinstance(kernel_size, int):
+        kernel_size = (kernel_size, kernel_size)
+    if isinstance(stride, int):
+        stride = (stride, stride)
+    weight_shape = [out_channels] + list(kernel_size) + [input_shape[-1]]
+    output_shape = [batch_size] + list(
+        apply_padding(input_shape[1:3], kernel_size, stride, padding)) + \
+        [out_channels]
+    padding = padding.upper() if isinstance(padding, str) \
+        else padding
+    return FixConv2d(input_shape, weight_shape, (out_channels,), output_shape,
+                     stride, padding)
+
+def easyMaxPool(input_shape, kernel_size, stride=None, padding=0):
+    """ More convenient interface to :py:class:`MaxPool`.
+
+    :param input_shape: input shape (tuple/list of four int)
+    :param kernel_size: kernel size (int or tuple/list of two int)
+    :param stride: stride (int or tuple/list of two int)
+    :param padding: :py:obj:`'SAME'`, :py:obj:`'VALID'`, int,
+      or tuple/list of two int
+
+    """
+    if isinstance(kernel_size, int):
+        kernel_size = (kernel_size, kernel_size)
+    if isinstance(stride, int):
+        stride = (stride, stride)
+    if stride == None:
+        stride = kernel_size
+    padding = padding.upper() if isinstance(padding, str) \
+        else padding
+    return MaxPool(input_shape, [1] + list(stride) + [1],
+                   [1] + list(kernel_size) + [1], padding)
+
 class QuantAveragePool2d(QuantBase, AveragePool2d):
     def input_params_from(self, player):
         print('WARNING: assuming that input and output quantization parameters are the same')
@@ -1997,9 +2122,15 @@ class Optimizer:
     """ Base class for graphs of layers.
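+
+    A minimal sketch of direct use, assuming ``layers`` is a list of
+    layers ending in an output layer (the subclasses below provide the
+    concrete update rules)::
+
+        optimizer = SGD(layers, n_epochs=1)
+        optimizer.reset()
+        optimizer.run(batch_size=128)
+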
""" n_threads = Layer.n_threads always_shuffle = True + shuffle = True time_layers = False revealing_correctness = False early_division = False + output_diff = False + output_grad = False + output_stats = False + print_accuracy = True + time_training = True @staticmethod def from_args(program, layers): @@ -2007,14 +2138,19 @@ def from_args(program, layers): res = Adam(layers, 1, approx='adamapprox' in program.args) elif 'amsgrad' in program.args: res = Adam(layers, approx=True, amsgrad=True) + elif 'amsgradprec' in program.args: + res = Adam(layers, approx=False, amsgrad=True) elif 'quotient' in program.args: res = Adam(layers, approx=True, amsgrad=True, normalize=True) else: res = SGD(layers, 1) res.early_division = 'early_div' in program.args + res.output_diff = 'output_diff' in program.args + res.output_grad = 'output_grad' in program.args + res.output_stats = 'output_stats' in program.args return res - def __init__(self, report_loss=None): + def __init__(self, layers=[], report_loss=None): if get_program().options.binary: raise CompilerError( 'machine learning code not compatible with binary circuits') @@ -2028,6 +2164,7 @@ def __init__(self, report_loss=None): self.i_epoch = MemValue(0) self.stopped_on_loss = MemValue(0) self.stopped_on_low_loss = MemValue(0) + self.layers = layers @property def layers(self): @@ -2054,6 +2191,10 @@ def set_layers_with_inputs(self, layers): layer.last_used = list(filter(lambda x: x not in used, layer.inputs)) used.update(layer.inputs) + def set_learning_rate(self, lr): + print('Setting learning rate to', lr) + self.gamma = MemValue(cfix(lr)) + def reset(self): """ Initialize weights. """ for layer in self.layers: @@ -2151,6 +2292,7 @@ def backward(self, batch): layer.backward(compute_nabla_X=False, batch=self.batch_for(layer, batch)) else: + layer.nabla_X.alloc() layer.backward(batch=self.batch_for(layer, batch)) if len(layer.inputs) == 1: layer.inputs[0].nabla_Y.address = \ @@ -2161,6 +2303,92 @@ def backward(self, batch): if self.time_layers: stop_timer(200 + i) + @classmethod + def stat(cls, name, tensor): + zero, neg, small = (cint.Array(cls.n_threads) for i in range(3)) + s, mx, mn = (cfix.Array(cls.n_threads) for i in range(3)) + for x in zero, neg, small, s, mx, mn: + x.assign_all(0) + total = tensor.total_size() + @multithread(cls.n_threads, total) + def _(base, size): + tn = get_thread_number() - 1 + tmp = Array.create_from( + tensor.get_vector(base, size).reveal()) + @for_range_opt(size, budget=1000) + def _(i): + zero[tn] += tmp[i] == 0 + neg[tn] += tmp[i] < 0 + small[tn] += abs(tmp[i]) < 2 ** (-tmp[i].f / 2) + s[tn] += tmp[i] + mx[tn] = util.max(mx[tn], tmp[i]) + mn[tn] = util.min(mn[tn], tmp[i]) + tmp.delete() + print_str( + ' %s 0:%s/%s, <0:%s/%s, >0:%s/%s, ~0:%s/%s sum:%s max:%s min:%s ', + name, sum(zero), total, sum(neg), total, + total - sum(zero) - sum(neg), total, + sum(small) - sum(zero), total, sum(s), util.max(mx), util.min(mn)) + if len(tensor.shape) == 4: + corners = sum(([tensor[0][i][j][0] for j in (0, -1)] + for i in (0, -1)), []) + elif len(tensor.shape) == 1: + x = tensor.to_array() + corners = [x[i] for i in (0, len(x) // 2 - 1, -1)] + else: + x = tensor[0].to_array() + corners = [x[i] for i in (0, len(x) // 2 - 1, -1)] + print_ln('corners:%s shape:%s', util.reveal(corners), tensor.shape) + + def update(self, i_epoch, i_batch, batch): + if self.output_grad: + @if_(i_batch % 100 == 0) + def _(): + for layer in self.layers[:-1]: + cfix(10000).binary_output() + break_point() + 
layer.nabla_Y.get_vector(size=2000).reveal().binary_output() + break_point() + for theta, nabla in zip(layer.thetas(), layer.nablas()): + cfix(5000).binary_output() + break_point() + nabla.get_vector().reveal().binary_output() + break_point() + if self.output_stats: + old_params = [] + @if_((i_batch % self.output_stats == 0).bit_or(i_epoch == 0)) + def _(): + for i, layer in enumerate(self.layers[:-1]): + print_ln(layer) + if layer == self.layers[0]: + x = Array.create_from(layer.X.get_slice_vector(batch)) + self.stat(' 0 X', x) + else: + self.stat(' %d X' % i, layer.X) + self.stat(' %d Y' % i, layer.Y) + self.stat(' %d nabla_Y' % i, layer.nabla_Y) + for nabla in layer.nablas(): + self.stat(' %d grad' % i, nabla) + for theta in layer.thetas(): + self.stat(' %d param' % i, theta) + if theta.total_size() < 1000: + old_params.append(theta.get_vector()) + if self.time_layers: + start_timer(1000) + self._update(i_epoch, MemValue(i_batch), batch) + if self.time_layers: + stop_timer(1000) + if self.output_stats: + @if_(i_batch % self.output_stats == 0) + def _(): + for i, layer in enumerate(self.layers[:-1]): + for theta in layer.thetas(): + if theta.total_size() < 1000: + print_ln(layer) + self.stat(' %d diff' % i, Array.create_from( + theta.get_vector() - old_params[0])) + del old_params[0] + @_no_mem_warnings def run(self, batch_size=None, stop_on_loss=0): """ Run training. @@ -2197,7 +2425,7 @@ def _(_): indices.assign_vector( regint.get_random(int(math.log2(len(X))), size=missing), base=len(X)) - if self.always_shuffle or n_per_epoch > 1: + if self.shuffle and (self.always_shuffle or n_per_epoch > 1): indices.shuffle() loss_sum = MemValue(sfix(0)) self.n_correct.write(0) @@ -2212,11 +2440,7 @@ def _(j): label * n) self.forward(batch=batch, training=True) self.backward(batch=batch) - if self.time_layers: - start_timer(1000) - self.update(i, batch=batch) - if self.time_layers: - stop_timer(1000) + self.update(i, j, batch=batch) loss_sum.iadd(self.layers[-1].l) if self.print_loss_reduction: before = self.layers[-1].average_loss(N) @@ -2241,12 +2465,19 @@ def _(j): return res if self.print_losses: print_ln() + self.missing_newline = False if self.report_loss and self.layers[-1].compute_loss and self.layers[-1].approx != 5: print_ln('loss in epoch %s: %s', i, (loss_sum.reveal() * cfix(1 / n_per_epoch))) else: - print_ln('done with epoch %s', i) - time() + print_str('done with epoch %s', i) + if self.time_training or self.print_losses: + print_ln() + else: + print_str('\r') + self.missing_newline = True + if self.time_training: + time() i.iadd(1) res = True if self.tol > 0: @@ -2255,7 +2486,15 @@ def _(j): self.stopped_on_low_loss.write(1 - res) return res - def reveal_correctness(self, data, truth, batch_size): + def reveal_correctness(self, data, truth, batch_size=128, running=False): + """ Test correctness by revealing results. 
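+
+        For example, the following returns the number of correctly
+        classified samples and the average loss, assuming a trained
+        instance ``optimizer`` and matching test tensors::
+
+            n_correct, avg_loss = optimizer.reveal_correctness(test_X, test_Y)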
+ + :param data: test sample data + :param truth: test labels + :param batch_size: batch size + :param running: output after every batch + + """ N = data.sizes[0] n_correct = MemValue(0) loss = MemValue(sfix(0)) @@ -2266,13 +2505,20 @@ def f(start, batch_size, batch): n_correct.iadd( self.layers[-1].reveal_correctness(batch_size, part_truth)) loss.iadd(self.layers[-1].l * batch_size) - self.run_in_batches(f, data, batch_size) + if running: + total = start + batch_size + print_str('\rpart acc: %s (%s/%s) ', + cfix(n_correct, k=63, f=31) / total, n_correct, total) + self.run_in_batches(f, data, batch_size, truth) + if running: + print_ln() loss = loss.reveal() if cfix.f < 31: loss = cfix._new(loss.v << (31 - cfix.f), k=63, f=31) return n_correct, loss / N def run_in_batches(self, f, data, batch_size, truth=None): + batch_size = min(batch_size, data.sizes[0]) training_data = self.layers[0].X.address training_truth = self.layers[-1].Y.address self.layers[0].X.address = data.address @@ -2287,30 +2533,35 @@ def _(i): batch_size = N % batch_size if batch_size: start = N - batch_size - f(start, batch_size, batch) + f(start, batch_size, regint.Array(batch_size)) self.layers[0].X.address = training_data self.layers[-1].Y.address = training_truth @_no_mem_warnings def run_by_args(self, program, n_runs, batch_size, test_X, test_Y, - acc_batch_size=None): + acc_batch_size=None, reset=True): if acc_batch_size is None: acc_batch_size = batch_size depreciation = None + if program is None: + class A: + pass + program = A() + program.args = [] for arg in program.args: m = re.match('rate(.*)', arg) if m: - self.gamma = MemValue(cfix(float(m.group(1)))) + self.set_learning_rate(float(m.group(1))) m = re.match('dep(.*)', arg) if m: depreciation = float(m.group(1)) if 'nomom' in program.args: self.momentum = 0 - self.print_losses = 'print_losses' in program.args + self.print_losses |= 'print_losses' in program.args self.print_random_update = 'print_random_update' in program.args Layer.print_random_update = self.print_random_update self.time_layers = 'time_layers' in program.args - self.revealing_correctness = not 'no_acc' in program.args + self.revealing_correctness &= not 'no_acc' in program.args self.layers[-1].compute_loss = not 'no_loss' in program.args if 'full_cisc' in program.args: program.options.keep_cisc = 'FPDiv,exp2_fx,log2_fx' @@ -2319,7 +2570,7 @@ def run_by_args(self, program, n_runs, batch_size, test_X, test_Y, if model_input: for layer in self.layers: layer.input_from(0) - else: + elif reset: self.reset() if 'one_iter' in program.args: print_float_prec(16) @@ -2351,34 +2602,42 @@ def _(i): @for_range(n_runs) def _(i): if not acc_first: - start_timer(1) + if self.time_training: + start_timer(1) self.run(batch_size, stop_on_loss=0 if 'no_loss' in program.args else 100) - stop_timer(1) + if self.time_training: + stop_timer(1) if 'no_acc' in program.args: return N = self.layers[0].X.sizes[0] n_trained = (N + batch_size - 1) // batch_size * batch_size - if not acc_first: + if not acc_first and self.print_accuracy and \ + self.revealing_correctness: print_ln('train_acc: %s (%s/%s)', cfix(self.n_correct, k=63, f=31) / n_trained, self.n_correct, n_trained) if test_X and test_Y: print('use test set') n_test = len(test_Y) - n_correct, loss = self.reveal_correctness(test_X, test_Y, - acc_batch_size) + n_correct, loss = self.reveal_correctness( + test_X, test_Y, acc_batch_size, + running='part_acc' in program.args) print_ln('test loss: %s', loss) - print_ln('acc: %s (%s/%s)', - cfix(n_correct, k=63, f=31) 
/ n_test,
-                     n_correct, n_test)
+            if self.print_accuracy:
+                print_ln('acc: %s (%s/%s)',
+                         cfix(n_correct, k=63, f=31) / n_test,
+                         n_correct, n_test)
             if acc_first:
-                start_timer(1)
+                if self.time_training:
+                    start_timer(1)
                 self.run(batch_size)
-                stop_timer(1)
+                if self.time_training:
+                    stop_timer(1)
             else:
-                @if_(util.or_op(self.stopped_on_loss, n_correct <
-                    int(n_test // self.layers[-1].n_outputs * 1.2)))
+                @if_(util.or_op(self.stopped_on_loss, (n_correct <
+                    int(n_test // self.layers[-1].n_outputs * 1.2))
+                    if test_X and test_Y else 0))
                 def _():
                     self.gamma.imul(.5)
                     if 'crash' in program.args:
@@ -2392,9 +2651,36 @@ def _():
                 self.gamma.imul(depreciation)
                 print_ln('reducing learning rate to %s', self.gamma)
             return 1 - self.stopped_on_low_loss
+        if self.missing_newline:
+            print_ln('')
         if 'model_output' in program.args:
             self.output_weights()
 
+    def fit(self, X, Y, epochs=1, batch_size=128, validation_data=(None, None),
+            program=None, reset=True, print_accuracy=False, print_loss=False):
+        """ Train model.
+
+        :param X: training sample data (sfix tensor)
+        :param Y: training labels (sint/sfix tensor)
+        :param epochs: number of epochs (int)
+        :param batch_size: batch size (int)
+        :param validation_data: tuple of test sample data and labels for
+          accuracy testing (optional; reveals labels)
+        :param program: :py:class:`~Compiler.program.Program` instance to use
+          command-line parameters (optional)
+        :param reset: whether to initialize model
+        :param print_accuracy: print accuracy on training data (reveals labels)
+        :param print_loss: reveal and print training loss after every batch
+
+        """
+        self.layers[0].X = X
+        self.layers[-1].Y = Y
+        self.revealing_correctness = print_accuracy
+        self.print_losses = print_loss
+        self.time_training = False
+        self.run_by_args(program, epochs, batch_size, *validation_data,
+                         reset=reset)
+
     def output_weights(self):
         print_float_precision(max(6, sfix.f // 3))
         for layer in self.layers:
@@ -2405,6 +2691,19 @@ def summary(self):
         print(sizes)
         print('Trainable params:', sum(sizes))
 
+    @property
+    def trainable_variables(self):
+        return list(self.thetas)
+
+    def reveal_model_to_binary(self):
+        input_shape = self.layers[0].X.shape
+        for layer in self.layers:
+            if len(input_shape) == 4 and isinstance(layer, DenseBase):
+                layer.reveal_parameters_to_binary(reshape=input_shape[1:])
+            else:
+                layer.reveal_parameters_to_binary()
+            input_shape = layer.Y.shape
+
 class Adam(Optimizer):
     """ Adam/AMSgrad optimizer.
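+
+    For instance, the following uses AMSgrad with the inverse square
+    root approximation (a sketch; ``layers`` as above)::
+
+        optimizer = Adam(layers, approx=True, amsgrad=True)
+        optimizer.run(batch_size=128)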
@@ -2414,7 +2713,8 @@ class Adam(Optimizer): """ def __init__(self, layers, n_epochs=1, approx=False, amsgrad=False, normalize=False): - self.gamma = MemValue(cfix(.001)) + super(Adam, self).__init__() + self.set_learning_rate(.001) self.beta1 = 0.9 self.beta2 = 0.999 self.beta1_power = MemValue(cfix(1)) @@ -2425,15 +2725,15 @@ def __init__(self, layers, n_epochs=1, approx=False, amsgrad=False, self.amsgrad = amsgrad self.normalize = normalize if amsgrad: - print_str('Using AMSgrad ') + print_both('Using AMSgrad ', end='') else: - print_str('Using Adam ') + print_both('Using Adam ', end='') if approx: - print_ln('with inverse square root approximation') + print_both('with inverse square root approximation') else: - print_ln('with more precise inverse square root') + print_both('with more precise inverse square root') if normalize: - print_ln('Normalize gradient') + print_both('Normalize gradient') self.layers = layers self.ms = [] @@ -2448,9 +2748,7 @@ def __init__(self, layers, n_epochs=1, approx=False, amsgrad=False, if amsgrad: self.vhats.append(nabla.same_shape()) - super(Adam, self).__init__() - - def update(self, i_epoch, batch): + def _update(self, i_epoch, i_batch, batch): self.beta1_power *= self.beta1 self.beta2_power *= self.beta2 m_factor = MemValue(1 / (1 - self.beta1_power)) @@ -2478,20 +2776,30 @@ def _(base, size): v_part = self.beta2 * v_part + (1 - self.beta2) * g_part ** 2 m.assign_vector(m_part, base) v.assign_vector(v_part, base) + mhat = m_part * m_factor.expand_to_vector(size) + vhat = v_part * v_factor.expand_to_vector(size) if self.amsgrad: - vhat = self.vhats [i_layer].get_vector(base, size) - vhat = util.max(vhat, v_part) + v_max = self.vhats [i_layer].get_vector(base, size) + vhat = util.max(vhat, v_max) self.vhats[i_layer].assign_vector(vhat, base) - diff = self.gamma.expand_to_vector(size) * m_part - else: - mhat = m_part * m_factor.expand_to_vector(size) - vhat = v_part * v_factor.expand_to_vector(size) - diff = self.gamma.expand_to_vector(size) * mhat + diff = self.gamma.expand_to_vector(size) * mhat if self.approx: diff *= mpc_math.InvertSqrt(vhat + self.epsilon ** 2) else: diff /= mpc_math.sqrt(vhat) + self.epsilon theta.assign_vector(theta.get_vector(base, size) - diff, base) + if self.output_diff: + @if_(i_batch % 100 == 0) + def _(): + diff.reveal().binary_output() + if self.output_stats and m.total_size() < 1000: + @if_(i_batch % self.output_stats == 0) + def _(): + self.stat('g', g) + self.stat('m', m) + self.stat('v', v) + self.stat('vhat', self.vhats[i_layer]) + self.stat('theta', theta) class SGD(Optimizer): """ Stochastic gradient descent. 
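+
+    A sketch of the typical flow, assuming tensors ``X`` and ``Y``
+    holding training data and labels of matching size::
+
+        sgd = SGD(layers)
+        sgd.fit(X, Y, epochs=10, batch_size=128)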
@@ -2500,7 +2808,8 @@ class SGD(Optimizer): :param n_epochs: number of epochs for training :param report_loss: disclose and print loss """ - def __init__(self, layers, n_epochs, debug=False, report_loss=None): + def __init__(self, layers, n_epochs=1, debug=False, report_loss=None): + super(SGD, self).__init__(report_loss=report_loss) self.momentum = 0.9 self.layers = layers self.n_epochs = n_epochs @@ -2510,9 +2819,9 @@ def __init__(self, layers, n_epochs, debug=False, report_loss=None): self.nablas.extend(layer.nablas()) for theta in layer.thetas(): self.delta_thetas.append(theta.same_shape()) - self.gamma = MemValue(cfix(0.01)) + self.set_learning_rate(0.01) self.debug = debug - super(SGD, self).__init__(report_loss) + print_both('Using SGD') @_no_mem_warnings def reset(self, X_by_label=None): @@ -2532,7 +2841,7 @@ def _(i): y.assign_all(0) super(SGD, self).reset() - def update(self, i_epoch, batch): + def _update(self, i_epoch, i_batch, batch): for nabla, theta, delta_theta in zip(self.nablas, self.thetas, self.delta_thetas): @multithread(self.n_threads, nabla.total_size()) @@ -2604,14 +2913,16 @@ def _(i): def apply_padding(input_shape, kernel_size, strides, padding): if isinstance(padding, int): - input_shape = [x + 2 * padding for x in input_shape] + padding = [padding, padding] + if isinstance(padding, (tuple, list)): + input_shape = [x + sum(padding) for x in input_shape] padding = 'valid' - if padding == 'valid': + if padding.lower() == 'valid': res = (input_shape[0] - kernel_size[0] + 1) // strides[0], \ (input_shape[1] - kernel_size[1] + 1) // strides[1], assert min(res) > 0, (input_shape, kernel_size, strides, padding) return res - elif padding == 'same': + elif padding.lower() == 'same': return (input_shape[0]) // strides[0], \ (input_shape[1]) // strides[1], else: @@ -2664,6 +2975,9 @@ def compile_by_args(self, program): self.optimizer = 'adam', [], {} elif 'amsgrad' in program.args: self.optimizer = 'adam', [], {'amsgrad': True} + elif 'amsgradprec' in program.args: + self.optimizer = 'adam', [], {'amsgrad': True, + 'approx': False} else: self.optimizer = 'sgd', [], {} @@ -2679,7 +2993,7 @@ def summary(self): def build(self, input_shape, batch_size=128): data_input_shape = input_shape if self.opt != None and \ - input_shape == self.opt.layers[0].X.sizes and \ + input_shape == self.opt.layers[0]._X.sizes and \ batch_size <= self.batch_size and \ type(self.opt).__name__.lower() == self.optimizer[0]: return @@ -2714,36 +3028,18 @@ def build(self, input_shape, batch_size=128): filters = layer[1]['filters'] strides = layer[1]['strides'] padding = layer[1]['padding'] - if isinstance(kernel_size, int): - kernel_size = (kernel_size, kernel_size) - if isinstance(strides, int): - strides = (strides, strides) - weight_shape = [filters] + list(kernel_size) + \ - [input_shape[-1]] - output_shape = [batch_size] + list( - apply_padding(input_shape[1:3], kernel_size, - strides, padding)) + [filters] - padding = padding.upper() if isinstance(padding, str) \ - else padding - layers.append(FixConv2d(input_shape, weight_shape, - (filters,), output_shape, - strides, padding)) + layers.append(easyConv2d( + input_shape, batch_size, filters, kernel_size, + strides, padding)) + output_shape = layers[-1].Y.sizes input_shape = output_shape print('conv output shape', output_shape) elif name == 'maxpool': pool_size = layer[1]['pool_size'] strides = layer[1]['strides'] padding = layer[1]['padding'] - if isinstance(pool_size, int): - pool_size = (pool_size, pool_size) - if isinstance(strides, int): - 
strides = (strides, strides) - if strides == None: - strides = pool_size - layers.append(MaxPool(input_shape, - [1] + list(strides) + [1], - [1] + list(pool_size) + [1], - padding)) + layers.append(easyMaxPool(input_shape, pool_size, + strides, padding)) input_shape = layers[-1].Y.sizes elif name == 'dropout': layers.append(Dropout(batch_size, reduce( @@ -2775,7 +3071,7 @@ def build(self, input_shape, batch_size=128): opt.momentum = momentum elif opt == 'adam': opt = Adam(layers, amsgrad=opts.pop('amsgrad', None), - approx=True) + approx=opts.pop('approx', True)) beta1 = opts.pop('beta_1', None) beta2 = opts.pop('beta_2', None) epsilon = opts.pop('epsilon', None) @@ -2795,7 +3091,7 @@ def build(self, input_shape, batch_size=128): raise Exception(opt + ' not supported') lr = opts.pop('learning_rate', None) if lr != None: - opt.gamma = MemValue(cfix(lr)) + opt.set_learning_rate(lr) if opts: raise Exception(opts + ' not supported') self.batch_size = batch_size @@ -2804,7 +3100,7 @@ def build(self, input_shape, batch_size=128): def fit(self, x, y, batch_size, epochs=1, validation_data=None): assert len(x) == len(y) self.build(x.sizes, batch_size) - if x.total_size() != self.opt.layers[0].X.total_size(): + if x.total_size() != self.opt.layers[0]._X.total_size(): raise Exception('sample data size mismatch') if y.total_size() != self.opt.layers[-1].Y.total_size(): print (y, self.opt.layers[-1].Y) @@ -2814,7 +3110,7 @@ def fit(self, x, y, batch_size, epochs=1, validation_data=None): else: if len(validation_data[0]) != len(validation_data[1]): raise Exception('test set size mismatch') - self.opt.layers[0].X.address = x.address + self.opt.layers[0]._X.address = x.address self.opt.layers[-1].Y.address = y.address self.opt.run_by_args(get_program(), epochs, batch_size, validation_data[0], validation_data[1], @@ -2828,6 +3124,195 @@ def predict(self, x, batch_size=None): batch_size = min(batch_size, self.batch_size) return self.opt.eval(x, batch_size=batch_size) +def layers_from_torch(sequence, data_input_shape, batch_size, input_via=None): + """ Convert a PyTorch Sequential object to MP-SPDZ layers. 
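+
+    A minimal sketch, assuming PyTorch is available at compile time
+    and MNIST-shaped training data::
+
+        import torch.nn as nn
+        net = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 128),
+                            nn.ReLU(), nn.Linear(128, 10))
+        layers = layers_from_torch(net, [60000, 28, 28, 1], 128)
+        optimizer = SGD(layers)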
+ + :param sequence: PyTorch Sequential object + :param data_input_shape: input shape (list of four int) + :param batch_size: batch size (int) + :param input_via: player to input model data via (default: don't) + + """ + layers = [] + + def mul(x): + return reduce(operator.mul, x) + + def process(item): + nonlocal input_shape + name = type(item).__name__ + if name == 'Sequential': + for x in item: + process(x) + elif name == 'Linear': + assert mul(input_shape[1:]) == item.in_features + assert item.bias is not None + layers.append(Dense(input_shape[0], item.in_features, + item.out_features)) + if input_via is not None: + shapes = [x.shape for x in (layers[-1].W, layers[-1].b)] + import numpy + swapped = item.weight.detach().numpy() + if len(input_shape) == 4: + print (swapped.shape) + swapped = numpy.reshape( + swapped, + [item.out_features, input_shape[3]] + input_shape[1:3]) + print (swapped.shape) + swapped = numpy.moveaxis(swapped, 1, -1) + print (swapped.shape) + swapped = numpy.reshape( + swapped, [item.out_features, item.in_features]) + print (swapped.shape) + swapped = numpy.swapaxes(swapped, 0, 1) + layers[-1].W = sfix.input_tensor_via( + input_via, swapped) + layers[-1].b = sfix.input_tensor_via( + input_via, item.bias.detach()) + assert layers[-1].W.shape == shapes[0] + assert layers[-1].b.shape == shapes[1] + input_shape = [batch_size, item.out_features] + elif name == 'Conv2d': + layers.append(easyConv2d(input_shape, batch_size, item.out_channels, + item.kernel_size, item.stride, + item.padding)) + input_shape = layers[-1].Y.shape + if input_via is not None: + shapes = [x.shape for x in + (layers[-1].weights, layers[-1].bias)] + import numpy + swapped = numpy.moveaxis( + numpy.array(item.weight.detach()), 1, -1) + layers[-1].weights = sfix.input_tensor_via(input_via, swapped) + layers[-1].bias = sfix.input_tensor_via( + input_via, item.bias.detach()) + assert layers[-1].weights.shape == shapes[0] + assert layers[-1].bias.shape == shapes[1] + elif name == 'MaxPool2d': + layers.append(easyMaxPool(input_shape, item.kernel_size, + item.stride, item.padding)) + input_shape = layers[-1].Y.shape + elif name == 'ReLU': + layers.append(Relu(input_shape)) + elif name == 'Flatten': + pass + elif name == 'BatchNorm2d': + layers.append(BatchNorm(layers[-1].Y.sizes)) + elif name == 'Dropout': + layers.append(Dropout(input_shape[0], mul(layers[-1].Y.sizes[1:]), + alpha=item.p)) + input_shape = layers[-1].Y.sizes + else: + raise CompilerError('unknown PyTorch module: ' + name) + + input_shape = data_input_shape + [1] * (4 - len(data_input_shape)) + process(sequence) + if layers[-1].d_out == 1: + layers.append(Output(data_input_shape[0])) + else: + layers.append(MultiOutput(data_input_shape[0], layers[-1].d_out)) + return layers + +class OneLayerSGD: + def __init__(self, n_epochs=1, batch_size=1, program=None): + self.n_epochs = n_epochs + self.batch_size = batch_size + self.program = program + + def fit(self, X_train, y_train): + """ Train classifier. + + :param X_train: training data (sfix matrix) + :param y_train: training binary labels (sint/sfix array) + + """ + self.init(X_train) + self.opt.fit(X_train, y_train, self.n_epochs, self.batch_size, + program=self.program, print_accuracy=False, + print_loss=False) + + def fit_with_testing(self, X_train, y_train, X_test, y_test): + """ Train classifier with accuracy output after every epoch. + This reveals all labels to simplify the accuracy computation. 
+
+        :param X_train: training data (sfix matrix)
+        :param y_train: training labels (sint/sfix array)
+        :param X_test: testing data (sfix matrix)
+        :param y_test: testing labels (sint/sfix array)
+
+        """
+        self.init(X_train)
+        self.opt.print_accuracy = self.print_accuracy
+        self.opt.fit(X_train, y_train, self.n_epochs, self.batch_size,
+                     validation_data=(X_test, y_test), program=self.program,
+                     print_accuracy=self.print_accuracy, print_loss=True)
+
+    def predict(self, X):
+        """ Use model for prediction.
+
+        :param X: sample data with row-wise samples (sfix matrix)
+        :returns: sfix array
+
+        """
+        return self.opt.eval(X)
+
+class SGDLogistic(OneLayerSGD):
+    """ Logistic regression using SGD.
+
+    :param n_epochs: number of epochs
+    :param batch_size: batch size
+    :param program: program object to use command-line options from (default is
+      not to use any)
+
+    """
+    print_accuracy = True
+
+    def init(self, X):
+        dense = Dense(*X.sizes, 1)
+        if self.program:
+            sigmoid = Output.from_args(X.sizes[0], self.program)
+            self.opt = Optimizer.from_args(self.program, [dense, sigmoid])
+        else:
+            sigmoid = Output(X.sizes[0])
+            self.opt = SGD([dense, sigmoid], 1)
+
+    def predict(self, X):
+        """ Use model to predict labels.
+
+        :param X: sample data with row-wise samples (sfix matrix)
+        :returns: sint array
+
+        """
+        return self.opt.eval(X, top=True)
+
+    def predict_proba(self, X):
+        """ Use model for probability estimates.
+
+        :param X: sample data with row-wise samples (sfix matrix)
+        :returns: sfix array
+
+        """
+        return super(SGDLogistic, self).predict(X)
+
+class SGDLinear(OneLayerSGD):
+    """ Linear regression using SGD.
+
+    :param n_epochs: number of epochs
+    :param batch_size: batch size
+    :param program: program object to use command-line options from (default is
+      not to use any)
+
+    """
+    print_accuracy = False
+
+    def init(self, X):
+        dense = Dense(*X.sizes, 1)
+        output = LinearOutput(X.sizes[0])
+        if self.program:
+            self.opt = Optimizer.from_args(self.program, [dense, output])
+        else:
+            self.opt = SGD([dense, output], 1)
+
 def solve_linear(A, b, n_iterations, progress=False, n_threads=None,
                  stop=False, already_symmetric=False, precond=False):
     """ Iterative linear solution approximation for :math:`Ax=b`.
@@ -2867,6 +3352,8 @@ def _(i):
                 vr.reveal(), v_norm.reveal())
         if stop:
             return (alpha > 0).reveal()
+    if not already_symmetric:
+        AtA.delete()
     return x
 
 def solve_linear_diag_precond(A, b, x, r, n_iterations, progress=False,
@@ -2926,3 +3413,26 @@ def _(i):
     def _(i):
         res.iadd((x[i] - mean.read()) ** 2)
     return res.read()
+
+def cholesky(A, reveal_diagonal=False):
+    """ Cholesky decomposition.
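+
+    :param A: symmetric positive-definite matrix (Matrix of sfix)
+    :param reveal_diagonal: reveal and print the diagonal elements
+      for debugging
+    :returns: lower triangular matrix :math:`L` with :math:`A = LL^T`
+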
""" + assert len(A.shape) == 2 + assert A.shape[0] == A.shape[1] + L = A.same_shape() + L.assign_all(0) + @for_range(A.shape[0]) + def _(i): + @for_range(i + 1) + def _(j): + sum = sfix.dot_product(L[i], L[j]) + + @if_e(i == j) + def _(): + L[i][j] = mpc_math.sqrt(A[i][i] - sum) + if reveal_diagonal: + print_ln('L[%s][%s] = %s = sqrt(%s - %s)', i, j, + L[i][j].reveal(), A[i][j].reveal(), sum.reveal()) + @else_ + def _(): + L[i][j] = (1.0 / L[j][j] * (A[i][j] - sum)) + return L diff --git a/Compiler/mpc_math.py b/Compiler/mpc_math.py index 8f09bd776..39e31fc6f 100644 --- a/Compiler/mpc_math.py +++ b/Compiler/mpc_math.py @@ -916,7 +916,7 @@ def SqrtComp(z, old=False): k = len(z) if isinstance(z[0], types.sint): return types.sfix._new(sum(z[i] * types.cfix( - 2 ** (-(i - f + 1) / 2)).v for i in range(k))) + 2 ** (-(i - f + 1) / 2), k=k, f=f).v for i in range(k))) k_prime = k // 2 f_prime = f // 2 c1 = types.sfix(2 ** ((f + 1) / 2 + 1)) diff --git a/Compiler/program.py b/Compiler/program.py index dfe08f87f..dfe6a5daf 100644 --- a/Compiler/program.py +++ b/Compiler/program.py @@ -10,6 +10,7 @@ import os import re import sys +import hashlib from collections import defaultdict, deque from functools import reduce @@ -49,12 +50,12 @@ class defaults: garbled = False prime = None galois = 40 - budget = 100000 + budget = 1000 mixed = False edabit = False invperm = False split = None - cisc = False + cisc = True comparison = None merge_opens = True preserve_mem_order = False @@ -126,7 +127,13 @@ def __init__(self, args, options=defaults, name=None): self.n_threads = 1 self.public_input_file = None self.types = {} - self.budget = int(self.options.budget) + if self.options.budget: + self.budget = int(self.options.budget) + else: + if self.options.optimize_hard: + self.budget = 100000 + else: + self.budget = defaults.budget self.to_merge = [ Compiler.instructions.asm_open_class, Compiler.instructions.gasm_open_class, @@ -175,6 +182,11 @@ def __init__(self, args, options=defaults, name=None): self.relevant_opts = set() self.n_running_threads = None self.input_files = {} + self.base_addresses = {} + self._protect_memory = False + if not self.options.cisc: + self.options.cisc = not self.options.optimize_hard + Program.prog = self from . 
import comparison, instructions, instructions_base, types @@ -196,7 +208,7 @@ def init_names(self, args): # ignore path to file - source must be in Programs/Source if "Programs" in os.listdir(os.getcwd()): # compile prog in ./Programs/Source directory - self.programs_dir = os.getcwd() + "/Programs" + self.programs_dir = "Programs" else: # assume source is in main SPDZ directory self.programs_dir = sys.path[0] + "/Programs" @@ -367,8 +379,12 @@ def write_bytes(self): sch_file.write("lgp:%s" % req) sch_file.write("\n") sch_file.write("opts: %s\n" % " ".join(self.relevant_opts)) + sch_file.close() + h = hashlib.sha256() for tape in self.tapes: tape.write_bytes() + h.update(tape.hash) + print('Hash:', h.hexdigest()) def finalize_tape(self, tape): if not tape.purged: @@ -435,7 +451,9 @@ def malloc(self, size, mem_type, reg_type=None, creator_tape=None): tn = get_thread_number() runtime_error_if(tn > self.n_running_threads, "malloc") - return addr + single_size * (tn - 1) + res = addr + single_size * (tn - 1) + self.base_addresses[str(res)] = addr + return res else: return addr @@ -443,6 +461,8 @@ def free(self, addr, mem_type): """Free memory""" if self.curr_block.alloc_pool is not self.curr_tape.basicblocks[0].alloc_pool: raise CompilerError("Cannot free memory within function block") + if not util.is_constant(addr): + addr = self.base_addresses[str(addr)] size = self.allocated_mem_blocks.pop((addr, mem_type)) self.free_mem_blocks[mem_type].push(addr, size) @@ -490,15 +510,26 @@ def public_input(self, x): ) self.public_input_file.write("%s\n" % str(x)) + def get_binary_input_file(self, player): + key = player, 'bin' + if key not in self.input_files: + filename = 'Player-Data/Input-Binary-P%d-0' % player + print('Writing binary data to', filename) + self.input_files[key] = open(filename, 'wb') + return self.input_files[key] + def set_bit_length(self, bit_length): """Change the integer bit length for non-linear functions.""" self.bit_length = bit_length print("Changed bit length for comparisons etc. to", bit_length) def set_security(self, security): + changed = self._security != security self._security = security self.non_linear.set_security(security) - print("Changed statistical security for comparison etc. to", security) + if changed: + print("Changed statistical security for comparison etc. to", + security) @property def security(self): @@ -626,6 +657,19 @@ def disable_memory_warnings(self): self.warn_about_mem.append(False) self.curr_block.warn_about_mem = False + def protect_memory(self, status): + """ Enable or disable memory protection. 
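+
+        :param status: whether to protect (bool)
+
+        This prevents the compiler from reordering the enclosed
+        memory accesses, for example::
+
+            get_program().protect_memory(True)
+            a[i] += x
+            get_program().protect_memory(False)
+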
""" + self._protect_memory = status + + def use_cisc(self): + return self.options.cisc and (not self.prime or self.rabbit_gap()) + + def rabbit_gap(self): + assert self.prime + p = self.prime + logp = int(round(math.log(p, 2))) + return abs(p - 2 ** logp) / p < 2 ** -self.security + @staticmethod def read_tapes(schedule): m = re.search(r"([^/]*)\.mpc", schedule) @@ -644,7 +688,7 @@ def read_tapes(schedule): sys.exit(1) for tapename in lines[2].split(" "): - yield tapename.strip() + yield tapename.strip().split(":")[0] class Tape: @@ -672,6 +716,7 @@ def __init__(self, name, program): self.singular = True self.free_threads = set() self.loop_breaks = [] + self.warned_about_mem = False class BasicBlock(object): def __init__(self, parent, name, scope, exit_condition=None): @@ -984,7 +1029,7 @@ def alloc_loop(block): if self.program.verbose: print("Tape requires", self.req_num) for req, num in sorted(self.req_num.items()): - if num == float("inf") or num >= 2**32: + if num == float("inf") or num >= 2**64: num = -1 if req[1] in data_types: self.basicblocks[-1].instructions.append( @@ -1092,10 +1137,14 @@ def write_bytes(self, filename=None): filename = self.program.programs_dir + "/Bytecode/" + filename print("Writing to", filename) f = open(filename, "wb") + h = hashlib.sha256() for i in self._get_instructions(): if i is not None: - f.write(i.get_bytes()) + b = i.get_bytes() + f.write(b) + h.update(b) f.close() + self.hash = h.digest() def new_reg(self, reg_type, size=None): return self.Register(reg_type, self, size=size) @@ -1274,8 +1323,11 @@ class _no_truth(object): def __bool__(self): raise CompilerError( - "Cannot derive truth value from register, " - "consider using 'compile.py -l'" + "Cannot derive truth value from register. " + "This is a catch-all error appearing if you try to use a " + "run-time value where the compiler expects a compile-time " + "value, most likely a Python integer. " + "In some cases, you can fix this by using 'compile.py -l'." ) class Register(_no_truth): diff --git a/Compiler/sorting.py b/Compiler/sorting.py index fc619b732..c8cb87e89 100644 --- a/Compiler/sorting.py +++ b/Compiler/sorting.py @@ -10,6 +10,16 @@ def dest_comp(B): return sum(Tt) - 1 def reveal_sort(k, D, reverse=False): + """ Sort in place according to "perfect" key. The name hints at the fact + that a random order of the keys is revealed. + + :param k: vector or Array of sint containing exactly :math:`0,\dots,n-1` + in any order + :param D: Array or MultiArray to sort + :param reverse: wether :py:obj:`key` is a permutation in forward or + backward order + + """ assert len(k) == len(D) library.break_point() shuffle = types.sint.get_secure_shuffle(len(k)) @@ -28,6 +38,14 @@ def reveal_sort(k, D, reverse=False): instructions.delshuffle(shuffle) def radix_sort(k, D, n_bits=None, signed=True): + """ Sort in place according to key. 
+ + :param k: keys (vector or Array of sint or sfix) + :param D: Array or MultiArray to sort + :param n_bits: number of bits in keys (int) + :param signed: whether keys are signed (bool) + + """ assert len(k) == len(D) bs = types.Matrix.create_from(k.get_vector().bit_decompose(n_bits)) if signed and len(bs) > 1: diff --git a/Compiler/sqrt_oram.py b/Compiler/sqrt_oram.py index 741baaf74..ae1aa81ca 100644 --- a/Compiler/sqrt_oram.py +++ b/Compiler/sqrt_oram.py @@ -764,9 +764,8 @@ def condition_i(i): # We only need once, so we pick the first one we find @lib.for_range_opt(self.n) def _(i): - nonlocal done self.physical_demux[i] &= done.bit_not() - done |= self.physical_demux[i] + done.update(done | self.physical_demux[i]) # Retrieve the value from the physical memory obliviously @lib.map_sum_opt(get_n_threads(self.n), self.n, [self.value_type]) diff --git a/Compiler/types.py b/Compiler/types.py index ab100ce3b..34e574807 100644 --- a/Compiler/types.py +++ b/Compiler/types.py @@ -166,7 +166,7 @@ def vectorized_function(cls, *args, **kwargs): size = None if 'size' in kwargs: size = kwargs.pop('size') - if size: + if size is not None: set_global_vector_size(size) try: res = function(cls, *args, **kwargs) @@ -187,7 +187,7 @@ def vectorized_init(*args, **kwargs): if 'size' in kwargs and kwargs['size'] is not None \ and kwargs['size'] != size: raise CompilerError('Mismatch in vector size') - if 'size' in kwargs and kwargs['size']: + if 'size' in kwargs and kwargs['size'] is not None: size = kwargs['size'] if size is not None: set_global_vector_size(size) @@ -344,6 +344,10 @@ def popcnt_bits(bits): def zero_if_not(self, condition): return condition * self + def iadd(self, other): + """ Addition assignment. This uses :py:func:`update` internally. """ + self.update(self + other) + class _int(Tape._no_truth): """ Integer functionality. """ @@ -537,6 +541,8 @@ def Tensor(cls, shape): """ if len(shape) == 1: return Array(shape[0], cls) + elif len(shape) == 2: + return Matrix(*shape, cls) else: return MultiArray(shape, cls) @@ -577,7 +583,8 @@ def input_tensor_from_client(cls, client_id, shape): return res @classmethod - def input_tensor_via(cls, player, content): + def input_tensor_via(cls, player, content=None, shape=None, binary=True, + one_hot=False): """ Input tensor-like data via a player. This overwrites the input file for the relevant player. The following returns an @@ -586,37 +593,74 @@ def input_tensor_via(cls, player, content): M = [[1, 2], [3, 4]] sint.input_tensor_via(0, M) - Make sure to copy ``Player-Data/Input-P-0`` if running + Make sure to copy ``Player-Data/Input-P-0`` or + ``Player-Data/Input-Binary-P-0`` if running on another host. 
+ :param player: player to input via (int) + :param content: nested Python list or numpy array (binary mode only) or + left out if not available + :param shape: shape if content not given + :param binary: binary mode (bool) + :param one_hot: one-hot encoding (bool) + """ if program.curr_tape != program.tapes[0]: raise CompilerError('only available in main thread') - shape = [] - tmp = content - while True: - try: - shape.append(len(tmp)) - tmp = tmp[0] - except: - break - if not program.input_files.get(player, None): - program.input_files[player] = open( - 'Player-Data/Input-P%d-0' % player, 'w') - f = program.input_files[player] - def traverse(content, level): - assert len(content) == shape[level] - if level == len(shape) - 1: - for x in content: - f.write(' ') - f.write(str(x)) + if content is not None: + requested_shape = shape + if binary: + import numpy + content = numpy.array(content) + if issubclass(cls, _fix): + min_k = \ + math.ceil(math.log(abs(content).max(), 2)) + cls.f + 1 + if cls.k < min_k: + raise CompilerError( + "data outside fixed-point range, " + "use 'sfix.set_precision(%d, %d)'" % (cls.f, min_k)) + if binary == 2: + t = numpy.double + else: + t = numpy.single + else: + t = numpy.int64 + if one_hot: + content = numpy.eye(content.max() + 1)[content] + content = content.astype(t) + f = program.get_binary_input_file(player) + f.write(content.tobytes()) + f.flush() + shape = content.shape else: - for x in content: - traverse(x, level + 1) - traverse(content, 0) - f.write('\n') + shape = [] + tmp = content + while True: + try: + shape.append(len(tmp)) + tmp = tmp[0] + except: + break + if not program.input_files.get(player, None): + program.input_files[player] = open( + 'Player-Data/Input-P%d-0' % player, 'w') + f = program.input_files[player] + def traverse(content, level): + assert len(content) == shape[level] + if level == len(shape) - 1: + for x in content: + f.write(' ') + f.write(str(x)) + else: + for x in content: + traverse(x, level + 1) + traverse(content, 0) + f.write('\n') + if requested_shape is not None and \ + list(shape) != list(requested_shape): + raise CompilerError('content contradicts shape') res = cls.Tensor(shape) - res.input_from(player) + res.input_from(player, binary=binary) return res class _vec(Tape._no_truth): @@ -1357,14 +1401,14 @@ def store_in_mem(self, address): @vectorized_classmethod def pop(cls): - """ Pop from stack. """ + """ Pop from stack. Made obsolete by :py:func:`update`. """ res = cls() popint(res) return res @vectorized_classmethod def push(cls, value): - """ Push to stack. + """ Push to stack. Made obsolete by :py:func:`update`. :param value: any convertible type """ pushint(cls.conv(value)) @@ -1728,6 +1772,38 @@ def __init__(self, player, value): self.player = player self._v = value + @classmethod + def read_int(cls, player): + """ Read integer from + ``Player-Data/Input-Binary-P-`` only on + party :py:obj:`player`. + + :param player: player (int) + :return: personal cint + + """ + tmp = cint() + fixinput(player, tmp, 0, 0) + return cls(player, tmp) + + @classmethod + def read_fix(cls, player, f, k, precision): + """ Read fixed-point value from + ``Player-Data/Input-Binary-P-`` only on + party :py:obj:`player`. 
+ + :param player: player (int) + :param f: fixed-point precision (int) + :param k: fixed-point length (int) + :param precision: input precision (1: single, 2: double) + :return: personal cfix + + """ + assert precision in (1, 2) + tmp = cint() + fixinput(player, tmp, f, precision) + return cls(player, cfix._new(tmp, f=f, k=k)) + def binary_output(self): """ Write binary output to ``Player-Data/Binary-Output-P-`` if @@ -2278,14 +2354,17 @@ def get_random(cls): return res @vectorized_classmethod - def get_input_from(cls, player): + def get_input_from(cls, player, binary=False): """ Secret input. :param player: public (regint/cint/int) :param size: vector size (int, default 1) """ - res = cls() - inputmixed('int', res, player) + if binary: + return cls(personal.read_int(player)) + else: + res = cls() + inputmixed('int', res, player) return res @vectorized_classmethod @@ -2478,7 +2557,8 @@ def __init__(self, val=None, size=None): inputpersonal(size, val.player, self, self.clear_type.conv(val._v)) elif isinstance(val, _fix): super(sint, self).__init__('s', size=val.v.size) - self.load_other(val.v.round(val.k, val.f)) + self.load_other(val.v.round(val.k, val.f, + nearest=val.round_nearest)) elif isinstance(val, sbitvec): super(sint, self).__init__('s', val=val, size=val[0].n) else: @@ -2533,8 +2613,9 @@ def __ge__(self, other, bit_length=None, security=None): @type_comp @vectorize def __eq__(self, other, bit_length=None, security=None): - return floatingpoint.EQZ(self - other, bit_length or program.bit_length, - security or program.security) + return sintbit.conv( + floatingpoint.EQZ(self - other, bit_length or program.bit_length, + security or program.security)) @read_mem_value @type_comp @@ -2696,7 +2777,8 @@ def Norm(self, k, f, kappa=None, simplex_flag=False): @vectorize def int_div(self, other, bit_length=None, security=None): - """ Secret integer division. + """ Secret integer division. Note that the domain bit length + needs to be about four times the bit length. :param other: sint :param bit_length: bit length of input (default: global bit length) @@ -2710,7 +2792,8 @@ def int_div(self, other, bit_length=None, security=None): @vectorize def int_mod(self, other, bit_length=None): - """ Secret integer modulo. + """ Secret integer modulo. Note that the domain bit length + needs to be about four times the bit length. :param other: sint :param bit_length: bit length of input (default: global bit length) @@ -3626,9 +3709,8 @@ class cfix(_number, _structure): scalars = (int, float, regint, cint) @classmethod def set_precision(cls, f, k = None): - """ Set the precision of the integer representation. Note that some - operations are undefined when the precision of :py:class:`sfix` and - :py:class:`cfix` differs. The initial defaults are chosen to + """ Set the precision of the integer representation. + The initial defaults are chosen to allow the best optimization of probabilistic truncation in computation modulo 2^64 (2*k < 64). 
Generally, 2*k must be at most the integer length for rings and at most m-s-1 for @@ -3686,6 +3768,10 @@ def cfix_to_cint(fix_val): def malloc(size, creator_tape=None): return program.malloc(size, cint, creator_tape=creator_tape) + @classmethod + def free(cls, addr): + return cint.free(addr) + @staticmethod def n_elements(): return 1 @@ -3749,6 +3835,9 @@ def __getitem__(self, index): return [self._new(x, k=self.k, f=self.f) for x in self.v[index]] return self._new(self.v[index], k=self.k, f=self.f) + def get_vector(self): + return self + @vectorize def load_int(self, v): self.v = cint(v) * (2 ** self.f) @@ -3777,14 +3866,25 @@ def size(self): def sizeof(self): return self.size * 4 + @read_mem_value + def parse_type(self, other): + res = parse_type(other, f=self.f, k=self.k) + # check attributes if available + try: + assert res.k == self.k + assert res.f == self.f + except AttributeError: + pass + return res + @vectorize def add(self, other): """ Clear fixed-point addition. :param other: cfix/cint/regint/int """ - other = parse_type(other) + other = self.parse_type(other) if isinstance(other, cfix): - return cfix._new(self.v + other.v) + return cfix._new(self.v + other.v, k=self.k, f=self.f) else: return NotImplemented @@ -3796,13 +3896,13 @@ def mul(self, other): return sfix._new(self.v * other, k=self.k, f=self.f) if isinstance(other, (int, regint, cint)): return cfix._new(self.v * cint(other), k=self.k, f=self.f) - other = parse_type(other) + other = self.parse_type(other) if isinstance(other, cfix): assert self.f == other.f sgn = cint(1 - 2 * ((self < 0) ^ (other < 0))) absolute = self.v * other.v * sgn val = sgn * (absolute >> self.f) - return cfix._new(val) + return cfix._new(val, k=self.k, f=self.f) elif isinstance(other, sfix): return NotImplemented else: @@ -3819,11 +3919,11 @@ def __sub__(self, other): """ Clear fixed-point subtraction. :param other: cfix/cint/regint/int """ - other = parse_type(other) + other = self.parse_type(other) if isinstance(other, cfix): - return cfix._new(self.v - other.v) + return cfix._new(self.v - other.v, k=self.k, f=self.f) elif isinstance(other, sfix): - return sfix._new(self.v - other.v) + return sfix._new(self.v - other.v, k=self.k, f=self.f) else: raise NotImplementedError @@ -3831,7 +3931,7 @@ def __sub__(self, other): def __neg__(self): """ Clear fixed-point negation. """ # cfix type always has .v - return cfix._new(-self.v) + return cfix._new(-self.v, f=self.f, k=self.k) def __rsub__(self, other): return -self + other @@ -3844,7 +3944,7 @@ def __eq__(self, other): :param other: cfix/cint/regint/int :return: 0/1 :rtype: regint """ - other = parse_type(other) + other = self.parse_type(other) if isinstance(other, cfix): return self.v == other.v elif isinstance(other, sfix): @@ -3855,7 +3955,7 @@ def __eq__(self, other): @vectorize def __lt__(self, other): """ Clear fixed-point comparison. """ - other = parse_type(other) + other = self.parse_type(other) if isinstance(other, cfix): assert self.k == other.k return self.v.less_than(other.v, self.k) @@ -3869,7 +3969,7 @@ def __lt__(self, other): @vectorize def __le__(self, other): """ Clear fixed-point comparison. """ - other = parse_type(other) + other = self.parse_type(other) if isinstance(other, cfix): return 1 - (self > other) elif isinstance(other, sfix): @@ -3880,7 +3980,7 @@ def __le__(self, other): @vectorize def __gt__(self, other): """ Clear fixed-point comparison. 
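+
+        :param other: cfix/cint/regint/int
+        :return: 0/1
+        :rtype: regint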
""" - other = parse_type(other) + other = self.parse_type(other) if isinstance(other, cfix): return other.__lt__(self) elif isinstance(other, sfix): @@ -3891,7 +3991,7 @@ def __gt__(self, other): @vectorize def __ge__(self, other): """ Clear fixed-point comparison. """ - other = parse_type(other) + other = self.parse_type(other) if isinstance(other, cfix): return 1 - (self < other) elif isinstance(other, sfix): @@ -3902,7 +4002,7 @@ def __ge__(self, other): @vectorize def __ne__(self, other): """ Clear fixed-point comparison. """ - other = parse_type(other) + other = self.parse_type(other) if isinstance(other, cfix): return self.v != other.v elif isinstance(other, sfix): @@ -3919,7 +4019,7 @@ def __truediv__(self, other): """ Clear fixed-point division. :param other: cfix/cint/regint/int """ - other = parse_type(other, self.k, self.f) + other = self.parse_type(other) if isinstance(other, cfix): return cfix._new(library.cint_cint_division( self.v, other.v, self.k, self.f), k=self.k, f=self.f) @@ -3938,7 +4038,7 @@ def __rtruediv__(self, other): """ Fixed-point division. :param other: sfix/sint/cfix/cint/regint/int """ - other = parse_type(other, self.k, self.f) + other = self.parse_type(other) return other / self @vectorize @@ -4230,7 +4330,7 @@ def set_precision_from_args(cls, program, adapt_ring=False): elif k is not None: raise CompilerError('need to set fractional precision') if 'nearest' in program.args: - print('Nearest rounding instead of proabilistic ' + print('Nearest rounding instead of probabilistic ' 'for fixed-point computation') cls.round_nearest = True if adapt_ring and program.options.ring \ @@ -4242,7 +4342,7 @@ def set_precision_from_args(cls, program, adapt_ring=False): program.set_ring_size(need) @classmethod - def coerce(cls, other): + def coerce(cls, other, equal_precision=None): if isinstance(other, (_fix, cls.clear_type)): return other else: @@ -4303,6 +4403,10 @@ def __init__(self, _v=None, k=None, f=None, size=None): self.v = type(self)(_v.read()).v elif isinstance(_v, (list, tuple)): self.v = self.int_type(list(self.conv(x).v for x in _v)) + elif isinstance(_v, personal): + assert _v._v.f == f + assert _v._v.k == k + self.v = self.int_type(personal(_v.player, _v._v.v)) else: raise CompilerError('cannot convert %s to sfix' % _v) if not isinstance(self.v, self.int_type): @@ -4347,7 +4451,7 @@ def mul(self, other): k = len(bin(abs(v))) - 1 other = self.multipliable(v, k, f, self.size) try: - other = self.coerce(other) + other = self.coerce(other, equal_precision=False) except: return NotImplemented if isinstance(other, (_fix, self.clear_type)): @@ -4463,16 +4567,19 @@ class sfix(_fix): default_type = sint @vectorized_classmethod - def get_input_from(cls, player): + def get_input_from(cls, player, binary=False): """ Secret fixed-point input. 
:param player: public (regint/cint/int) :param size: vector size (int, default 1) """ cls.int_type.require_bit_length(cls.k) - v = cls.int_type() - inputmixed('fix', v, cls.f, player) - return cls._new(v) + if binary: + return cls(personal.read_fix(player, cls.f, cls.k, int(binary))) + else: + v = cls.int_type() + inputmixed('fix', v, cls.f, player) + return cls._new(v) @vectorized_classmethod def get_raw_input_from(cls, player): @@ -4487,21 +4594,31 @@ def get_random(cls, lower, upper, symmetric=True): :param upper: float :param size: vector size (int, default 1) """ + f = cls.f + k = cls.k log_range = int(math.log(upper - lower, 2)) n_bits = log_range + cls.f + gen_range = (2 ** (n_bits) - 1) / 2 ** cls.f + diff = upper - lower + factor = diff / gen_range + real = lambda x: cfix.int_rep(x, f, k) * 2 ** -f + real_range = real(real(factor) * gen_range) average = lower + 0.5 * (upper - lower) - real_range = (2 ** (n_bits) - 1) / 2 ** cls.f lower = average - 0.5 * real_range - real_lower = round(lower * 2 ** cls.f) / 2 ** cls.f - r = cls._new(cls.int_type.get_random_int(n_bits)) + lower + upper = average + 0.5 * real_range + r = cls._new(cls.int_type.get_random_int(n_bits)) * factor + lower if symmetric: lowest = math.floor(lower * 2 ** cls.f) / 2 ** cls.f - print('randomness range [%f,%f], fringes half the probability' % \ - (lowest, lowest + 2 ** log_range)) + highest = math.ceil(upper * 2 ** cls.f) / 2 ** cls.f + if program.verbose: + print('randomness range [%f,%f], ' + 'fringes half the probability' % \ + (lowest, highest)) return cls.int_type.get_random_bit().if_else(r, -r + 2 * average) else: - print('randomness range [%f,%f], %d bits' % \ - (real_lower, real_lower + real_range, n_bits)) + if program.verbose: + print('randomness range [%f,%f], %d bits' % \ + (real(lower), real(lower) + real_range, n_bits)) return r @classmethod @@ -4531,8 +4648,17 @@ def dot_product(cls, x, y, res_params=None): def expand_to_vector(self, size): return self._new(self.v.expand_to_vector(size), k=self.k, f=self.f) - def coerce(self, other): - return parse_type(other, k=self.k, f=self.f) + @read_mem_value + def coerce(self, other, equal_precision=True): + res = parse_type(other, k=self.k, f=self.f) + if equal_precision: + # check parameters if available + try: + assert res.k == self.k + assert res.f == self.f + except AttributeError: + pass + return res def hard_conv_me(self, cls): assert cls == sint @@ -4953,29 +5079,20 @@ def __init__(self, v, p=None, z=None, s=None, size=None): if isinstance(v, int): if not ((v >= 2**(self.vlen-1) and v < 2**(self.vlen)) or v == 0): raise CompilerError('Floating point number malformed: significand') - self.v = sint(v) - else: - self.v = v if isinstance(p, int): if not (p >= -2**(self.plen - 1) and p < 2**(self.plen - 1)): raise CompilerError('Floating point number malformed: exponent %d not unsigned %d-bit integer' % (p, self.plen)) - self.p = sint(p) - else: - self.p = p if isinstance(z, int): if not (z == 0 or z == 1): raise CompilerError('Floating point number malformed: zero bit') - self.z = sint() - ldsi(self.z, z) - else: - self.z = z if isinstance(s, int): if not (s == 0 or s == 1): raise CompilerError('Floating point number malformed: sign') - self.s = sint() - ldsi(self.s, s) - else: - self.s = s + # copying necessary for update to work properly + self.v = sint(v) + self.p = sint(p) + self.z = sint(z) + self.s = sint(s) def __getitem__(self, index): return sfloat(*(x[index] for x in self)) @@ -5240,6 +5357,19 @@ def reveal(self): :return: cfloat """ return 
cfloat(self.v.reveal(), self.p.reveal(), self.z.reveal(), self.s.reveal())
 
+    def update(self, other):
+        """
+        Update register. Useful in loops like
+        :py:func:`~Compiler.library.for_range`.
+
+        :param other: any convertible type
+
+        """
+        self.v.update(other.v)
+        self.p.update(other.p)
+        self.z.update(other.z)
+        self.s.update(other.s)
+
 class cfloat(Tape._no_truth):
     """ Helper class for printing revealed sfloats. """
     __slots__ = ['v', 'p', 'z', 's', 'nan']
@@ -5297,6 +5427,12 @@ def reveal_to_clients(self, clients):
         """
         self.value_type.reveal_to_clients(clients, [self.get_vector()])
 
+    @staticmethod
+    def _cmp_fail(*args):
+        raise CompilerError('equality of data structures is not implemented')
+
+    __eq__ = __ne__ = __le__ = __lt__ = __gt__ = __ge__ = _cmp_fail
+
 class Array(_vectorizable):
     """ Array accessible by public index. That is, ``a[i]`` works for an
@@ -5361,14 +5497,28 @@ def __init__(self, length, value_type, address=None, debug=None, alloc=True):
             self.alloc()
 
     def alloc(self):
-        if self.address is None:
-            self.address = self.value_type.malloc(self.length,
-                                                  self.creator_tape)
+        if self._address is None:
+            try:
+                self.address = self.value_type.malloc(self.length,
+                                                      self.creator_tape)
+            except AttributeError:
+                raise CompilerError('cannot create Array of %s' % \
+                                    self.value_type)
 
     def delete(self):
         self.value_type.free(self.address)
         self.address = None
 
+    @property
+    def address(self):
+        if self._address is None:
+            raise CompilerError('trying to access unallocated memory')
+        return self._address
+
+    @address.setter
+    def address(self, address):
+        self._address = address
+
     def get_address(self, index, size=None):
         if isinstance(index, (_secret, _single)):
             raise CompilerError('need cleartext index')
@@ -5498,6 +5648,10 @@ def __len__(self):
     def total_size(self):
         return self.length * self.value_type.n_elements()
 
+    @property
+    def shape(self):
+        return [self.length]
+
     def __iter__(self):
         for i in range(self.length):
             yield self[i]
@@ -5539,16 +5693,25 @@ def assign_all(self, value, use_threads=True, conv=True):
         """ Assign the same value to all entries.
 
         :param value: convertible to basic type """
-        if conv:
-            value = self.value_type.conv(value)
-            if value.size != 1:
-                raise CompilerError('cannot assign vector to all elements')
-        mem_value = MemValue(value)
+        from Compiler.GC.types import bits
+        use_vector = util.is_constant(value) and \
+            not issubclass(self.value_type, (bits, squant))
+        if not use_vector:
+            if conv:
+                value = self.value_type.conv(value)
+                if value.size != 1:
+                    raise CompilerError('cannot assign vector to all elements')
+            mem_value = MemValue(value)
         self.address = MemValue.if_necessary(self.address)
         n_threads = 8 if use_threads and len(self) > 2**20 else None
-        @library.for_range_multithread(n_threads, 1024, len(self))
-        def f(i):
-            self[i] = mem_value
+        @library.multithread(n_threads, len(self))
+        def _(base, size):
+            if use_vector:
+                self.assign_vector(self.value_type(value, size=size), base)
+            else:
+                @library.for_range_opt(size)
+                def _(i):
+                    self[base + i] = mem_value
         return self
 
     def get_vector(self, base=0, size=None):
@@ -5615,7 +5778,15 @@ def expand_to_vector(self, index, size):
     def get_mem_value(self, index):
         return MemValue(self[index], self.get_address(index))
 
-    def input_from(self, player, budget=None, raw=False):
+    def concat(self, other):
+        """ Concatenate two arrays. """
""" + assert self.value_type == other.value_type + res = Array(len(self) + len(other), self.value_type) + res.assign_vector(self[:]) + res.assign_vector(other[:], len(self)) + return res + + def input_from(self, player, budget=None, raw=False, **kwargs): """ Fill with inputs from player if supported by type. :param player: public (regint/cint/int) """ @@ -5624,12 +5795,15 @@ def input_from(self, player, budget=None, raw=False): else: input_from = self.value_type.get_input_from try: - self.assign(input_from(player, size=len(self))) + @library.multithread(None, len(self), + max_size=budget or program.budget) + def _(base, size): + self.assign(input_from(player, size=size, **kwargs), base) except (TypeError, CompilerError): print (budget) @library.for_range_opt(self.length, budget=budget) def _(i): - self[i] = input_from(player) + self[i] = input_from(player, **kwargs) def read_from_file(self, start): """ Read content from ``Persistence/Transactions-P.data``. @@ -5713,15 +5887,29 @@ def shuffle(self): self.assign_vector(self.get(regint.inc(len(self)).shuffle())) def secure_shuffle(self): - """ Secure shuffle in place according to the security model. """ + """ Secure shuffle in place according to the security model. + See :py:func:`MultiArray.secure_shuffle` for references. """ self.assign_vector(self.get_vector().secure_shuffle()) def secure_permute(self, *args, **kwargs): - """ Secure permutate in place according to the security model. """ + """ Secure permutate in place according to the security model. + See :py:func:`MultiArray.secure_shuffle` for references. + + :param permutation: output of :py:func:`sint.get_secure_shuffle()` + :param reverse: whether to apply inverse (default: False) + + """ self.assign_vector(self.get_vector().secure_permute(*args, **kwargs)) def randomize(self, *args): - """ Randomize according to data type. """ + """ Randomize array according to data type. + If it is :py:class:`sfix`, the following will sample an + individual uniformly random entry of the array + :py:obj:`M` roughly in the range :math:`[a,b]`:: + + M.randomize(a, b) + + """ self.assign_vector(self.value_type.get_random(*args, size=len(self))) def reveal(self): @@ -5780,10 +5968,12 @@ def reveal_to(self, player): def sort(self, n_threads=None, batcher=False, n_bits=None): """ - Sort in place using radix sort with complexity :math:`O(n \log - n)` for :py:class:`sint` and :py:class:`sfix`, and Batcher's - odd-even mergesort with :math:`O(n (\log n)^2)` for - :py:class:`sfloat`. + Sort in place using `radix sort + `_ with complexity + :math:`O(n \log n)` for :py:class:`sint` and :py:class:`sfix`, + and `Batcher's odd-even mergesort + `_ with :math:`O(n (\log + n)^2)` for :py:class:`sfloat`. :param n_threads: number of threads to use (single thread by default), need to use Batcher's algorithm for several threads @@ -5878,10 +6068,16 @@ def __len__(self): """ Size of top dimension. """ return self.sizes[0] + @property + def shape(self): + return list(self.sizes) + def __iter__(self): return (self[i] for i in range(len(self))) def to_array(self): + assert self.value_type.n_elements() == 1 and \ + self.value_type.mem_size() == 1 return Array(self.total_size(), self.value_type, address=self.address) def maybe_get(self, condition, index): @@ -5895,9 +6091,12 @@ def assign_all(self, value): """ Assign the same value to all entries. 
:param value: convertible to relevant basic type """ - @library.for_range(self.sizes[0]) - def f(i): - self[i].assign_all(value) + try: + self.to_array().assign_all(value) + except AssertionError: + @library.for_range(self.sizes[0]) + def f(i): + self[i].assign_all(value) return self def total_size(self): @@ -6005,7 +6204,7 @@ def get_addresses(self, *indices): def get_vector_by_indices(self, *indices): """ Vector with potential asterisks. The potential retrieves - all entry where the first dimension index is 0, and the third + all entries where the first dimension index is 0, and the third dimension index is 1:: a.get_vector_by_indices(0, None, 1) @@ -6046,22 +6245,18 @@ def concat(self, other): res.assign_part_vector(other[:], self.sizes[0]) return res - def input_from(self, player, budget=None, raw=False): + def input_from(self, player, budget=None, raw=False, **kwargs): """ Fill with inputs from player if supported by type. :param player: public (regint/cint/int) """ if util.is_constant(self.total_size()) and \ self.value_type.n_elements() == 1 and \ self.value_type.mem_size() == 1: - if raw or program.always_raw(): - input_from = self.value_type.get_raw_input_from - else: - input_from = self.value_type.get_input_from - self.assign_vector(input_from(player, size=self.total_size())) + self.to_array().input_from(player, budget=budget, raw=raw, **kwargs) else: @library.for_range_opt(self.sizes[0], budget=budget) def _(i): - self[i].input_from(player, budget=budget, raw=raw) + self[i].input_from(player, budget=budget, raw=raw, **kwargs) def write_to_file(self, position=None): """ Write shares of integer representation to @@ -6174,7 +6369,10 @@ def dot(self, other, res_params=None, n_threads=None): :param self: two-dimensional :param other: Matrix or Array of matching size and type - :param n_threads: number of threads (default: all in same thread) """ + :param n_threads: number of threads (default: all in same thread) + :rtype: Matrix or Array of appropriate size and type + + """ assert len(self.sizes) == 2 if isinstance(other, Array): assert len(other) == self.sizes[1] @@ -6241,6 +6439,8 @@ def _(k): def direct_mul(self, other, reduce=True, indices=None): """ Matrix multiplication in the virtual machine. + Unlike :py:func:`dot`, this only works for sint and sfix, and it + returns a vector instead of a data structure. 
:param self: :py:class:`Matrix` / 2-dimensional :py:class:`MultiArray` :param other: :py:class:`Matrix` / 2-dimensional :py:class:`MultiArray` @@ -6326,6 +6526,10 @@ def trans_mul_to(self, other, res, n_threads=None): :param res: matrix of matching dimension to store result :param n_threads: number of threads (default: single thread) """ + assert other.sizes[0] == self.sizes[0] + assert res.sizes[0] == self.sizes[1] + assert res.sizes[1] == other.sizes[1] + assert len(res.sizes) == 2 @library.for_range_multithread(n_threads, 1, self.sizes[1]) def _(i): indices = [regint(i), regint.inc(self.sizes[0])] @@ -6342,6 +6546,10 @@ def mul_trans_to(self, other, res, n_threads=None): :param res: matrix of matching dimension to store result :param n_threads: number of threads (default: single thread) """ + assert other.sizes[1] == self.sizes[1] + assert res.sizes[0] == self.sizes[0] + assert res.sizes[1] == other.sizes[0] + assert len(res.sizes) == 2 @library.for_range_multithread(n_threads, 1, self.sizes[0]) def _(i): indices = [regint(i), regint.inc(self.sizes[1])] @@ -6354,62 +6562,32 @@ def direct_mul_to_matrix(self, other): res.assign_vector(self.direct_mul(other)) return res - def budget_mul(self, other, n_rows, row, n_columns, column, reduce=True, - res=None): - assert len(self.sizes) == 2 - assert len(other.sizes) == 2 - if res is None: - if reduce: - res_matrix = Matrix(n_rows, n_columns, self.value_type) - else: - res_matrix = Matrix(n_rows, n_columns, \ - self.value_type.unreduced_type) - else: - res_matrix = res - @library.for_range_opt(n_rows) - def _(i): - @library.for_range_opt(n_columns) - def _(j): - col = column(other, j) - r = row(self, i) - if reduce: - res_matrix[i][j] = self.value_type.dot_product(r, col) - else: - entry = self.value_type.unreduced_dot_product(r, col) - res_matrix[i][j] = entry - return res_matrix - def plain_mul(self, other, res=None): - """ Alternative matrix multiplication. - - :param self: two-dimensional - :param other: two-dimensional container of matching type and size """ - assert other.sizes[0] == self.sizes[1] - return self.budget_mul(other, self.sizes[0], lambda x, i: x[i], \ - other.sizes[1], \ - lambda x, j: [x[k][j] for k in range(len(x))], - res=res) + raise CompilerError('Deprecated functionality. Use dot()') def mul_trans(self, other): """ Matrix multiplication with transpose of :py:obj:`other`. 
:param self: two-dimensional - :param other: two-dimensional container of matching type and size """ - assert other.sizes[1] == self.sizes[1] - return self.budget_mul(other, self.sizes[0], lambda x, i: x[i], \ - other.sizes[0], lambda x, j: x[j]) + :param other: two-dimensional container of matching type and size + :return: Matrix of matching type and size + + """ + res = Matrix(self.sizes[0], other.sizes[0], self.value_type) + self.mul_trans_to(other, res) + return res - def trans_mul(self, other, reduce=True, res=None): + def trans_mul(self, other): """ Matrix multiplication with transpose of :py:obj:`self` :param self: two-dimensional - :param other: two-dimensional container of matching type and size """ - assert other.sizes[0] == self.sizes[0] - return self.budget_mul(other, self.sizes[1], \ - lambda x, j: [x[k][j] for k in range(len(x))], \ - other.sizes[1], \ - lambda x, j: [x[k][j] for k in range(len(x))], - reduce=reduce, res=res) + :param other: two-dimensional container of matching type and size + :return: Matrix of matching type and size + + """ + res = Matrix(self.sizes[1], other.sizes[1], self.value_type) + self.trans_mul_to(other, res) + return res def parallel_mul(self, other): assert self.sizes[1] == other.sizes[0] @@ -6467,16 +6645,27 @@ def diag(self): return self.array.get(regint.inc(n, 0, n + 1)) def secure_shuffle(self): - """ Securely shuffle rows (first index). """ + """ Securely shuffle rows (first index). This uses the algorithm in + Section 4.3 of `Keller and Scholl + `_ or Section 3.2 of + `Asharov et al. `_ if applicable. + """ self.assign_vector(self.get_vector().secure_shuffle(self.part_size())) def secure_permute(self, permutation, reverse=False): - """ Securely permute rows (first index). """ + """ Securely permute rows (first index). See + :py:func:`secure_shuffle` for references. + + :param permutation: output of :py:func:`sint.get_secure_shuffle()` + :param reverse: whether to apply inverse (default: False) + + """ self.assign_vector(self.get_vector().secure_permute( permutation, self.part_size(), reverse)) def sort(self, key_indices=None, n_bits=None): """ Sort sub-arrays (different first index) in place. + This uses `radix sort `_. :param key_indices: indices to sorting keys, for example ``(1, 2)`` to sort three-dimensional array ``a`` by keys @@ -6496,15 +6685,20 @@ def sort(self, key_indices=None, n_bits=None): keys = self.get_vector_by_indices(*key_indices) sorting.radix_sort(keys, self, n_bits=n_bits) - def randomize(self, *args): - """ Randomize according to data type. """ - if self.total_size() < program.budget: + def randomize(self, *args, n_threads=None): + """ Randomize according to data type. + If it is :py:class:`sfix`, the following will sample an + individual uniformly random entry of the multi-array + :py:obj:`M` roughly in the range :math:`[a,b]`:: + + M.randomize(a, b) + + """ + @library.multithread(n_threads, self.total_size(), + max_size=program.budget) + def _(base, size): self.assign_vector( - self.value_type.get_random(*args, size=self.total_size())) - else: - @library.for_range(self.sizes[0]) - def _(i): - self[i].randomize(*args) + self.value_type.get_random(*args, size=size), base=base) def reveal(self): """ Reveal to :py:obj:`MultiArray` of same shape. 
""" @@ -6588,7 +6782,7 @@ def __init__(self, sizes, value_type, debug=None, address=None, alloc=True): else: self.array = Array(reduce(operator.mul, sizes), \ value_type, address=address, alloc=alloc) - SubMultiArray.__init__(self, sizes, value_type, self.array.address, 0, \ + SubMultiArray.__init__(self, sizes, value_type, self.array._address, 0, debug=debug) if len(sizes) < 2: raise CompilerError('Use Array') @@ -6626,6 +6820,12 @@ def create_from(rows): t = type(rows[0][0]) else: t = type(rows[0]) + if t != sfix: + for row in rows: + if isinstance(row, sfix) or \ + (isinstance(row, Array) and row.value_type == sfix): + raise CompilerError( + 'accidental shortening by creating matrix') res = Matrix(len(rows), len(rows[0]), t) for i in range(len(rows)): res[i].assign(rows[i]) @@ -6661,6 +6861,20 @@ def set_column(self, index, vector): self.sizes[1]) self.value_type.conv(vector).store_in_mem(addresses) + def concat_columns(self, other): + """ Concatenate two matrices by columns. """ + assert self.sizes[0] == other.sizes[0] + assert self.value_type == other.value_type + res = Matrix(self.sizes[0], self.sizes[1] + other.sizes[1], + self.value_type) + @library.for_range(self.sizes[1]) + def _(i): + res.set_column(i, self.get_column(i)) + @library.for_range(other.sizes[1]) + def _(i): + res.set_column(self.sizes[1] + i, other.get_column(i)) + return res + class VectorArray(object): def __init__(self, length, value_type, vector_size, address=None): self.array = Array(length * vector_size, value_type, address) @@ -6800,7 +7014,11 @@ def write(self, value): self.check() if isinstance(value, MemValue): value = value.read() - value = self.value_type.conv(value) + try: + value = self.value_type.conv(value) + except: + raise CompilerError('Cannot store %s as MemValue of %s' % \ + (type(value), self.value_type)) if value.size != self.size: raise CompilerError('size mismatch') self.register = value diff --git a/ExternalIO/README.md b/ExternalIO/README.md index 89328440e..b841b0bbc 100644 --- a/ExternalIO/README.md +++ b/ExternalIO/README.md @@ -1,9 +1,12 @@ -The ExternalIO directory contains an example of managing I/O between external client processes and SPDZ parties running SPDZ engines. These instructions assume that SPDZ has been built as per the [project readme](../README.md). +The ExternalIO directory contains an example of managing I/O between +external client processes and parties running MP-SPDZ engines. These +instructions assume that MP-SPDZ has been built as per the [project +readme](../README.md). ## Working Examples -[bankers-bonus-client.cpp](./bankers-bonus-client.cpp) and -[bankers-bonus-client.py](./bankers-bonus-client.py) act as a +[bankers-bonus-client.cpp](../ExternalIO/bankers-bonus-client.cpp) and +[bankers-bonus-client.py](../ExternalIO/bankers-bonus-client.py) act as a client to [bankers_bonus.mpc](../Programs/Source/bankers_bonus.mpc) and demonstrates sending input and receiving output as described by [Damgård et al.](https://eprint.iacr.org/2015/1006) The computation @@ -56,5 +59,5 @@ Only the `sint` methods used in the example are documented here, equivalent meth The example uses the `Client` class implemented in `ExternalIO/Client.hpp` to handle the communication, see -https://mp-spdz.readthedocs.io/en/latest/io.html#reference for +[this reference](https://mp-spdz.readthedocs.io/en/latest/io.html#reference) for documentation. 
diff --git a/GC/FakeSecret.cpp b/GC/FakeSecret.cpp index 940fc5569..f69130009 100644 --- a/GC/FakeSecret.cpp +++ b/GC/FakeSecret.cpp @@ -9,6 +9,7 @@ #include "GC/Processor.hpp" #include "GC/ShareSecret.hpp" +#include "GC/ThreadMaster.hpp" #include "Processor/Input.hpp" namespace GC @@ -121,4 +122,9 @@ void FakeSecret::finalize_input(Input& inputter, int from, int n_bits) *this = inputter.finalize(from, n_bits); } +void FakeSecret::run_tapes(const vector& args) +{ + Thread::s().master.machine.run_tapes(args); +} + } /* namespace GC */ diff --git a/GC/FakeSecret.h b/GC/FakeSecret.h index cd43ae1d5..668b5a967 100644 --- a/GC/FakeSecret.h +++ b/GC/FakeSecret.h @@ -119,6 +119,8 @@ class FakeSecret : public ShareInterface, public BitVec static void andm(GC::Processor&, const BaseInstruction&) { throw runtime_error("andm not implemented"); } + static void run_tapes(const vector& args); + static FakeSecret input(GC::Processor& processor, const InputArgs& args); static FakeSecret input(int from, word input, int n_bits); diff --git a/GC/Program.hpp b/GC/Program.hpp index f1547f592..768a09c58 100644 --- a/GC/Program.hpp +++ b/GC/Program.hpp @@ -126,7 +126,7 @@ BreakType Program::execute(Processor& Proc, U& dynamic_memory, cout << "complexity at " << time << ": " << Proc.complexity << endl; #endif } - while (Proc.complexity < (1 << 19)); + while (Proc.complexity < (size_t) OnlineOptions::singleton.batch_size); Proc.time = time; #ifdef DEBUG_ROUNDS cout << "breaking at time " << Proc.time << endl; diff --git a/GC/RuntimeBranching.h b/GC/RuntimeBranching.h index 6ba0faf06..a7cc8cdee 100644 --- a/GC/RuntimeBranching.h +++ b/GC/RuntimeBranching.h @@ -20,6 +20,9 @@ class RuntimeBranching void untaint() { +#ifdef DEBUG_YAO + cout << "untaint from " << tainted << endl; +#endif bool was_tainted = tainted; tainted = false; if (was_tainted) diff --git a/GC/Secret.h b/GC/Secret.h index 9fee3f2ff..b4f9ac8e9 100644 --- a/GC/Secret.h +++ b/GC/Secret.h @@ -133,6 +133,8 @@ class Secret static void andm(Processor& processor, const BaseInstruction& instruction) { T::andm(processor, instruction); } + static void run_tapes(const vector& args) { T::run_tapes(args); } + Secret(); Secret(const Integer& x) { *this = x; } diff --git a/GC/SemiSecret.h b/GC/SemiSecret.h index dc9e0a341..4110b9a49 100644 --- a/GC/SemiSecret.h +++ b/GC/SemiSecret.h @@ -34,6 +34,8 @@ class SemiSecretBase : public V, public ShareSecret typedef T part_type; typedef T small_type; + static const bool is_real = true; + static const int default_length = sizeof(BitVec) * 8; static string type_string() { return "binary secret"; } diff --git a/GC/ShareSecret.h b/GC/ShareSecret.h index d8c0c18c5..bdf6c9032 100644 --- a/GC/ShareSecret.h +++ b/GC/ShareSecret.h @@ -84,6 +84,9 @@ class ShareSecret static BitVec get_mask(int n) { return n >= 64 ? 
-1 : ((1L << n) - 1); } + static void run_tapes(const vector& args) + { Thread::s().master.machine.run_tapes(args); } + void check_length(int n, const Integer& x); void invert(int n, const U& x); @@ -160,7 +163,7 @@ class RepSecretBase : public FixedVec, public ShareSecret void bitdec(Memory& S, const vector& regs) const; void xor_(int n, const This& x, const This& y) - { *this = x ^ y; (void)n; } + { *this = (x ^ y).mask(n); } This operator&(const Clear& other) { return super::operator&(BitVec(other)); } diff --git a/GC/ThreadMaster.hpp b/GC/ThreadMaster.hpp index 03eea7813..ff0763833 100644 --- a/GC/ThreadMaster.hpp +++ b/GC/ThreadMaster.hpp @@ -97,7 +97,8 @@ void ThreadMaster::run() delete thread; } - exe_stats.print(); + if (not exe_stats.empty()) + exe_stats.print(); stats.print(); machine.print_timers(); diff --git a/GC/instructions.h b/GC/instructions.h index 62a71603f..272011947 100644 --- a/GC/instructions.h +++ b/GC/instructions.h @@ -138,7 +138,7 @@ X(PRINTINT, PROC.out << I0) \ X(STARTGRIND, CALLGRIND_START_INSTRUMENTATION) \ X(STOPGRIND, CALLGRIND_STOP_INSTRUMENTATION) \ - X(RUN_TAPE, MACH->run_tapes(EXTRA)) \ + X(RUN_TAPE, T::run_tapes(EXTRA)) \ X(JOIN_TAPE, MACH->join_tape(R0)) \ X(USE, ) \ X(USE_INP, ) \ diff --git a/License.txt b/License.txt index ab7ae3bb9..9c8f81b1c 100644 --- a/License.txt +++ b/License.txt @@ -1,4 +1,4 @@ -The Software is copyright (c) 2022, Commonwealth Scientific and Industrial Research Organisation (CSIRO) ABN 41 687 119 230. +The Software is copyright (c) 2023, Commonwealth Scientific and Industrial Research Organisation (CSIRO) ABN 41 687 119 230. CSIRO grants you a licence to the Software on the terms of the BSD 3-Clause Licence. diff --git a/Machines/TripleMachine.cpp b/Machines/TripleMachine.cpp index a6b58db53..45c62e5fa 100644 --- a/Machines/TripleMachine.cpp +++ b/Machines/TripleMachine.cpp @@ -212,6 +212,10 @@ void TripleMachine::run() generators[i] = new_generator>(setup, i, mac_keyz); else if (z2k == 66 and z2s == 48) generators[i] = new_generator>(setup, i, mac_keyz); +#ifdef RING_SIZE + else if (z2k == RING_SIZE and z2s == SPDZ2K_DEFAULT_SECURITY) + generators[i] = new_generator>(setup, i, mac_keyz); +#endif else throw runtime_error("not compiled for k=" + to_string(z2k) + " and s=" + to_string(z2s)); } diff --git a/Makefile b/Makefile index 467e6d8f1..c2cf93113 100644 --- a/Makefile +++ b/Makefile @@ -52,11 +52,11 @@ endif endif # used for dependency generation -OBJS = $(BMR) $(FHEOBJS) $(TINYOTOFFLINE) $(YAO) $(COMPLETE) $(patsubst %.cpp,%.o,$(wildcard Machines/*.cpp Utils/*.cpp)) +OBJS = $(patsubst %.cpp,%.o,$(wildcard */*.cpp)) $(STATIC_OTE) DEPS := $(wildcard */*.d */*/*.d) # never delete -.SECONDARY: $(OBJS) $(patsubst %.cpp,%.o,$(wildcard */*.cpp)) +.SECONDARY: $(OBJS) all: arithmetic binary gen_input online offline externalIO bmr ecdsa @@ -75,6 +75,10 @@ arithmetic: semi-he gear -include $(DEPS) include $(wildcard *.d static/*.d) +$(OBJS): CONFIG CONFIG.mine +CONFIG.mine: + touch CONFIG.mine + %.o: %.cpp $(CXX) -o $@ $< $(CFLAGS) -MMD -MP -c @@ -110,17 +114,17 @@ spdz2k: spdz2k-party.x ot-offline.x Check-Offline-Z2k.x galois-degree.x Fake-Off mascot: mascot-party.x spdz2k mama-party.x ifeq ($(OS), Darwin) -tldr: mac-setup +setup: mac-setup else -tldr: mpir linux-machine-setup +setup: boost mpir linux-machine-setup endif -tldr: libote +tldr: setup $(MAKE) mascot-party.x mkdir Player-Data 2> /dev/null; true ifeq ($(ARM), 1) -Tools/intrinsics.h: deps/simde/simde +$(patsubst %.cpp,%.o,$(wildcard */*.cpp)): deps/simde/simde endif 
shamir: shamir-party.x malicious-shamir-party.x atlas-party.x galois-degree.x
 
@@ -317,7 +321,17 @@ boost: deps/libOTe/libOTe
 	cd deps/libOTe; \
 	python3 build.py --setup --boost --install=$(CURDIR)/local
 
-OTE_OPTS = -DENABLE_SOFTSPOKEN_OT=ON -DCMAKE_CXX_COMPILER=$(CXX) -DCMAKE_INSTALL_LIBDIR=lib
+OTE_OPTS += -DENABLE_SOFTSPOKEN_OT=ON -DCMAKE_CXX_COMPILER=$(CXX) -DCMAKE_INSTALL_LIBDIR=lib
+
+ifeq ($(ARM), 1)
+OTE_OPTS += -DENABLE_AVX=OFF -DENABLE_SSE=OFF
+else
+ifeq ($(AVX_OT), 0)
+OTE_OPTS += -DENABLE_AVX=OFF
+else
+OTE_OPTS += -DENABLE_AVX=ON -DENABLE_SSE=ON
+endif
+endif
 
 ifeq ($(USE_SHARED_OTE), 1)
 OTE = $(SHARED_OTE)
@@ -331,17 +345,15 @@ libote:
 local/lib/libcryptoTools.a: $(STATIC_OTE)
 
 local/lib/libcryptoTools.so: $(SHARED_OTE)
+
+ifeq ($(USE_KOS), 0)
 OT/OTExtensionWithMatrix.o: $(OTE)
+endif
 
-ifeq ($(ARM), 1)
 local/lib/liblibOTe.a: deps/libOTe/libOTe
 	cd deps/libOTe; \
-	PATH="$(CURDIR)/local/bin:$(PATH)" python3 build.py --install=$(CURDIR)/local -- -DBUILD_SHARED_LIBS=0 -DENABLE_AVX=OFF -DENABLE_SSE=OFF $(OTE_OPTS)
-else
-local/lib/liblibOTe.a: deps/libOTe/libOTe
-	cd deps/libOTe; \
-	PATH="$(CURDIR)/local/bin:$(PATH)" python3 build.py --install=$(CURDIR)/local -- -DBUILD_SHARED_LIBS=0 $(OTE_OPTS)
-endif
+	PATH="$(CURDIR)/local/bin:$(PATH)" python3 build.py --install=$(CURDIR)/local -- -DBUILD_SHARED_LIBS=0 $(OTE_OPTS) && \
+	touch ../../local/lib/liblibOTe.a
 
 $(SHARED_OTE): deps/libOTe/libOTe
 	cd deps/libOTe; \
@@ -373,4 +385,4 @@ deps/simde/simde:
 	git submodule update --init deps/simde || git clone https://github.com/simd-everywhere/simde deps/simde
 
 clean:
-	-rm -f */*.o *.o */*.d *.d *.x core.* *.a gmon.out */*/*.o static/*.x *.so
+	-rm -f */*.o *.o */*.d *.d *.x core.* *.a gmon.out */*/*.o static/*.x *.so local/lib/liblibOTe.*
diff --git a/Math/bigint.h b/Math/bigint.h
index f99e3dfd7..2a929399d 100644
--- a/Math/bigint.h
+++ b/Math/bigint.h
@@ -37,6 +37,13 @@ namespace GC
 class Clear;
 }
 
+/**
+ * Type for arbitrarily large integers.
+ * This is a sub-class of ``mpz_class`` from MPIR. As such, it implements
+ * all integer operations and input/output via C++ streams. In addition,
+ * the ``get_ui()`` member function allows retrieving the least significant
+ * 64 bits.
+ */
 class bigint : public mpz_class
 {
 public:
@@ -51,15 +58,20 @@ class bigint : public mpz_class
   template
   static void output_float(U& o, const mpf_class& x, T nan);
 
+  /// Initialize to zero.
   bigint() : mpz_class() {}
   template
   bigint(const T& x) : mpz_class(x) {}
+  /// Convert to canonical representation as non-negative number.
   template
   bigint(const gfp_& x);
+  /// Convert to canonical representation as non-negative number.
   template
   bigint(const gfpvar_& x);
+  /// Convert to canonical representation as non-negative number.
   template
   bigint(const Z2& x);
+  /// Convert to canonical representation as non-negative number.
template bigint(const SignedZ2& x); template diff --git a/Math/gf2n.cpp b/Math/gf2n.cpp index d39a8593e..ba638d974 100644 --- a/Math/gf2n.cpp +++ b/Math/gf2n.cpp @@ -454,20 +454,16 @@ void gf2n_::randomize(PRNG& G, int n) a&=mask; } -template<> -void gf2n_::output(ostream& s,bool human) const -{ - if (human) - s << hex << showbase << word(a) << dec; - else - s.write((char*) &a, sizeof(octet)); -} - template void gf2n_::output(ostream& s,bool human) const { if (human) - { s << hex << showbase << a << dec; } + { + if (n > 64) + s << hex << a << dec; + else + s << hex << to_word(a) << dec; + } else { s.write((char*) &a, (sizeof(U))); } } @@ -484,7 +480,16 @@ void gf2n_::input(istream& s,bool human) } if (human) - { s >> hex >> a >> dec; } + { + if (n > 64) + s >> hex >> a >> dec; + else + { + word tmp; + s >> hex >> tmp >> dec; + *this = U(tmp); + } + } else { s.read((char*) &a, sizeof(U)); } diff --git a/Math/gf2n.h b/Math/gf2n.h index 235c08f5b..c44f9c0e8 100644 --- a/Math/gf2n.h +++ b/Math/gf2n.h @@ -191,9 +191,7 @@ class gf2n_ : public ValueInterface } friend istream& operator>>(istream& s,gf2n_& x) { - word tmp; - s >> hex >> tmp >> dec; - x = tmp; + x.input(s, true); return s; } diff --git a/Math/gf2nlong.cpp b/Math/gf2nlong.cpp index c2555681b..7d5f24794 100644 --- a/Math/gf2nlong.cpp +++ b/Math/gf2nlong.cpp @@ -27,26 +27,24 @@ ostream& operator<<(ostream& s, const int128& a) { word* tmp = (word*)&a.a; s << hex; - - if (tmp[1] != 0) - { - s << noshowbase; - s.width(16); - s.fill('0'); - s << tmp[1]; - s.width(16); - } - else - s << showbase; - + s << noshowbase; + s.width(16); + s.fill('0'); + s << tmp[1]; + s.width(16); s << tmp[0] << dec; return s; } istream& operator>>(istream& s, int128& a) { - gf2n_long tmp; - s >> tmp; - a = tmp.get(); + bigint tmp; + s >> hex >> tmp; + a = 0; + auto size = tmp.get_mpz_t()->_mp_size; + assert(size >= 0); + assert(size <= 2); + mpn_copyi((mp_limb_t*) &a.a, tmp.get_mpz_t()->_mp_d, size); + s >> dec; return s; } diff --git a/Math/gf2nlong.h b/Math/gf2nlong.h index 85a668a74..a15dbfc62 100644 --- a/Math/gf2nlong.h +++ b/Math/gf2nlong.h @@ -154,21 +154,6 @@ class gf2n_long : public gf2n_ gf2n_long(int g) : gf2n_long(int128(unsigned(g))) {} template gf2n_long(IntBase g) : super(g.get()) {} - - friend ostream& operator<<(ostream& s,const gf2n_long& x) - { s << hex << x.get() << dec; - return s; - } - friend istream& operator>>(istream& s,gf2n_long& x) - { bigint tmp; - s >> hex >> tmp >> dec; - x = 0; - auto size = tmp.get_mpz_t()->_mp_size; - assert(size >= 0); - assert(size <= 2); - mpn_copyi((mp_limb_t*)x.get_ptr(), tmp.get_mpz_t()->_mp_d, size); - return s; - } }; #if defined(__aarch64__) && defined(__clang__) diff --git a/Math/gfp.h b/Math/gfp.h index 9a50dc035..de00934a0 100644 --- a/Math/gfp.h +++ b/Math/gfp.h @@ -49,6 +49,7 @@ template void generate_prime_setup(string, int, int); * ``L`` is the number of 64-bit limbs, that is, * the prime has to have bit length in `[64*L-63, 64*L]`. * See ``gfpvar_`` for a more flexible alternative. + * Convert to ``bigint`` to access the canonical integer representation. 
*/
 template
 class gfp_ : public ValueInterface
diff --git a/Networking/Exchanger.h b/Networking/Exchanger.h
index 29d88797d..33c1daa0d 100644
--- a/Networking/Exchanger.h
+++ b/Networking/Exchanger.h
@@ -67,6 +67,9 @@ class Exchanger
 #endif
             n_send++;
             size_t to_send = len - sent;
+#ifdef __APPLE__
+            to_send = min(to_send, 1ul << 16);
+#endif
             size_t newly_sent = send_non_blocking(send_socket, data + sent,
                     to_send);
 #ifdef TIME_ROUNDS
diff --git a/Networking/ServerSocket.cpp b/Networking/ServerSocket.cpp
index d69fd7b8d..bf56d2565 100644
--- a/Networking/ServerSocket.cpp
+++ b/Networking/ServerSocket.cpp
@@ -139,9 +139,25 @@ void ServerSocket::accept_clients()
 #ifdef DEBUG_NETWORKING
         fprintf(stderr, "Accepting...\n");
 #endif
-        int consocket = accept(main_socket, (struct sockaddr *)&dest, (socklen_t*) &socksize);
+        int consocket;
+        for (int i = 0; i < 25; i++)
+        {
+            consocket = accept(main_socket, (struct sockaddr*) &dest,
+                    (socklen_t*) &socksize);
+            if (consocket < 0)
+                usleep(1 << i);
+            else
+                break;
+        }
         if (consocket<0) { error("set_up_socket:accept"); }
 
+#ifdef __APPLE__
+        int flags = fcntl(consocket, F_GETFL, 0);
+        int fl = fcntl(consocket, F_SETFL, O_NONBLOCK | flags);
+        if (fl < 0)
+            error("set non-blocking on server");
+#endif
+
         octetStream client_id;
         char buf[1];
         if (recv(consocket, buf, 1, MSG_PEEK | MSG_DONTWAIT) > 0)
@@ -160,13 +176,6 @@ void ServerSocket::accept_clients()
             auto job = (new ServerJob(*this, consocket, dest));
             pthread_create(&job->thread, 0, ServerJob::run, job);
         }
-
-#ifdef __APPLE__
-        int flags = fcntl(consocket, F_GETFL, 0);
-        int fl = fcntl(consocket, F_SETFL, O_NONBLOCK | flags);
-        if (fl < 0)
-            error("set non-blocking");
-#endif
     }
 }
diff --git a/Networking/sockets.cpp b/Networking/sockets.cpp
index fd064cd2e..8034809e6 100644
--- a/Networking/sockets.cpp
+++ b/Networking/sockets.cpp
@@ -125,7 +125,7 @@ void set_up_client_socket(int& mysocket,const char* hostname,int Portnum)
   int flags = fcntl(mysocket, F_GETFL, 0);
   fl = fcntl(mysocket, F_SETFL, O_NONBLOCK | flags);
   if (fl < 0)
-    error("set non-blocking");
+    error("set non-blocking on client");
 #endif
 }
diff --git a/Networking/sockets.h b/Networking/sockets.h
index 37485f48a..7a4c2bd0a 100644
--- a/Networking/sockets.h
+++ b/Networking/sockets.h
@@ -52,9 +52,18 @@ inline size_t send_non_blocking(int socket, octet* msg, size_t len)
 inline void send(int socket,octet *msg,size_t len)
 {
   size_t i = 0;
+  long wait = 1;
   while (i < len)
     {
-      i += send_non_blocking(socket, msg + i, len - i);
+      size_t j = send_non_blocking(socket, msg + i, len - i);
+      i += j;
+      if (j > 0)
+        wait = 1;
+      else
+        {
+          usleep(wait);
+          wait *= 2;
+        }
     }
 }
 
@@ -107,7 +116,7 @@ inline void receive(T& socket, size_t& a, size_t len)
   a = decode_length(blen, len);
 }
 
-inline size_t check_non_blocking_result(int res)
+inline ssize_t check_non_blocking_result(ssize_t res)
 {
   if (res < 0)
     {
@@ -118,15 +127,15 @@ inline size_t check_non_blocking_result(int res)
   return res;
 }
 
-inline size_t receive_non_blocking(int socket,octet *msg,int len)
+inline ssize_t receive_non_blocking(int socket, octet *msg, size_t len)
 {
-  int res = recv(socket, msg, len, MSG_DONTWAIT);
+  ssize_t res = recv(socket, msg, len, MSG_DONTWAIT);
   return check_non_blocking_result(res);
 }
 
-inline size_t receive_all_or_nothing(int socket,octet *msg,int len)
+inline ssize_t receive_all_or_nothing(int socket, octet *msg, ssize_t len)
 {
-  int res = recv(socket, msg, len, MSG_DONTWAIT | MSG_PEEK);
+  ssize_t res = recv(socket, msg, len, MSG_DONTWAIT | MSG_PEEK);
   check_non_blocking_result(res);
   if
(res == len) { diff --git a/Networking/ssl_sockets.h b/Networking/ssl_sockets.h index 816139953..a9ce63130 100644 --- a/Networking/ssl_sockets.h +++ b/Networking/ssl_sockets.h @@ -109,7 +109,7 @@ inline void receive(ssl_socket* socket, octet* data, size_t length) received += socket->read_some(boost::asio::buffer(data + received, length - received)); } -inline size_t receive_non_blocking(ssl_socket* socket, octet* data, int length) +inline size_t receive_non_blocking(ssl_socket* socket, octet* data, size_t length) { return socket->read_some(boost::asio::buffer(data, length)); } diff --git a/OT/BitMatrix.h b/OT/BitMatrix.h index a797b9798..b996d83b6 100644 --- a/OT/BitMatrix.h +++ b/OT/BitMatrix.h @@ -99,9 +99,6 @@ class aligned_allocator : public std::allocator<_Tp> _Tp* allocate(size_t __n, const void* = 0) { - if (__n > this->max_size()) - std::__throw_bad_alloc(); - _Tp* res = 0; int err = posix_memalign((void**)&res, ALIGN, __n * sizeof(_Tp)); if (err != 0 or res == 0) diff --git a/OT/OTVole.hpp b/OT/OTVole.hpp index 1dbcdbe00..13f58f7fd 100644 --- a/OT/OTVole.hpp +++ b/OT/OTVole.hpp @@ -205,7 +205,7 @@ void OTVoleBase::consistency_check(vector& os) { #endif int total_bytes = t0[0].size() * T::size(); int num_blocks = (total_bytes) / 16 + ((total_bytes % 16) != 0); - __m128i coefficients[num_blocks]; + __m128i* coefficients = new __m128i[num_blocks]; this->set_coeffs(coefficients, coef_prng_sender, num_blocks); for (int alpha = 0; alpha < S; ++alpha) @@ -225,6 +225,7 @@ void OTVoleBase::consistency_check(vector& os) { this->hash_row(os[0], t11, coefficients); } } + delete[] coefficients; #ifdef OTVOLE_TIMER gettimeofday(&totalendv, NULL); double elapsed = timeval_diff(&totalstartv, &totalendv); @@ -240,7 +241,7 @@ void OTVoleBase::consistency_check(vector& os) { #endif int total_bytes = t[0].size() * T::size(); int num_blocks = (total_bytes) / 16 + ((total_bytes % 16) != 0); - __m128i coefficients[num_blocks]; + __m128i* coefficients = new __m128i[num_blocks]; this->set_coeffs(coefficients, coef_prng_receiver, num_blocks); octet h00[VOLE_HASH_SIZE] = {0}; @@ -288,6 +289,7 @@ void OTVoleBase::consistency_check(vector& os) { } } } + delete[] coefficients; #ifdef OTVOLE_TIMER gettimeofday(&totalendv, NULL); double elapsed = timeval_diff(&totalstartv, &totalendv); diff --git a/Processor/Data_Files.hpp b/Processor/Data_Files.hpp index 46c84903c..6c189cac0 100644 --- a/Processor/Data_Files.hpp +++ b/Processor/Data_Files.hpp @@ -325,7 +325,10 @@ void Sub_Data_Files::buffer_edabits_with_queues(bool strict, int n_bits, } auto& buffer = *edabit_buffers[n_bits]; if (buffer.peek() == EOF) - buffer.seekg(file_signature().get_length()); + { + buffer.seekg(0); + check_file_signature(buffer, ""); + } edabitvec eb; eb.input(n_bits, buffer); this->edabits[{strict, n_bits}].push_back(eb); diff --git a/Processor/ExternalClients.cpp b/Processor/ExternalClients.cpp index 48bb8bd17..2c8036cda 100644 --- a/Processor/ExternalClients.cpp +++ b/Processor/ExternalClients.cpp @@ -29,6 +29,7 @@ ExternalClients::~ExternalClients() void ExternalClients::start_listening(int portnum_base) { + ScopeLock _(lock); client_connection_servers[portnum_base] = new AnonymousServerSocket(portnum_base + get_party_num()); client_connection_servers[portnum_base]->init(); cerr << "Start listening on thread " << this_thread::get_id() << endl; @@ -38,6 +39,7 @@ void ExternalClients::start_listening(int portnum_base) int ExternalClients::get_client_connection(int portnum_base) { + ScopeLock _(lock); map::iterator it = 
client_connection_servers.find(portnum_base); if (it == client_connection_servers.end()) { @@ -61,6 +63,7 @@ int ExternalClients::get_client_connection(int portnum_base) void ExternalClients::close_connection(int client_id) { + ScopeLock _(lock); auto it = external_client_sockets.find(client_id); if (it == external_client_sockets.end()) throw runtime_error("client id not active: " + to_string(client_id)); @@ -77,6 +80,7 @@ int ExternalClients::get_party_num() client_socket* ExternalClients::get_socket(int id) { + ScopeLock _(lock); if (external_client_sockets.find(id) == external_client_sockets.end()) throw runtime_error("external connection not found for id " + to_string(id)); return external_client_sockets[id]; diff --git a/Processor/ExternalClients.h b/Processor/ExternalClients.h index 5ea1b3fdc..bada59b40 100644 --- a/Processor/ExternalClients.h +++ b/Processor/ExternalClients.h @@ -4,6 +4,7 @@ #include "Networking/sockets.h" #include "Networking/ssl_sockets.h" #include "Tools/Exceptions.h" +#include "Tools/Lock.h" #include "ExternalIO/Client.h" #include #include @@ -32,6 +33,8 @@ class ExternalClients ssl_service io_service; client_ctx* ctx; + Lock lock; + public: ExternalClients(int party_num); diff --git a/Processor/Instruction.h b/Processor/Instruction.h index 011dcb581..a70e095cb 100644 --- a/Processor/Instruction.h +++ b/Processor/Instruction.h @@ -209,6 +209,7 @@ enum CONDPRINTPLAIN = 0xE1, INTOUTPUT = 0xE6, FLOATOUTPUT = 0xE7, + FIXINPUT = 0xE8, // GF(2^n) versions diff --git a/Processor/Instruction.hpp b/Processor/Instruction.hpp index da4dd01ea..969ae06be 100644 --- a/Processor/Instruction.hpp +++ b/Processor/Instruction.hpp @@ -200,9 +200,6 @@ void BaseInstruction::parse_operands(istream& s, int pos, int file_pos) case GSHLCI: case GSHRCI: case GSHRSI: - case USE: - case USE_INP: - case USE_EDABIT: case DIGESTC: case INPUTMASK: case GINPUTMASK: @@ -211,6 +208,12 @@ void BaseInstruction::parse_operands(istream& s, int pos, int file_pos) get_ints(r, s, 2); n = get_int(s); break; + case USE: + case USE_INP: + case USE_EDABIT: + get_ints(r, s, 2); + n = get_long(s); + break; case STARTPRIVATEOUTPUT: case GSTARTPRIVATEOUTPUT: case STOPPRIVATEOUTPUT: @@ -218,7 +221,7 @@ void BaseInstruction::parse_operands(istream& s, int pos, int file_pos) throw runtime_error("two-stage private output not supported any more"); case USE_MATMUL: get_ints(r, s, 3); - n = get_int(s); + n = get_long(s); break; // instructions with 1 register + 1 integer operand case LDI: @@ -407,7 +410,7 @@ void BaseInstruction::parse_operands(istream& s, int pos, int file_pos) case USE_PREP: case GUSE_PREP: s.read((char*)r, sizeof(r)); - n = get_int(s); + n = get_long(s); break; case REQBL: n = get_int(s); @@ -425,6 +428,7 @@ void BaseInstruction::parse_operands(istream& s, int pos, int file_pos) case XORM: case ANDM: case XORCB: + case FIXINPUT: n = get_int(s); get_ints(r, s, 3); break; @@ -507,7 +511,7 @@ bool Instruction::get_offline_data_usage(DataPositions& usage) if (r[1] >= N_DTYPE) throw invalid_program(); usage.files[r[0]][r[1]] = n; - return int(n) >= 0; + return long(n) >= 0; case USE_INP: if (r[0] >= N_DATA_FIELD_TYPE) throw invalid_program(); @@ -517,19 +521,19 @@ bool Instruction::get_offline_data_usage(DataPositions& usage) throw Processor_Error("Player number too high"); usage.inputs[r[1]][r[0]] = n; } - return int(n) >= 0; + return long(n) >= 0; case USE_EDABIT: usage.edabits[{r[0], r[1]}] = n; - return int(n) >= 0; + return long(n) >= 0; case USE_MATMUL: usage.matmuls[{{r[0], r[1], r[2]}}] = n; - 
return int(n) >= 0; + return long(n) >= 0; case USE_PREP: usage.extended[DATA_INT][r] = n; - return int(n) >= 0; + return long(n) >= 0; case GUSE_PREP: usage.extended[gf2n::field_type()][r] = n; - return int(n) >= 0; + return long(n) >= 0; default: return true; } @@ -623,6 +627,7 @@ int BaseInstruction::get_reg_type() const case FLOATOUTPUT: case READSOCKETC: case PRIVATEOUTPUT: + case FIXINPUT: return CINT; default: if (is_gf2n_instruction()) @@ -812,7 +817,12 @@ unsigned BaseInstruction::get_max_reg(int reg_type) const for (size_t i = offset; i < start.size(); i += skip) { if (size_offset != 0) - size = DIV_CEIL(start[i + size_offset], 64); + { + if (opcode & 0x200) + size = DIV_CEIL(start[i + size_offset], 64); + else + size = start[i + size_offset]; + } m = max(m, (unsigned)start[i] + size); } return m; @@ -1206,6 +1216,7 @@ inline void Instruction::execute(Processor& Proc) const break; case ACCEPTCLIENTCONNECTION: { + TimeScope _(Proc.client_timer); // get client connection at port number n + my_num()) int client_handle = Proc.external_clients.get_client_connection( Proc.read_Ci(r[1])); @@ -1261,11 +1272,11 @@ inline void Instruction::execute(Processor& Proc) const Proc.public_input, Proc.public_input_filename, 0).items[0]; break; case RAWOUTPUT: - Proc.read_Cp(r[0]).output(Proc.public_output, false); + Proc.read_Cp(r[0]).output(Proc.get_public_output(), false); break; case INTOUTPUT: if (n == -1 or n == Proc.P.my_num()) - Integer(Proc.read_Ci(r[0])).output(Proc.binary_output, false); + Integer(Proc.read_Ci(r[0])).output(Proc.get_binary_output(), false); break; case FLOATOUTPUT: if (n == -1 or n == Proc.P.my_num()) @@ -1273,9 +1284,13 @@ inline void Instruction::execute(Processor& Proc) const double tmp = bigint::get_float(Proc.read_Cp(start[0] + i), Proc.read_Cp(start[1] + i), Proc.read_Cp(start[2] + i), Proc.read_Cp(start[3] + i)).get_d(); - Proc.binary_output.write((char*) &tmp, sizeof(double)); + Proc.get_binary_output().write((char*) &tmp, sizeof(double)); + Proc.get_binary_output().flush(); } break; + case FIXINPUT: + Proc.fixinput(*this); + return; case PREP: Procp.DataF.get(Proc.Procp.get_S(), r, start, size); return; diff --git a/Processor/Machine.h b/Processor/Machine.h index d3c1346b2..7317d3199 100644 --- a/Processor/Machine.h +++ b/Processor/Machine.h @@ -13,6 +13,7 @@ #include "Processor/Online-Thread.h" #include "Processor/ThreadJob.h" +#include "Processor/ExternalClients.h" #include "GC/Machine.h" @@ -73,6 +74,8 @@ class Machine : public BaseMachine ExecutionStats stats; + ExternalClients external_clients; + static void init_binary_domains(int security_parameter, int lg2); Machine(Names& playerNames, bool use_encryption = true, @@ -111,6 +114,8 @@ class Machine : public BaseMachine typename sint::mac_key_type get_sint_mac_key() { return alphapi; } Player& get_player() { return *P; } + + void check_program(); }; #endif /* MACHINE_H_ */ diff --git a/Processor/Machine.hpp b/Processor/Machine.hpp index 4ff526084..e9e3eb209 100644 --- a/Processor/Machine.hpp +++ b/Processor/Machine.hpp @@ -57,10 +57,18 @@ Machine::Machine(Names& playerNames, bool use_encryption, : my_number(playerNames.my_num()), N(playerNames), direct(opts.direct), opening_sum(opts.opening_sum), receive_threads(opts.receive_threads), max_broadcast(opts.max_broadcast), - use_encryption(use_encryption), live_prep(opts.live_prep), opts(opts) + use_encryption(use_encryption), live_prep(opts.live_prep), opts(opts), + external_clients(my_number) { OnlineOptions::singleton = opts; + if (N.num_players() == 1 and 
sint::is_real) + { + cerr << "Need more than one player to run a protocol." << endl; + cerr << "Use 'emulate.x' for just running the virtual machine" << endl; + exit(1); + } + if (opening_sum < 2) this->opening_sum = N.num_players(); if (max_broadcast < 2) @@ -129,6 +137,7 @@ void Machine::prepare(const string& progname_str) int old_n_threads = nthreads; progs.clear(); load_schedule(progname_str); + check_program(); // keep preprocessing nthreads = max(old_n_threads, nthreads); @@ -467,17 +476,21 @@ void Machine::run(const string& progname) print_timers(); - size_t rounds = 0; - for (auto& x : comm_stats) - rounds += x.second.rounds; - cerr << "Data sent = " << comm_stats.sent / 1e6 << " MB in ~" << rounds - << " rounds (party " << my_number; - if (threads.size() > 1) - cerr << "; rounds counted double due to multi-threading"; - cerr << ")" << endl; - - auto& P = *this->P; - this->print_global_comm(P, comm_stats); + if (sint::is_real) + { + size_t rounds = 0; + for (auto& x : comm_stats) + rounds += x.second.rounds; + cerr << "Data sent = " << comm_stats.sent / 1e6 << " MB in ~" << rounds + << " rounds (party " << my_number; + if (threads.size() > 1) + cerr << "; rounds counted double due to multi-threading"; + cerr << "; use '-v' for more details"; + cerr << ")" << endl; + + auto& P = *this->P; + this->print_global_comm(P, comm_stats); + } #ifdef VERBOSE_OPTIONS if (opening_sum < N.num_players() && !direct) @@ -582,12 +595,35 @@ void Machine::suggest_optimizations() if (relevant_opts.find("split") != string::npos and sint::has_split) optimizations.append( "\tprogram.use_split(" + to_string(N.num_players()) + ")\n"); - if (relevant_opts.find("edabit") != string::npos and not sint::has_split) + if (relevant_opts.find("edabit") != string::npos and not sint::has_split and sint::is_real) optimizations.append("\tprogram.use_edabit(True)\n"); if (not optimizations.empty()) cerr << "This program might benefit from some protocol options." << endl << "Consider adding the following at the beginning of '" << progname << ".mpc':" << endl << optimizations; +#ifndef __clang__ + cerr << "This virtual machine was compiled with GCC. Recompile with " + "'CXX = clang++' in 'CONFIG.mine' for optimal performance." << endl; +#endif +} + +template +void Machine::check_program() +{ + Hash hasher; + for (auto& prog : progs) + hasher.update(prog.get_hash()); + assert(P); + Bundle bundle(*P); + hasher.final(bundle.mine); + try + { + bundle.compare(*P); + } + catch (mismatch_among_parties&) + { + throw runtime_error("program differs between parties"); + } } #endif diff --git a/Processor/OnlineOptions.cpp b/Processor/OnlineOptions.cpp index 34d7ce129..b4bf6594e 100644 --- a/Processor/OnlineOptions.cpp +++ b/Processor/OnlineOptions.cpp @@ -62,8 +62,9 @@ OnlineOptions::OnlineOptions(ez::ezOptionParser& opt, int argc, 0, // Required? 1, // Number of args expected. 0, // Delimiter if expecting multiple args. - "Prefix for input file path (default: Player-Data/Private-Input). " - "Input will be read from {prefix}-P{id}-{thread_id}.", // Help description. + "Prefix for input file path (default: Player-Data/Input). " + "Text input will be read from {prefix}-P{id}-{thread_id} and " + "binary input from {prefix}-Binary-P{id}-{thread_id}", // Help description. "-IF", // Flag token. "--input-file" // Flag token. ); @@ -95,7 +96,7 @@ OnlineOptions::OnlineOptions(ez::ezOptionParser& opt, int argc, 0, // Required? 0, // Number of args expected. 0, // Delimiter if expecting multiple args. - "Verbose output", // Help description. 
+ "Verbose output, in particular more data on communication", // Help description. "-v", // Flag token. "--verbose" // Flag token. ); diff --git a/Processor/PrepBase.cpp b/Processor/PrepBase.cpp index 4ca77daa1..a2f79027e 100644 --- a/Processor/PrepBase.cpp +++ b/Processor/PrepBase.cpp @@ -48,9 +48,14 @@ void PrepBase::print_left(const char* name, size_t n, const string& type_string, << endl; if (n > used / 10) + { cerr << "Significant amount of unused " << name << " of " << type_string << ". For more accurate benchmarks, " - << "consider reducing the batch size with -b." << endl; + << "consider reducing the batch size with --batch-size." << endl; + cerr + << "Note that some protocols have larger minimum batch sizes." + << endl; + } } void PrepBase::print_left_edabits(size_t n, size_t n_batch, bool strict, @@ -67,6 +72,6 @@ void PrepBase::print_left_edabits(size_t n, size_t n_batch, bool strict, if (n > used / 10) cerr << "Significant amount of unused edaBits of size " << n_bits << ". For more accurate benchmarks, " - << "consider reducing the batch size with -b " - << "or increasing the bucket size with -B." << endl; + << "consider reducing the batch size with --batch-size " + << "or increasing the bucket size with --bucket-size." << endl; } diff --git a/Processor/Processor.h b/Processor/Processor.h index 37227c41b..b35eb47f6 100644 --- a/Processor/Processor.h +++ b/Processor/Processor.h @@ -118,6 +118,9 @@ class ArithmeticProcessor : public ProcessorBase protected: CheckVector Ci; + ofstream public_output; + ofstream binary_output; + public: int thread_num; @@ -126,11 +129,11 @@ class ArithmeticProcessor : public ProcessorBase string private_input_filename; string public_input_filename; + string binary_input_filename; ifstream private_input; ifstream public_input; - ofstream public_output; - ofstream binary_output; + ifstream binary_input; int sent, rounds; @@ -173,6 +176,15 @@ class ArithmeticProcessor : public ProcessorBase throw not_implemented(); } + virtual ofstream& get_public_output() + { + throw not_implemented(); + } + virtual ofstream& get_binary_output() + { + throw not_implemented(); + } + void shuffle(const Instruction& instruction); void bitdecint(const Instruction& instruction); }; @@ -203,9 +215,11 @@ class Processor : public ArithmeticProcessor unsigned int PC; TempVars temp; - ExternalClients external_clients; + ExternalClients& external_clients; Binary_File_IO binary_file_io; + Timer client_timer; + void reset(const Program& program,int arg); // Reset the state of the processor string get_filename(const char* basename, bool use_number); @@ -268,10 +282,15 @@ class Processor : public ArithmeticProcessor cint get_inverse2(unsigned m); + void fixinput(const Instruction& instruction); + // synchronize in asymmetric protocols long sync_Ci(size_t i) const; long sync(long x) const; + ofstream& get_public_output(); + ofstream& get_binary_output(); + private: template friend class SPDZ; diff --git a/Processor/Processor.hpp b/Processor/Processor.hpp index 78aba8c81..c7c6bf359 100644 --- a/Processor/Processor.hpp +++ b/Processor/Processor.hpp @@ -54,6 +54,27 @@ SubProcessor::~SubProcessor() #endif } +template +inline ofstream& Processor::get_public_output() +{ + if (not public_output.is_open()) + public_output.open(get_filename(PREP_DIR "Public-Output-", true).c_str(), + ios_base::out); + + return public_output; +} + +template +inline ofstream& Processor::get_binary_output() +{ + if (not binary_output.is_open()) + binary_output.open( + get_parameterized_filename(P.my_num(), 
thread_num, + PREP_DIR "Binary-Output"), ios_base::out); + + return binary_output; +} + template Processor::Processor(int thread_num,Player& P, typename sgf2n::MAC_Check& MC2,typename sint::MAC_Check& MCp, @@ -64,7 +85,7 @@ Processor::Processor(int thread_num,Player& P, share_thread(DataF.DataFb, P, machine.get_bit_mac_key()), Procb(machine.bit_memories), Proc2(*this,MC2,DataF.DataF2,P),Procp(*this,MCp,DataF.DataFp,P), - external_clients(P.my_num()), + external_clients(machine.external_clients), binary_file_io(Binary_File_IO()) { reset(program,0); @@ -73,13 +94,19 @@ Processor::Processor(int thread_num,Player& P, public_input.open(public_input_filename); private_input_filename = (get_filename(PREP_DIR "Private-Input-",true)); private_input.open(private_input_filename.c_str()); - public_output.open(get_filename(PREP_DIR "Public-Output-",true).c_str(), ios_base::out); - binary_output.open( - get_parameterized_filename(P.my_num(), thread_num, - PREP_DIR "Binary-Output"), ios_base::out); open_input_file(P.my_num(), thread_num, machine.opts.cmd_private_input_file); + string input_prefix = machine.opts.cmd_private_input_file; + if (input_prefix == OnlineOptions().cmd_private_input_file + or input_prefix == ".") + input_prefix = PREP_DIR "Input-Binary"; + else + input_prefix += "-Binary"; + binary_input_filename = get_parameterized_filename(P.my_num(), thread_num, + input_prefix); + binary_input.open(binary_input_filename); + secure_prng.ReSeed(); shared_prng.SeedGlobally(P, false); @@ -96,6 +123,8 @@ Processor::~Processor() if (sent) cerr << "Opened " << sent << " elements in " << rounds << " rounds" << endl; #endif + if (OnlineOptions::singleton.verbose and client_timer.elapsed()) + cerr << "Client communication time = " << client_timer.elapsed() << endl; } template @@ -286,6 +315,7 @@ void Processor::write_socket(const RegType reg_type, #endif try { + TimeScope _(client_timer); socket_stream.Send(external_clients.get_socket(socket_id)); } catch (bad_value& e) { @@ -302,7 +332,9 @@ void Processor::read_socket_ints(int client_id, { int m = registers.size(); socket_stream.reset_write_head(); + client_timer.start(); socket_stream.Receive(external_clients.get_socket(client_id)); + client_timer.stop(); for (int j = 0; j < size; j++) for (int i = 0; i < m; i++) { @@ -319,7 +351,9 @@ void Processor::read_socket_vector(int client_id, { int m = registers.size(); socket_stream.reset_write_head(); + client_timer.start(); socket_stream.Receive(external_clients.get_socket(client_id)); + client_timer.stop(); for (int j = 0; j < size; j++) for (int i = 0; i < m; i++) get_Cp_ref(registers[i] + j) = @@ -333,7 +367,9 @@ void Processor::read_socket_private(int client_id, { int m = registers.size(); socket_stream.reset_write_head(); + client_timer.start(); socket_stream.Receive(external_clients.get_socket(client_id)); + client_timer.stop(); for (int j = 0; j < size; j++) for (int i = 0; i < m; i++) @@ -773,6 +809,56 @@ typename sint::clear Processor::get_inverse2(unsigned m) return inverses2m[m]; } +template +void Processor::fixinput(const Instruction& instruction) +{ + int n = instruction.get_n(); + if (n == P.my_num() or n == -1) + { + typename sint::clear tmp; + bool use_double = false; + switch (instruction.get_r(2)) + { + case 0: + case 1: + break; + case 2: + use_double = true; + break; + default: + throw runtime_error("unknown format for fixed-point input"); + } + + for (int i = 0; i < instruction.get_size(); i++) + { + if (binary_input.peek() == EOF) + throw IO_Error("not enough inputs in " + 
binary_input_filename);
+            double buf;
+            if (instruction.get_r(2) == 0)
+            {
+                int64_t x;
+                binary_input.read((char*) &x, sizeof(x));
+                tmp = x;
+            }
+            else
+            {
+                if (use_double)
+                    binary_input.read((char*) &buf, sizeof(double));
+                else
+                {
+                    float x;
+                    binary_input.read((char*) &x, sizeof(float));
+                    buf = x;
+                }
+                tmp = bigint::tmp = round(buf * exp2(instruction.get_r(1)));
+            }
+            if (binary_input.fail())
+                throw IO_Error("failure reading from " + binary_input_filename);
+            write_Cp(instruction.get_r(0) + i, tmp);
+        }
+    }
+}
+
 template
 long Processor::sync_Ci(size_t i) const
 {
diff --git a/Processor/Program.cpp b/Processor/Program.cpp
index dac73400b..f9cb5c579 100644
--- a/Processor/Program.cpp
+++ b/Processor/Program.cpp
@@ -33,6 +33,18 @@ void Program::parse(string filename)
   if (pinp.fail())
     throw file_error(filename);
   parse(pinp);
+
+  // compute hash
+  pinp.clear();
+  pinp.seekg(0);
+  Hash hasher;
+  while (pinp.peek(), !pinp.eof())
+    {
+      char buf[1024];
+      size_t n = pinp.readsome(buf, 1024);
+      hasher.update(buf, n);
+    }
+  hash = hasher.final().str();
 }
 
 void Program::parse(istream& s)
diff --git a/Processor/Program.h b/Processor/Program.h
index 96a70e5eb..2c8470f8c 100644
--- a/Processor/Program.h
+++ b/Processor/Program.h
@@ -26,6 +26,8 @@ class Program
   // True if program contains variable-sized loop
   bool unknown_usage;
 
+  string hash;
+
   void compute_constants();
 
 public:
@@ -53,6 +55,9 @@ class Program
   size_t direct_mem(RegType reg_type) const
     { return max_mem[reg_type]; }
 
+  const string& get_hash() const
+    { return hash; }
+
   friend ostream& operator<<(ostream& s,const Program& P);
 
   // Execute this program, updating the processor and memory
diff --git a/Processor/instructions.h b/Processor/instructions.h
index f22fde8e6..5912d8676 100644
--- a/Processor/instructions.h
+++ b/Processor/instructions.h
@@ -281,7 +281,7 @@ X(GCONVGF2N, auto dest = &Proc.get_Ci()[r[0]]; auto source = &C2[r[1]], \
         *dest++ = source->get_word(); source++) \
     X(GRAWOUTPUT, auto source = &C2[r[0]], \
-            (*source++).output(Proc.public_output, false)) \
+            (*source++).output(Proc.get_public_output(), false)) \
 
 #define REMAINING_INSTRUCTIONS \
     X(CONVMODP, throw not_implemented(),) \
diff --git a/Programs/Source/bankers_bonus.mpc b/Programs/Source/bankers_bonus.mpc
index e3dfc9f92..674efcdad 100644
--- a/Programs/Source/bankers_bonus.mpc
+++ b/Programs/Source/bankers_bonus.mpc
@@ -20,6 +20,7 @@ from Compiler.util import if_else
 PORTNUM = 14000
 MAX_NUM_CLIENTS = 8
 n_rounds = 0
+n_threads = 2
 
 if len(program.args) > 1:
     n_rounds = int(program.args[1])
@@ -110,7 +111,7 @@ def main():
 
     # Clients' secret input.
client_values = t.Array(MAX_NUM_CLIENTS) - @for_range(number_clients) + @for_range_multithread(n_threads, 1, number_clients) def _(client_id): client_values[client_id] = client_input(t, client_id) diff --git a/Programs/Source/breast_logistic.mpc b/Programs/Source/breast_logistic.mpc new file mode 100644 index 000000000..28ee6be61 --- /dev/null +++ b/Programs/Source/breast_logistic.mpc @@ -0,0 +1,54 @@ +from sklearn.datasets import load_breast_cancer +from sklearn.model_selection import train_test_split + +X, y = load_breast_cancer(return_X_y=True) + +# normalize column-wise +X /= X.max(axis=0) +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +if 'horizontal' in program.args: + # split by sample + a = sfix.input_tensor_via(0, X_train[len(X_train) // 2:]) + b = sfix.input_tensor_via(1, X_train[:len(X_train) // 2]) + X_train = a.concat(b) + + a = sint.input_tensor_via(0, y_train[len(y_train) // 2:]) + b = sint.input_tensor_via(1, y_train[:len(y_train) // 2]) + y_train = a.concat(b) +elif 'vertical' in program.args: + print (X_train.shape, X_train.shape[1]) + a = sfix.input_tensor_via(0, X_train[:,:X_train.shape[1] // 2]) + b = sfix.input_tensor_via(1, X_train[:,X_train.shape[1] // 2:]) + X_train = a.concat_columns(b) + y_train = sint.input_tensor_via(0, y_train) +elif 'party0' in program.args: + a = sfix.input_tensor_via(0, X_train[:,:X_train.shape[1] // 2]) + b = sfix.input_tensor_via(1, shape=X_train[:,X_train.shape[1] // 2:].shape) + X_train = a.concat_columns(b) + y_train = sint.input_tensor_via(0, y_train) +elif 'party1' in program.args: + a = sfix.input_tensor_via(0, shape=X_train[:,:X_train.shape[1] // 2].shape) + b = sfix.input_tensor_via(1, X_train[:,X_train.shape[1] // 2:]) + X_train = a.concat_columns(b) + y_train = sint.input_tensor_via(0, shape=y_train.shape) +else: + X_train = sfix.input_tensor_via(0, X_train) + y_train = sint.input_tensor_via(0, y_train) + +if 'party1' in program.args: + X_test = sfix.input_tensor_via(0, shape=X_test.shape) + y_test = sint.input_tensor_via(0, shape=y_test.shape) +else: + X_test = sfix.input_tensor_via(0, X_test) + y_test = sint.input_tensor_via(0, y_test) + +from Compiler import ml + +log = ml.SGDLogistic(20, 2, program) + +log.fit(X_train, y_train) +print_ln('%s', (log.predict(X_test) - y_test.get_vector()).reveal()) + +log.fit_with_testing(X_train, y_train, X_test, y_test) +print_ln('%s', (log.predict_proba(X_test) - y_test.get_vector()).reveal()) diff --git a/Programs/Source/breast_tree.mpc b/Programs/Source/breast_tree.mpc new file mode 100644 index 000000000..547964528 --- /dev/null +++ b/Programs/Source/breast_tree.mpc @@ -0,0 +1,33 @@ +from sklearn.datasets import load_breast_cancer +from sklearn.model_selection import train_test_split + +X, y = load_breast_cancer(return_X_y=True) +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +X_train = sfix.input_tensor_via(0, X_train) +X_test = sfix.input_tensor_via(0, X_test) + +y_train = sint.input_tensor_via(0, y_train) +y_test = sint.input_tensor_via(0, y_test) + +# use "nearest" option for deterministic result +# otherwise the Gini coefficients vary slightly from run to run +# resulting in different trees + +sfix.set_precision_from_args(program) + +from Compiler.decision_tree import TreeClassifier + +tree = TreeClassifier(max_depth=5) + +# plain training +tree.fit(X_train, y_train) + +# output difference between truth and prediction +print_ln('%s', (tree.predict(X_test) - y_test.get_vector()).reveal()) + +# output tree +tree.output() + +# 
training with level-wise accuracy output +tree.fit_with_testing(X_train, y_train, X_test, y_test) diff --git a/Programs/Source/diabetes.mpc b/Programs/Source/diabetes.mpc new file mode 100644 index 000000000..4fdccf9c6 --- /dev/null +++ b/Programs/Source/diabetes.mpc @@ -0,0 +1,32 @@ +from sklearn import datasets, linear_model +from sklearn.model_selection import train_test_split + +X, y = datasets.load_diabetes(return_X_y=True) + +# normalize +y /= y.max() + +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +X_train = sfix.input_tensor_via(0, X_train) +y_train = sfix.input_tensor_via(0, y_train) + +X_test = sfix.input_tensor_via(0, X_test) +y_test = sfix.input_tensor_via(0, y_test) + +from Compiler import ml + +try: + batch_size = int(program.args[1]) +except: + batch_size = 1 + +linear = ml.SGDLinear(100, batch_size, program) + +linear.fit(X_train, y_train) +print_ln('model %s', linear.opt.layers[0].W[:].reveal()) +print_ln('diff %s', (linear.predict(X_test) - y_test).reveal()) + +linear.fit_with_testing(X_train, y_train, X_test, y_test) +print_ln('model %s', linear.opt.layers[0].W[:].reveal()) +print_ln('diff %s', (linear.predict(X_test) - y_test).reveal()) diff --git a/Programs/Source/easy_adult.mpc b/Programs/Source/easy_adult.mpc new file mode 100644 index 000000000..5f1ccc389 --- /dev/null +++ b/Programs/Source/easy_adult.mpc @@ -0,0 +1,38 @@ +import pandas +from sklearn.model_selection import train_test_split +from Compiler import decision_tree + +data = pandas.read_csv( + 'https://datahub.io/machine-learning/adult/r/adult.csv') +#'/tmp/adult.csv') + +data, attr_types = decision_tree.preprocess_pandas(data) + +# label is last column +X = data[:,:-1] +y = data[:,-1] + +X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +X_train = sint.input_tensor_via(0, X_train) +X_test = sint.input_tensor_via(0, X_test) + +y_train = sint.input_tensor_via(0, y_train) +y_test = sint.input_tensor_via(0, y_test) + +# needed for correct Gini coefficient +sfix.round_nearest = True +sfix.set_precision(15, 31) + +# input values all fit 32 bits +program.set_bit_length(32) + +tree = decision_tree.TreeClassifier(max_depth=10) + +# training with level-wise accuracy output +tree.fit_with_testing(X_train, y_train, X_test, y_test, attr_types=attr_types) + +# plain training +tree.fit(X_train, y_train, attr_types=attr_types) + +print_ln('%s', (tree.predict(X_test) - y_test.get_vector()).reveal()) diff --git a/Programs/Source/keras_cifar_lenet.mpc b/Programs/Source/keras_cifar_lenet.mpc index 882d2e187..cfd137883 100644 --- a/Programs/Source/keras_cifar_lenet.mpc +++ b/Programs/Source/keras_cifar_lenet.mpc @@ -3,22 +3,39 @@ program.options_from_args() -training_samples = MultiArray([50000, 32, 32, 3], sfix) -training_labels = MultiArray([50000, 10], sint) - -test_samples = MultiArray([10000, 32, 32, 3], sfix) -test_labels = MultiArray([10000, 10], sint) - -training_labels.input_from(0) -training_samples.input_from(0) - -test_labels.input_from(0) -test_samples.input_from(0) - from Compiler import ml tf = ml ml.set_n_threads(36) +try: + ml.set_n_threads(int(program.args[1])) +except: + pass + +if 'torch' in program.args: + import torchvision, numpy + data = [] + for train in True, False: + ds = torchvision.datasets.CIFAR10(root='/tmp', train=train, download=True) + # normalize to [-1,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255 * 2 - 1, binary=True) + labels = sint.input_tensor_via(0, ds.targets, binary=True, one_hot=True) + data += [(labels, 
samples)] + + (training_labels, training_samples), (test_labels, test_samples) = data +else: + training_samples = MultiArray([50000, 32, 32, 3], sfix) + training_labels = MultiArray([50000, 10], sint) + + test_samples = MultiArray([10000, 32, 32, 3], sfix) + test_labels = MultiArray([10000, 10], sint) + + training_labels.input_from(0) + training_samples.input_from(0) + + test_labels.input_from(0) + test_samples.input_from(0) + layers = [ tf.keras.layers.Conv2D(20, 5, 1, 'valid', activation='relu'), tf.keras.layers.MaxPooling2D(2), diff --git a/Programs/Source/keras_mnist_dense.mpc b/Programs/Source/keras_mnist_dense.mpc index 76b1e23f5..4b281882d 100644 --- a/Programs/Source/keras_mnist_dense.mpc +++ b/Programs/Source/keras_mnist_dense.mpc @@ -3,17 +3,29 @@ program.options_from_args() -training_samples = sfix.Tensor([60000, 28, 28]) -training_labels = sint.Tensor([60000, 10]) - -test_samples = sfix.Tensor([10000, 28, 28]) -test_labels = sint.Tensor([10000, 10]) - -training_labels.input_from(0) -training_samples.input_from(0) - -test_labels.input_from(0) -test_samples.input_from(0) +if 'torch' in program.args: + import torchvision + data = [] + for train in True, False: + ds = torchvision.datasets.MNIST(root='/tmp', train=train, download=True) + # normalize to [0,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255., binary=True) + labels = sint.input_tensor_via(0, ds.targets, binary=True, one_hot=True) + data += [(labels, samples)] + + (training_labels, training_samples), (test_labels, test_samples) = data +else: + training_samples = sfix.Tensor([60000, 28, 28]) + training_labels = sint.Tensor([60000, 10]) + + test_samples = sfix.Tensor([10000, 28, 28]) + test_labels = sint.Tensor([10000, 10]) + + training_labels.input_from(0) + training_samples.input_from(0) + + test_labels.input_from(0) + test_samples.input_from(0) from Compiler import ml tf = ml diff --git a/Programs/Source/keras_mnist_lenet.mpc b/Programs/Source/keras_mnist_lenet.mpc index 90adf68e3..78acdd6a6 100644 --- a/Programs/Source/keras_mnist_lenet.mpc +++ b/Programs/Source/keras_mnist_lenet.mpc @@ -3,17 +3,29 @@ program.options_from_args() -training_samples = MultiArray([60000, 28, 28], sfix) -training_labels = MultiArray([60000, 10], sint) - -test_samples = MultiArray([10000, 28, 28], sfix) -test_labels = MultiArray([10000, 10], sint) - -training_labels.input_from(0) -training_samples.input_from(0) - -test_labels.input_from(0) -test_samples.input_from(0) +if 'torch' in program.args: + import torchvision + data = [] + for train in True, False: + ds = torchvision.datasets.MNIST(root='/tmp', train=train, download=True) + # normalize to [0,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255., binary=True) + labels = sint.input_tensor_via(0, ds.targets, binary=True, one_hot=True) + data += [(labels, samples)] + + (training_labels, training_samples), (test_labels, test_samples) = data +else: + training_samples = sfix.Tensor([60000, 28, 28]) + training_labels = sint.Tensor([60000, 10]) + + test_samples = sfix.Tensor([10000, 28, 28]) + test_labels = sint.Tensor([10000, 10]) + + training_labels.input_from(0) + training_samples.input_from(0) + + test_labels.input_from(0) + test_samples.input_from(0) from Compiler import ml tf = ml diff --git a/Programs/Source/keras_mnist_lenet_predict.mpc b/Programs/Source/keras_mnist_lenet_predict.mpc index 8b55de560..100dd564a 100644 --- a/Programs/Source/keras_mnist_lenet_predict.mpc +++ b/Programs/Source/keras_mnist_lenet_predict.mpc @@ -38,7 +38,7 @@ for var in 
model.trainable_variables: var.assign_all(0) # start = var.read_from_file(start) -guesses = model.predict(test_samples, batch_size=1) +guesses = model.predict(test_samples) print_ln('guess %s', guesses.reveal_nested()[:3]) print_ln('truth %s', test_labels.reveal_nested()[:3]) diff --git a/Programs/Source/test_sbitfix.mpc b/Programs/Source/test_sbitfix.mpc index 513cfe010..6940799b2 100644 --- a/Programs/Source/test_sbitfix.mpc +++ b/Programs/Source/test_sbitfix.mpc @@ -5,8 +5,7 @@ sbitfix.set_precision(16, 32) def test(a, b, value_type=None): try: b = int(round((b * (1 << a.f)))) - if b < 0: - b += 2 ** sbitfix.k + b += 2 ** sbitfix.k if b < 0 else 0 a = a.v.reveal() except AttributeError: pass diff --git a/Programs/Source/torch_alex_test.mpc b/Programs/Source/torch_alex_test.mpc new file mode 100644 index 000000000..4bbccaca0 --- /dev/null +++ b/Programs/Source/torch_alex_test.mpc @@ -0,0 +1,92 @@ +# this trains an AlexNet-like network on CIFAR-10 in cleartext +# before testing it in secure computation + +program.options_from_args() + +from Compiler import ml + +try: + ml.set_n_threads(int(program.args[1])) +except: + pass + +import torchvision +import torch +import numpy + +get_data = lambda train, transform=None: torchvision.datasets.CIFAR10( + root='/tmp', train=train, download=True, transform=transform) + +ds = get_data(False) + +# get 100 random samples +indices = numpy.random.randint(len(ds.data), size=(100,)) +# normalize to [-1,1] before input +test_samples = sfix.input_tensor_via( + 0, numpy.take(ds.data / 255 * 2 - 1, indices, 0)) +test_labels = sint.input_tensor_via( + 0, numpy.take(ds.targets, indices, 0), one_hot=True) + +import torch.nn as nn + +net = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=2), + nn.ReLU(), + nn.MaxPool2d(kernel_size=2), + nn.Conv2d(64, 96, kernel_size=3, padding=2), + nn.ReLU(), + nn.MaxPool2d(kernel_size=2), + nn.Conv2d(96, 96, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv2d(96, 64, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv2d(64, 64, kernel_size=3, padding=1), + nn.ReLU(), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Flatten(), + nn.Linear(1024, 128), + nn.ReLU(), + nn.Linear(128, 256), + nn.ReLU(), + nn.Linear(256, 10), +) + +# train for a bit +transform = torchvision.transforms.Compose( + [torchvision.transforms.ToTensor(), lambda x: 2 * x - 1]) +ds = get_data(train=True, transform=transform) +optimizer = torch.optim.Adam(net.parameters(), amsgrad=True) +criterion = nn.CrossEntropyLoss() + +for i, data in enumerate(torch.utils.data.DataLoader(ds, batch_size=128)): + inputs, labels = data + optimizer.zero_grad() + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + +with torch.no_grad(): + ds = get_data(False, transform) + total = correct_classified = 0 + for data in torch.utils.data.DataLoader(ds, batch_size=128): + inputs, labels = data + outputs = net(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct_classified += (predicted == labels).sum().item() + test_acc = (100 * correct_classified / total) + print('Cleartext test accuracy of the network: %.2f %%' % test_acc) + +from Compiler import ml + +layers = ml.layers_from_torch(net, test_samples.shape, 128, input_via=0) + +optimizer = ml.SGD(layers) + +# output to be used in Scripts/torch_cifar_alex_import.py +optimizer.reveal_model_to_binary() + +n_correct, loss = optimizer.reveal_correctness(test_samples, test_labels, 128) +print_ln('Secure accuracy: %s (%s/%s)', cfix(n_correct) / 
len(test_samples), + n_correct, len(test_samples)) diff --git a/Programs/Source/torch_cifar_alex.mpc b/Programs/Source/torch_cifar_alex.mpc new file mode 100644 index 000000000..54f4b0d69 --- /dev/null +++ b/Programs/Source/torch_cifar_alex.mpc @@ -0,0 +1,70 @@ +# this trains an AlexNet-like network on CIFAR-10 + +program.options_from_args() + +from Compiler import ml + +try: + ml.set_n_threads(int(program.args[2])) +except: + pass + +import torchvision, numpy +data = [] +for train in True, False: + ds = torchvision.datasets.CIFAR10(root='/tmp', train=train, download=True) + # normalize to [-1,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255 * 2 - 1, binary=True) + labels = sint.input_tensor_via(0, ds.targets, binary=True, one_hot=True) + data += [(labels, samples)] + +(training_labels, training_samples), (test_labels, test_samples) = data + +import torch +import torch.nn as nn + +net = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=2), + nn.ReLU(), + nn.BatchNorm2d(64), + nn.MaxPool2d(kernel_size=2), + nn.Conv2d(64, 96, kernel_size=3, padding=2), + nn.ReLU(), + nn.BatchNorm2d(96), + nn.MaxPool2d(kernel_size=2), + nn.Conv2d(96, 96, kernel_size=3, padding=1), + nn.ReLU(), + nn.BatchNorm2d(96), + nn.Conv2d(96, 64, kernel_size=3, padding=1), + nn.ReLU(), + nn.BatchNorm2d(64), + nn.Conv2d(64, 64, kernel_size=3, padding=1), + nn.ReLU(), + nn.BatchNorm2d(64), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Flatten(), + nn.Linear(1024, 128), + nn.ReLU(), + nn.Linear(128, 256), + nn.ReLU(), + nn.Linear(256, 10), +) + +# test network +ds = torchvision.datasets.CIFAR10( + root='/tmp', transform=torchvision.transforms.ToTensor()) +inputs = next(iter(torch.utils.data.DataLoader(ds)))[0] +print(inputs.shape) +outputs = net(inputs) + +layers = ml.layers_from_torch(net, training_samples.shape, 128) + +optimizer = ml.SGD(layers) +optimizer.fit( + training_samples, + training_labels, + epochs=int(program.args[1]), + batch_size=128, + validation_data=(test_samples, test_labels), + program=program +) diff --git a/Programs/Source/torch_cifar_lenet.mpc b/Programs/Source/torch_cifar_lenet.mpc new file mode 100644 index 000000000..3d9ea7cda --- /dev/null +++ b/Programs/Source/torch_cifar_lenet.mpc @@ -0,0 +1,57 @@ +# this trains LeNet on CIFAR-10 + +program.options_from_args() + +from Compiler import ml + +try: + ml.set_n_threads(int(program.args[2])) +except: + pass + +import torchvision, numpy +data = [] +for train in True, False: + ds = torchvision.datasets.CIFAR10(root='/tmp', train=train, download=True) + # normalize to [-1,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255 * 2 - 1, binary=True) + labels = sint.input_tensor_via(0, ds.targets, binary=True, one_hot=True) + data += [(labels, samples)] + +(training_labels, training_samples), (test_labels, test_samples) = data + +import torch +import torch.nn as nn + +net = nn.Sequential( + nn.Conv2d(3, 20, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Conv2d(20, 50, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Flatten(), + nn.ReLU(), + nn.Linear(1250, 500), + nn.ReLU(), + nn.Linear(500, 10) ) + +# test network +ds = torchvision.datasets.CIFAR10( + root='/tmp', transform=torchvision.transforms.ToTensor()) +inputs = next(iter(torch.utils.data.DataLoader(ds)))[0] +print(inputs.shape) +outputs = net(inputs) + +layers = ml.layers_from_torch(net, training_samples.shape, 128) + +optimizer = ml.SGD(layers) +optimizer.fit( + training_samples, + training_labels, + epochs=int(program.args[1]), + batch_size=128, + validation_data=(test_samples, 
test_labels), + program=program +) diff --git a/Programs/Source/torch_cifar_lenet_pretrain.mpc b/Programs/Source/torch_cifar_lenet_pretrain.mpc new file mode 100644 index 000000000..216a2d68f --- /dev/null +++ b/Programs/Source/torch_cifar_lenet_pretrain.mpc @@ -0,0 +1,81 @@ +# this trains LeNet on CIFAR-10 on a model pretrained in cleartext + +program.options_from_args() + +from Compiler import ml + +try: + ml.set_n_threads(int(program.args[2])) +except: + pass + +get_data = lambda train, transform=None: torchvision.datasets.CIFAR10( + root='/tmp', train=train, download=True, transform=transform) + +import torchvision, numpy +data = [] +for train in True, False: + ds = get_data(train) + # normalize to [-1,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255 * 2 - 1, binary=True) + labels = sint.input_tensor_via(0, ds.targets, binary=True, one_hot=True) + data += [(labels, samples)] + +(training_labels, training_samples), (test_labels, test_samples) = data + +import torch +import torch.nn as nn + +net = nn.Sequential( + nn.Conv2d(3, 20, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Conv2d(20, 50, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Flatten(), + nn.ReLU(), + nn.Linear(1250, 500), + nn.ReLU(), + nn.Linear(500, 10) +) + +# train for a bit +transform = torchvision.transforms.Compose( + [torchvision.transforms.ToTensor(), lambda x: 2 * x - 1]) +ds = get_data(train=True, transform=transform) +optimizer = torch.optim.Adam(net.parameters(), amsgrad=True) +criterion = nn.CrossEntropyLoss() + +for i, data in enumerate(torch.utils.data.DataLoader(ds, batch_size=128)): + inputs, labels = data + optimizer.zero_grad() + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + +with torch.no_grad(): + ds = get_data(False, transform) + total = correct_classified = 0 + for data in torch.utils.data.DataLoader(ds, batch_size=128): + inputs, labels = data + outputs = net(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct_classified += (predicted == labels).sum().item() + test_acc = (100 * correct_classified / total) + print('Cleartext test accuracy of the network: %.2f %%' % test_acc) + +layers = ml.layers_from_torch(net, training_samples.shape, 128, input_via=0) + +optimizer = ml.SGD(layers) +optimizer.fit( + training_samples, + training_labels, + epochs=int(program.args[1]), + batch_size=128, + validation_data=(test_samples, test_labels), + program=program, + reset=False +) diff --git a/Programs/Source/torch_mnist_dense.mpc b/Programs/Source/torch_mnist_dense.mpc new file mode 100644 index 000000000..7b49a6a10 --- /dev/null +++ b/Programs/Source/torch_mnist_dense.mpc @@ -0,0 +1,57 @@ +# this trains a dense neural network on MNIST + +program.options_from_args() + +import torchvision + +data = [] +for train in True, False: + ds = torchvision.datasets.MNIST(root='/tmp', train=train, download=True) + # normalize to [0,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255) + labels = sint.input_tensor_via(0, ds.targets, one_hot=True) + data += [(labels, samples)] + +(training_labels, training_samples), (test_labels, test_samples) = data + +import torch +import torch.nn as nn + +net = nn.Sequential( + nn.Flatten(), + nn.Linear(28 * 28, 128), + nn.ReLU(), + nn.Linear(128, 128), + nn.ReLU(), + nn.Linear(128, 10) +) + +# test network +ds = torchvision.datasets.MNIST( + root='/tmp', transform=torchvision.transforms.ToTensor()) +inputs = next(iter(torch.utils.data.DataLoader(ds)))[0] +print(inputs.shape) +outputs = 
net(inputs) + +from Compiler import ml + +ml.set_n_threads(int(program.args[2])) + +layers = ml.layers_from_torch(net, training_samples.shape, 128) + +optimizer = ml.SGD(layers) +optimizer.fit( + training_samples, + training_labels, + epochs=int(program.args[1]), + batch_size=128, + validation_data=(test_samples, test_labels), + program=program +) + +# store secret model for use in torch_mnist_dense_test +for var in optimizer.trainable_variables: + var.write_to_file() + +# output to be used in Scripts/torch_mnist_lenet_import.py +optimizer.reveal_model_to_binary() diff --git a/Programs/Source/torch_mnist_dense_pretrain.mpc b/Programs/Source/torch_mnist_dense_pretrain.mpc new file mode 100644 index 000000000..4745d02d3 --- /dev/null +++ b/Programs/Source/torch_mnist_dense_pretrain.mpc @@ -0,0 +1,72 @@ +# this trains a dense neural network on MNIST + +program.options_from_args() + +import torchvision + +data = [] +for train in True, False: + ds = torchvision.datasets.MNIST(root='/tmp', train=train, download=True) + # normalize to [0,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255., binary=True) + labels = sint.input_tensor_via(0, ds.targets, binary=True, one_hot=True) + data += [(labels, samples)] + +(training_labels, training_samples), (test_labels, test_samples) = data + +import torch +import torch.nn as nn + +net = nn.Sequential( + nn.Flatten(), + nn.Linear(28 * 28, 128), + nn.ReLU(), + nn.Linear(128, 128), + nn.ReLU(), + nn.Linear(128, 10) +) + +# train for a bit +transform = torchvision.transforms.Compose( + [torchvision.transforms.ToTensor()]) +ds = torchvision.datasets.MNIST(root='/tmp', transform=transform, train=True) +optimizer = torch.optim.Adam(net.parameters(), amsgrad=True) +criterion = nn.CrossEntropyLoss() + +for i, data in enumerate(torch.utils.data.DataLoader(ds, batch_size=128)): + inputs, labels = data + optimizer.zero_grad() + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + +with torch.no_grad(): + ds = torchvision.datasets.MNIST(root='/tmp', transform=transform, + train=False) + total = correct_classified = 0 + for data in torch.utils.data.DataLoader(ds, batch_size=128): + inputs, labels = data + outputs = net(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct_classified += (predicted == labels).sum().item() + test_acc = (100 * correct_classified / total) + print('Test accuracy of the network: %.2f %%' % test_acc) + +from Compiler import ml + +ml.set_n_threads(int(program.args[2])) + +layers = ml.layers_from_torch(net, training_samples.shape, 128, input_via=0) + +optimizer = ml.SGD(layers) +optimizer.fit( + training_samples, + training_labels, + epochs=int(program.args[1]), + batch_size=128, + validation_data=(test_samples, test_labels), + program=program, + reset=False +) diff --git a/Programs/Source/torch_mnist_dense_test.mpc b/Programs/Source/torch_mnist_dense_test.mpc new file mode 100644 index 000000000..ceb6d72e1 --- /dev/null +++ b/Programs/Source/torch_mnist_dense_test.mpc @@ -0,0 +1,40 @@ +# this tests a previously stored dense neural network on MNIST + +program.options_from_args() + +import torchvision + +data = [] +for train in True, False: + ds = torchvision.datasets.MNIST(root='/tmp', train=train, download=True) + # normalize to [0,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255) + labels = sint.input_tensor_via(0, ds.targets, one_hot=True) + data += [(labels, samples)] + +(training_labels, training_samples), (test_labels, 
test_samples) = data + +import torch +import torch.nn as nn + +net = nn.Sequential( + nn.Flatten(), + nn.Linear(28 * 28, 128), + nn.ReLU(), + nn.Linear(128, 128), + nn.ReLU(), + nn.Linear(128, 10) +) + +from Compiler import ml + +layers = ml.layers_from_torch(net, training_samples.shape, 128) + +optimizer = ml.Optimizer(layers) + +start = 0 +for var in optimizer.trainable_variables: + start = var.read_from_file(start) + +n_correct, loss = optimizer.reveal_correctness(test_samples, test_labels, 128) +print_ln('Accuracy: %s/%s', n_correct, len(test_samples)) diff --git a/Programs/Source/torch_mnist_lenet.mpc b/Programs/Source/torch_mnist_lenet.mpc new file mode 100644 index 000000000..75ccf24d6 --- /dev/null +++ b/Programs/Source/torch_mnist_lenet.mpc @@ -0,0 +1,49 @@ +# this trains LeNet on MNIST + +program.options_from_args() + +import torchvision + +data = [] +for train in True, False: + ds = torchvision.datasets.MNIST(root='/tmp', train=train, download=True) + # normalize to [0,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255., binary=True) + labels = sint.input_tensor_via(0, ds.targets, binary=True, one_hot=True) + data += [(labels, samples)] + +import torch +import torch.nn as nn + +net = nn.Sequential( + nn.Conv2d(1, 20, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Conv2d(20, 50, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Flatten(), + nn.ReLU(), + nn.Linear(800, 500), + nn.ReLU(), + nn.Linear(500, 10) +) + +# test network +ds = torchvision.datasets.MNIST( + root='/tmp', transform=torchvision.transforms.ToTensor()) +inputs = next(iter(torch.utils.data.DataLoader(ds)))[0] +print(inputs.shape) +outputs = net(inputs) + +from Compiler import ml + +ml.set_n_threads(int(program.args[2])) + +layers = ml.layers_from_torch(net, data[0][1].shape, 128) +layers[0].X = data[0][1] +layers[-1].Y = data[0][0] + +optimizer = ml.SGD(layers) +optimizer.run_by_args(program, int(program.args[1]), 128, + data[1][1], data[1][0]) diff --git a/Programs/Source/torch_mnist_lenet_predict.mpc b/Programs/Source/torch_mnist_lenet_predict.mpc new file mode 100644 index 000000000..8e8b54cb1 --- /dev/null +++ b/Programs/Source/torch_mnist_lenet_predict.mpc @@ -0,0 +1,74 @@ +# this trains a LeNet on MNIST in cleartext and tests it securely + +program.options_from_args() + +import torchvision + +data = [] +for train in True, False: + ds = torchvision.datasets.MNIST(root='/tmp', train=train, download=True) + # normalize to [0,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255., binary=True) + labels = sint.input_tensor_via(0, ds.targets, binary=True, one_hot=True) + data += [(labels, samples)] + +(training_labels, training_samples), (test_labels, test_samples) = data + +import torch +import torch.nn as nn + +net = nn.Sequential( + nn.Conv2d(1, 20, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Conv2d(20, 50, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Flatten(), + nn.ReLU(), + nn.Linear(800, 500), + nn.ReLU(), + nn.Linear(500, 10) +) + +# train for a bit +transform = torchvision.transforms.Compose( + [torchvision.transforms.ToTensor()]) +ds = torchvision.datasets.MNIST(root='/tmp', transform=transform, train=True) +optimizer = torch.optim.Adam(net.parameters(), amsgrad=True) +criterion = nn.CrossEntropyLoss() + +for i, data in enumerate(torch.utils.data.DataLoader(ds, batch_size=128)): + inputs, labels = data + optimizer.zero_grad() + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + +with torch.no_grad(): + ds = 
torchvision.datasets.MNIST(root='/tmp', transform=transform, + train=False) + total = correct_classified = 0 + for data in torch.utils.data.DataLoader(ds, batch_size=128): + inputs, labels = data + outputs = net(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct_classified += (predicted == labels).sum().item() + test_acc = (100 * correct_classified / total) + print('Cleartext accuracy of the network: %.2f %%' % test_acc) + +from Compiler import ml + +layers = ml.layers_from_torch(net, training_samples.shape, 128, input_via=0) + +ml.set_n_threads(8) + +optimizer = ml.Optimizer(layers) + +# output to be used in Scripts/torch_mnist_lenet_import.py +optimizer.reveal_model_to_binary() + +n_correct, loss = optimizer.reveal_correctness(test_samples, test_labels, 128, running=True) +print_ln('Secure accuracy: %s/%s', n_correct, len(test_samples)) diff --git a/Protocols/FakeShare.h b/Protocols/FakeShare.h index e5bb9e9e5..a73142b75 100644 --- a/Protocols/FakeShare.h +++ b/Protocols/FakeShare.h @@ -34,6 +34,7 @@ class FakeShare : public T, public ShareInterface static const bool has_trunc_pr = true; static const bool dishonest_majority = false; static const bool malicious = false; + static const bool is_real = false; static string type_short() { diff --git a/Protocols/Hemi.hpp b/Protocols/Hemi.hpp index 1549e2cf4..9ba85290f 100644 --- a/Protocols/Hemi.hpp +++ b/Protocols/Hemi.hpp @@ -25,10 +25,10 @@ typename T::MatrixPrep& Hemi<T>::get_matrix_prep(const array<int, 3>& dims, SubProcessor<T>& processor) { if (matrix_preps.find(dims) == matrix_preps.end()) - matrix_preps.insert({dims, + matrix_preps.insert(pair<array<int, 3>, typename T::MatrixPrep*>(dims, new typename T::MatrixPrep(dims[0], dims[1], dims[2], dynamic_cast<typename T::LivePrep&>(processor.DataF), - matrix_usage)}); + matrix_usage))); return *matrix_preps.at(dims); } diff --git a/Protocols/SemiInput.hpp b/Protocols/SemiInput.hpp index 7ab4a855a..5cdfae792 100644 --- a/Protocols/SemiInput.hpp +++ b/Protocols/SemiInput.hpp @@ -68,7 +68,7 @@ template void SemiInput::finalize_other(int player, T& target, octetStream&, int) { - target = this->recv_prngs[player].template get(); + target = this->recv_prngs.at(player).template get(); } template diff --git a/Protocols/ShareInterface.h b/Protocols/ShareInterface.h index 4e6b975cb..c168a464e 100644 --- a/Protocols/ShareInterface.h +++ b/Protocols/ShareInterface.h @@ -46,6 +46,8 @@ class ShareInterface const static bool symmetric = true; + static const bool is_real = true; + static const int default_length = 1; static string type_short() { throw runtime_error("shorthand undefined"); } diff --git a/README.md b/README.md index c14f41ce9..5c96b7e15 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,9 @@ solutions](https://mp-spdz.readthedocs.io/en/latest/troubleshooting.html). ##### Filing Issues Please file complete code examples because it's usually not possible -to reproduce problems from incomplete code. +to reproduce problems from incomplete code, and please include which +protocol you have used (if applicable) because there are considerable +differences between the various protocols. #### Frequently Asked Questions @@ -40,10 +42,9 @@ the top folder: ``` Scripts/tldr.sh -./compile.py tutorial echo 1 2 3 4 > Player-Data/Input-P0-0 echo 1 2 3 4 > Player-Data/Input-P1-0 -Scripts/mascot.sh tutorial +Scripts/compile-run.py -E mascot tutorial ``` This runs [the tutorial](Programs/Source/tutorial.mpc) with two parties and malicious security. 
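(Editorial aside, not part of the patch: the same one-command facility works with the other protocol scripts in `Scripts/` — the name passed to `-E` corresponds to the script name without `.sh`, as explained under "Running Computation" below. For instance, assuming the semi-honest OT-based protocol `semi` shipped with MP-SPDZ, the following sketch would run the tutorial without malicious security:

```
Scripts/compile-run.py -E semi tutorial
```
)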
On Linux, this requires a working toolchain and [all requirements](#requirements). On Ubuntu, the following might suffice: ``` -sudo apt-get install automake build-essential cmake git libboost-dev libboost-thread-dev libntl-dev libsodium-dev libssl-dev libtool m4 python3 texinfo yasm +sudo apt-get install automake build-essential clang cmake git libboost-dev libboost-thread-dev libntl-dev libsodium-dev libssl-dev libtool m4 python3 texinfo yasm ``` On MacOS, this requires [brew](https://brew.sh) to be installed, which will be used for all dependencies. @@ -62,17 +63,16 @@ It will execute [the tutorial](Programs/Source/tutorial.mpc) with two parties and malicious security. -Note that this only works with a git clone but not with a binary -release. - ``` -make -j 8 tldr -./compile.py tutorial +make setup echo 1 2 3 4 > Player-Data/Input-P0-0 echo 1 2 3 4 > Player-Data/Input-P1-0 -Scripts/mascot.sh tutorial +Scripts/compile-run.py -E mascot tutorial ``` +On sufficiently powerful hardware (several cores and several GB of RAM), you +can speed up the last step by running `make -j8 mascot-party.x` beforehand. + #### TL;DR (Docker) Build a docker image for `mascot-party.x`: @@ -271,7 +271,7 @@ compute the preprocessing time for a particular computation. #### Requirements - GCC 5 or later (tested with up to 11) or LLVM/clang 6 or later - (tested with up to 14). We recommend clang because it performs + (tested with up to 14). The default is to use clang because it performs better. Note that GCC 5/6 and clang 9 don't support libOTe, so you need to deactivate its use for these compilers (see the next section). @@ -284,16 +284,16 @@ compute the preprocessing time for a particular computation. install it locally. libOTe also requires boost of version at least 1.75, which is not available by default on relatively recent systems such as Ubuntu - 20.04. You can install it locally by running `make boost`. + 22.04. You can install it locally by running `make boost`. - MPIR library, compiled with C++ support (use flag `--enable-cxx` when running configure). You can use `make -j8 mpir` to install it locally. - libsodium library, tested against 1.0.18 - - OpenSSL, tested against 1.1.1 - - Boost.Asio with SSL support (`libboost-dev` on Ubuntu), tested against 1.71 - - Boost.Thread for BMR (`libboost-thread-dev` on Ubuntu), tested against 1.71 + - OpenSSL, tested against 3.0.2 + - Boost.Asio with SSL support (`libboost-dev` on Ubuntu), tested against 1.81 + - Boost.Thread for BMR (`libboost-thread-dev` on Ubuntu), tested against 1.81 - x86 or ARM 64-bit CPU (the latter tested with AWS Graviton and Apple Silicon) - Python 3.5 or later - - NTL library for homomorphic encryption (optional; tested with NTL 10.5) + - NTL library for homomorphic encryption (optional; tested with NTL 11.5.1) - If using macOS, Sierra or later - Windows/VirtualBox: see [this issue](https://github.com/data61/MP-SPDZ/issues/557) for a discussion @@ -328,14 +328,67 @@ compute the preprocessing time for a particular computation. parts only. Remember to run `make clean` first after changing `CONFIG` or `CONFIG.mine`. -# Running computation +# Running Computation See `Programs/Source/` for some example MPC programs, in particular `tutorial.mpc`. Furthermore, [Read the Docs](https://mp-spdz.readthedocs.io/en/latest/) hosts a more -detailed reference of the high-level functionality extracted from the -Python code in the `Compiler` directory as well as a summary of -relevant compiler options. +detailed reference of all aspects of MP-SPDZ. 
+ +There are three ways of running computation: + +1. Separate compilation and execution. This is the default in the + rest of the documentation. It allows running the same program several + times while compiling only once, for example: + + ``` + ./compile.py <program> <argument> + Scripts/mascot.sh <program>-<argument> [...] + Scripts/mascot.sh <program>-<argument> [...] + ``` + +2. One-command local execution. This compiles the program and the + virtual machine if necessary before executing it locally with the + given protocol. The names of the protocols correspond to the script + names below (without the `.sh`). Furthermore, some + protocol-specific optimization options as well as required options + are used automatically. + + ``` + Scripts/compile-run.py -E mascot <program> -- [...] + ``` + +3. One-command remote execution. This compiles the program and the + virtual machine if necessary before uploading them together with + all necessary input and certificate files via SSH. + + ``` + Scripts/compile-run.py -H HOSTS -E mascot <program> -- [...] + ``` + + `HOSTS` has to be a text file in the following format: + + ``` + [<user>@]<host0>[/<path>] + [<user>@]<host1>[/<path>] + ... + ``` + + If <path> does not start with `/` (only one `/` after the + hostname), the path will be relative to the home directory of the + user. Otherwise (`//` after the hostname), it will be relative to the + root directory. + +Even with the integrated execution, it is important to keep in mind +that there are two different phases, namely the compilation and the run-time +phase. Any secret data is only available in the second phase, when the +Python compilation has concluded. Therefore, types like `sint` and +`sfix` are mere placeholders for data to be used later, and they don't +contain any shares. See also [the +documentation](https://mp-spdz.readthedocs.io/en/latest/compilation.html#compilation-vs-run-time) +for what this means when using Python data structures and Python +language features. + ### Compiling high-level programs @@ -347,8 +400,10 @@ to be compiled accordingly. ```./compile.py [-F <integer bit length>] [-P <prime>] <program>``` The integer bit length defaults to 64, and the prime defaults to none -given. If a prime is given, it has to be at least two bits longer -than the integer length. +given. If a prime is given, it has to be at least two bits longer than +the integer length. Note that `-P` is optional, and it involves +algorithms that are more expensive while allowing for a wider range of +integer lengths. Note that in this context integers do not wrap around according to the integer bit length but the length is used for non-linear @@ -763,7 +818,7 @@ for computation modulo a power of two. It involves sharing both a secret value and information-theoretic tag similar to SPDZ but not with additive secret sharing, hence the name. Rep4 refers to the four-party protocol by [Dalskov et -al.](https://eprint.iacr.org/2020/1330). 
+al.](https://eprint.iacr.org/2020/1330) `malicious-rep-bin-party.x` is based on cut-and-choose triple generation by [Furukawa et al.](https://eprint.iacr.org/2016/944) but using Beaver multiplication instead of their post-sacrifice diff --git a/Scripts/build.sh b/Scripts/build.sh index 1c3f72866..0aaf49525 100755 --- a/Scripts/build.sh +++ b/Scripts/build.sh @@ -4,11 +4,8 @@ function build { echo ARCH = $1 >> CONFIG.mine echo GDEBUG = >> CONFIG.mine - root=`pwd` - cd deps/libOTe - rm -R out - python3 build.py --install=$root/local -- -DENABLE_SOFTSPOKEN_OT=ON -DBUILD_SHARED_LIBS=0 -DCMAKE_INSTALL_LIBDIR=lib $3 - cd $root + echo OTE_OPTS= -DENABLE_SOFTSPOKEN_OT=ON -DBUILD_SHARED_LIBS=0 -DCMAKE_INSTALL_LIBDIR=lib $3 >> CONFIG.mine + rm -R deps/libOTe/out make clean rm -R static mkdir static diff --git a/Scripts/compile-emulate.py b/Scripts/compile-emulate.py new file mode 100755 index 000000000..4f346c325 --- /dev/null +++ b/Scripts/compile-emulate.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 + +import os, sys + +sys.path.append('.') + +from Compiler.compilerLib import Compiler + +compiler = Compiler() +compiler.prep_compile(build=False) +compiler.execute = True +compiler.options.execute = 'emulate' +compiler.options.ring = compiler.options.ring or '64' +compiler.options.keep_cisc = compiler.options.keep_cisc or '' +compiler.build() +prog = compiler.compile_file() +compiler.local_execution() diff --git a/Scripts/compile-run.py b/Scripts/compile-run.py new file mode 100755 index 000000000..d7f2711b3 --- /dev/null +++ b/Scripts/compile-run.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 + +import os, sys + +sys.path.append('.') + +from Compiler.compilerLib import Compiler + +try: + split = sys.argv.index('--') +except ValueError: + split = len(sys.argv) + +compiler_args = sys.argv[1:split] +runtime_args = sys.argv[split + 1:] +compiler = Compiler(execute=True, custom_args=compiler_args) +compiler.prep_compile() +prog = compiler.compile_file() + +if prog.options.hostfile: + compiler.remote_execution(runtime_args) +else: + compiler.local_execution(runtime_args) diff --git a/Scripts/memory-usage.py b/Scripts/memory-usage.py index eaec677fd..1977fc2c6 100755 --- a/Scripts/memory-usage.py +++ b/Scripts/memory-usage.py @@ -13,8 +13,9 @@ res = collections.defaultdict(lambda: 0) regs = collections.defaultdict(lambda: 0) +thread_regs = collections.defaultdict(lambda: 0) -for tapename in Program.read_tapes(sys.argv[1]): +def process(tapename, res, regs): for inst in Tape.read_instructions(tapename): t = inst.type if issubclass(t, DirectMemoryInstruction): @@ -24,7 +25,17 @@ if isinstance(arg, RegisterArgFormat): regs[type(arg)] = max(regs[type(arg)], arg.i + inst.size) +tapes = Program.read_tapes(sys.argv[1]) + +process(next(tapes), res, regs) + +for tapename in tapes: + process(tapename, res, thread_regs) + reverse_formats = dict((v, k) for k, v in ArgFormats.items()) +regout = lambda regs: dict((reverse_formats[t], n) for t, n in regs.items()) + print ('Memory:', dict(res)) -print ('Registers:', dict((reverse_formats[t], n) for t, n in regs.items())) +print ('Registers in main thread:', regout(regs)) +print ('Registers in other threads:', regout(thread_regs)) diff --git a/Scripts/setup-clients.sh b/Scripts/setup-clients.sh index 74010c266..e07a3f978 100755 --- a/Scripts/setup-clients.sh +++ b/Scripts/setup-clients.sh @@ -1,5 +1,8 @@ #!/usr/bin/env bash +# brew-installed OpenSSL on MacOS +PATH="/opt/homebrew/opt/openssl@3/bin:$PATH" + n=$1 test -e Player-Data || mkdir Player-Data diff --git 
a/Scripts/setup-ssl.sh b/Scripts/setup-ssl.sh index 01113f166..b479a31f3 100755 --- a/Scripts/setup-ssl.sh +++ b/Scripts/setup-ssl.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # brew-installed OpenSSL on MacOS -PATH=/usr/local/opt/openssl/bin:$PATH +PATH="/opt/homebrew/opt/openssl@3/bin:$PATH" n=${1:-4} ssl_dir=${2:-"Player-Data"} diff --git a/Scripts/test_tutorial.sh b/Scripts/test_tutorial.sh index 60157c934..094a6393d 100755 --- a/Scripts/test_tutorial.sh +++ b/Scripts/test_tutorial.sh @@ -83,15 +83,17 @@ fi ./compile.py tutorial -for i in cowgear chaigear; do - test_vm $i $run_opts -S 3 -c 2 -J -done +if test $no_top_gear; then + for i in cowgear chaigear; do + test_vm $i $run_opts -S 3 -c 2 -J + done +fi if test $skip_binary; then exit fi -./compile.py -B 16 $compile_opts tutorial +./compile.py -GB 16 $compile_opts tutorial for i in replicated mal-rep-bin ps-rep-bin semi-bin ccd mal-ccd; do test_vm $i $run_opts diff --git a/Scripts/tldr.sh b/Scripts/tldr.sh index bd5b396a7..54cde516e 100755 --- a/Scripts/tldr.sh +++ b/Scripts/tldr.sh @@ -11,9 +11,10 @@ elif test `uname` = Darwin; then echo Aborting exit 1 else - /usr/bin/env ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" fi fi + make mac-setup make tldr else echo OS unknown diff --git a/Scripts/torch_cifar_alex_import.py b/Scripts/torch_cifar_alex_import.py new file mode 100755 index 000000000..1adb34a5a --- /dev/null +++ b/Scripts/torch_cifar_alex_import.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 + +# test model output by torch_alex_test.mpc + +import torchvision +import torch +import torch.nn as nn +import numpy + +net = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=2), + nn.ReLU(), + nn.MaxPool2d(kernel_size=2), + nn.Conv2d(64, 96, kernel_size=3, padding=2), + nn.ReLU(), + nn.MaxPool2d(kernel_size=2), + nn.Conv2d(96, 96, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv2d(96, 64, kernel_size=3, padding=1), + nn.ReLU(), + nn.Conv2d(64, 64, kernel_size=3, padding=1), + nn.ReLU(), + nn.MaxPool2d(kernel_size=3, stride=2), + nn.Flatten(), + nn.Linear(1024, 128), + nn.ReLU(), + nn.Linear(128, 256), + nn.ReLU(), + nn.Linear(256, 10), +) + +f = open('Player-Data/Binary-Output-P0-0') + +state = net.state_dict() + +for name in state: + shape = state[name].shape + size = numpy.prod(shape) + var = numpy.fromfile(f, 'double', count=size) + var = var.reshape(shape) + state[name] = torch.Tensor(var) + +net.load_state_dict(state) + +get_data = lambda train, transform=None: torchvision.datasets.CIFAR10( + root='/tmp', train=train, download=True, transform=transform) + +transform = torchvision.transforms.Compose( + [torchvision.transforms.ToTensor(), lambda x: 2 * x - 1]) + +with torch.no_grad(): + ds = get_data(False, transform) + total = correct_classified = 0 + for data in torch.utils.data.DataLoader(ds, batch_size=128): + inputs, labels = data + outputs = net(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct_classified += (predicted == labels).sum().item() + test_acc = (100 * correct_classified / total) + print('Test accuracy of the network: %.2f %%' % test_acc) diff --git a/Scripts/torch_mnist_dense_import.py b/Scripts/torch_mnist_dense_import.py new file mode 100755 index 000000000..9286cc72e --- /dev/null +++ b/Scripts/torch_mnist_dense_import.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 + +# test model output by torch_mnist_dense.mpc + +import 
torchvision +import torch +import torch.nn as nn +import numpy + +net = nn.Sequential( + nn.Flatten(), + nn.Linear(28 * 28, 128), + nn.ReLU(), + nn.Linear(128, 128), + nn.ReLU(), + nn.Linear(128, 10) +) + +f = open('Player-Data/Binary-Output-P0-0') + +state = net.state_dict() + +for name in state: + shape = state[name].shape + size = numpy.prod(shape) + var = numpy.fromfile(f, 'double', count=size) + var = var.reshape(shape) + state[name] = torch.Tensor(var) + +net.load_state_dict(state) + +transform = torchvision.transforms.Compose( + [torchvision.transforms.ToTensor()]) + +with torch.no_grad(): + ds = torchvision.datasets.MNIST(root='/tmp', transform=transform, + train=False) + total = correct_classified = 0 + for data in torch.utils.data.DataLoader(ds, batch_size=128): + inputs, labels = data + outputs = net(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct_classified += (predicted == labels).sum().item() + test_acc = (100 * correct_classified / total) + print('Test accuracy of the network: %.2f %%' % test_acc) diff --git a/Scripts/torch_mnist_lenet_import.py b/Scripts/torch_mnist_lenet_import.py new file mode 100755 index 000000000..9df05285d --- /dev/null +++ b/Scripts/torch_mnist_lenet_import.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +# test model output by torch_mnist_lenet_predict.mpc + +import torchvision +import torch +import torch.nn as nn +import numpy + +net = nn.Sequential( + nn.Conv2d(1, 20, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Conv2d(20, 50, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Flatten(), + nn.ReLU(), + nn.Linear(800, 500), + nn.ReLU(), + nn.Linear(500, 10) +) + +f = open('Player-Data/Binary-Output-P0-0') + +state = net.state_dict() + +for name in state: + shape = state[name].shape + size = numpy.prod(shape) + var = numpy.fromfile(f, 'double', count=size) + var = var.reshape(shape) + state[name] = torch.Tensor(var) + +net.load_state_dict(state) + +transform = torchvision.transforms.Compose( + [torchvision.transforms.ToTensor()]) + +with torch.no_grad(): + ds = torchvision.datasets.MNIST(root='/tmp', transform=transform, + train=False) + total = correct_classified = 0 + for data in torch.utils.data.DataLoader(ds, batch_size=128): + inputs, labels = data + outputs = net(inputs) + _, predicted = torch.max(outputs.data, 1) + total += labels.size(0) + correct_classified += (predicted == labels).sum().item() + test_acc = (100 * correct_classified / total) + print('Test accuracy of the network: %.2f %%' % test_acc) diff --git a/Tools/FlexBuffer.cpp b/Tools/FlexBuffer.cpp index 1dabd8c26..6c663cb63 100644 --- a/Tools/FlexBuffer.cpp +++ b/Tools/FlexBuffer.cpp @@ -20,6 +20,8 @@ ReceivedMsgStore::~ReceivedMsgStore() << push_timer.elapsed() << " seconds and retrieved them in " << pop_timer.elapsed() << " seconds " << endl; #endif + for (auto& file : files) + remove(file.c_str()); } void ReceivedMsgStore::push(ReceivedMsg& msg) @@ -41,7 +43,8 @@ void ReceivedMsgStore::push(ReceivedMsg& msg) sprintf(filename, "%s/%d.XXXXXX", BUFFER_DIR, getpid()); FILE* file = fdopen(mkstemp(filename), "w"); if (!file) - throw runtime_error("can't open file"); + throw runtime_error("can't open file, check space on " + BUFFER_DIR); size_t len = msg.size(); size_t ptr = msg.ptr - msg.buf; if (fwrite(&len, sizeof(len), 1, file) != 1) diff --git a/Tools/Hash.cpp b/Tools/Hash.cpp index 680bec969..bee5adfb9 100644 --- a/Tools/Hash.cpp +++ b/Tools/Hash.cpp @@ -35,6 +35,11 @@ void Hash::update(const octetStream& os) update(os.get_data(), os.get_length()); } +void 
Hash::update(const string& str) +{ + update(str.data(), str.size()); +} + void Hash::final(octetStream& os) { os.resize_precise(hash_length); diff --git a/Tools/Hash.h b/Tools/Hash.h index 6d1938ca2..706ddf326 100644 --- a/Tools/Hash.h +++ b/Tools/Hash.h @@ -41,6 +41,7 @@ class Hash v[i].pack(tmp, bit_lengths[i]); update(tmp); } + void update(const string& str); void final(unsigned char hashout[hash_length]) { diff --git a/Tools/Lock.h b/Tools/Lock.h index 299aa62b5..459e1f01b 100644 --- a/Tools/Lock.h +++ b/Tools/Lock.h @@ -19,4 +19,21 @@ class Lock void unlock(); }; +class ScopeLock +{ + Lock& lock; + +public: + ScopeLock(Lock& lock) : + lock(lock) + { + lock.lock(); + } + + ~ScopeLock() + { + lock.unlock(); + } +}; + #endif /* TOOLS_LOCK_H_ */ diff --git a/Tools/ezOptionParser.h b/Tools/ezOptionParser.h index 500ffee72..dac9db6b7 100644 --- a/Tools/ezOptionParser.h +++ b/Tools/ezOptionParser.h @@ -2094,7 +2094,7 @@ void ezOptionParser::prettyPrint(std::string & out) { out += "First Args:\n"; for(i=0; i < (long int)firstArgs.size(); ++i) { - sprintf(tmp, "%d: %s\n", i+1, firstArgs[i]->c_str()); + snprintf(tmp, 256, "%d: %s\n", i+1, firstArgs[i]->c_str()); out += tmp; } @@ -2115,46 +2115,46 @@ void ezOptionParser::prettyPrint(std::string & out) { out += "\n"; // The flag names: for(j=0; j < (long int)g->flags.size()-1; ++j) { - sprintf(tmp, "%s, ", g->flags[j]->c_str()); + snprintf(tmp, 256, "%s, ", g->flags[j]->c_str()); out += tmp; } - sprintf(tmp, "%s:\n", g->flags.back()->c_str()); + snprintf(tmp, 256, "%s:\n", g->flags.back()->c_str()); out += tmp; if (g->isSet) { if (g->expectArgs) { if (g->args.empty()) { - sprintf(tmp, "%s (default)\n", g->defaults.c_str()); + snprintf(tmp, 256, "%s (default)\n", g->defaults.c_str()); out += tmp; } else { for(k=0; k < (long int)g->args.size(); ++k) { for(j=0; j < (long int)g->args[k]->size()-1; ++j) { - sprintf(tmp, "%s%c", g->args[k]->at(j)->c_str(), g->delim); + snprintf(tmp, 256, "%s%c", g->args[k]->at(j)->c_str(), g->delim); out += tmp; } - sprintf(tmp, "%s\n", g->args[k]->back()->c_str()); + snprintf(tmp, 256, "%s\n", g->args[k]->back()->c_str()); out += tmp; } } } else { // Set but no args expected. 
- sprintf(tmp, "Set\n"); + snprintf(tmp, 256, "Set\n"); out += tmp; } } else { - sprintf(tmp, "Not set\n"); + snprintf(tmp, 256, "Not set\n"); out += tmp; } } out += "\nLast Args:\n"; for(i=0; i < (long int)lastArgs.size(); ++i) { - sprintf(tmp, "%d: %s\n", i+1, lastArgs[i]->c_str()); + snprintf(tmp, 256, "%d: %s\n", i+1, lastArgs[i]->c_str()); out += tmp; } out += "\nUnknown Args:\n"; for(i=0; i < (long int)unknownArgs.size(); ++i) { - sprintf(tmp, "%d: %s\n", i+1, unknownArgs[i]->c_str()); + snprintf(tmp, 256, "%d: %s\n", i+1, unknownArgs[i]->c_str()); out += tmp; } }; diff --git a/Yao/YaoEvalWire.cpp b/Yao/YaoEvalWire.cpp index c8a4bad34..456a68922 100644 --- a/Yao/YaoEvalWire.cpp +++ b/Yao/YaoEvalWire.cpp @@ -16,6 +16,7 @@ #include "GC/Secret.hpp" #include "GC/Thread.hpp" #include "GC/ShareSecret.hpp" +#include "GC/ThreadMaster.hpp" #include "YaoCommon.hpp" void YaoEvalWire::random() @@ -256,6 +257,14 @@ void YaoEvalWire::convcbit2s(GC::Processor& processor, } } +void YaoEvalWire::run_tapes(const vector& args) +{ + auto& party = YaoEvaluator::s(); + party.master.machine.run_tapes(args); + if (party.continuous()) + party.untaint(); +} + template void YaoEvalWire::and_( GC::Processor >& processor, const vector& args); diff --git a/Yao/YaoEvalWire.h b/Yao/YaoEvalWire.h index 7257e5ad3..0f082657e 100644 --- a/Yao/YaoEvalWire.h +++ b/Yao/YaoEvalWire.h @@ -65,6 +65,8 @@ class YaoEvalWire : public YaoWire static void convcbit2s(GC::Processor& processor, const BaseInstruction& instruction); + static void run_tapes(const vector& args); + void set(const Key& key); void set(Key key, bool external); diff --git a/Yao/YaoEvaluator.cpp b/Yao/YaoEvaluator.cpp index 7b1b60154..652f5798e 100644 --- a/Yao/YaoEvaluator.cpp +++ b/Yao/YaoEvaluator.cpp @@ -77,13 +77,17 @@ void YaoEvaluator::run_from_store(GC::Program& program) bool YaoEvaluator::receive(Player& P) { +#ifdef DEBUG_YAO + printf("waiting to receive at %d in thread %d\n", processor.PC, thread_num); +#endif if (P.receive_long(0) == YaoCommon::DONE) return false; P.receive_player(0, gates); P.receive_player(0, output_masks); #ifdef DEBUG_YAO - cout << "received " << gates.size() << " gates and " << output_masks.size() - << " output masks at " << processor.PC << endl; + cout << "received " << gates.size() << " bytes for gates and " + << output_masks.size() << " output masks at " << processor.PC + << " in thread " << thread_num << endl; #endif return true; } diff --git a/Yao/YaoEvaluator.h b/Yao/YaoEvaluator.h index 749ba2878..416118eaf 100644 --- a/Yao/YaoEvaluator.h +++ b/Yao/YaoEvaluator.h @@ -43,7 +43,7 @@ class YaoEvaluator: public GC::Thread>, YaoEvaluator(int thread_num, YaoEvalMaster& master); - bool continuous() { return master.continuous and master.machine.nthreads == 1; } + bool continuous() { return master.continuous; } void pre_run(); void run(GC::Program& program); diff --git a/Yao/YaoGarbleWire.cpp b/Yao/YaoGarbleWire.cpp index fb1a534ee..e9f7e2d90 100644 --- a/Yao/YaoGarbleWire.cpp +++ b/Yao/YaoGarbleWire.cpp @@ -14,6 +14,7 @@ #include "GC/Secret.hpp" #include "GC/Thread.hpp" #include "GC/ShareSecret.hpp" +#include "GC/ThreadMaster.hpp" #include "YaoCommon.hpp" void YaoGarbleWire::random() @@ -245,3 +246,11 @@ void YaoGarbleWire::convcbit2s(GC::Processor& processor, processor.C[instruction.get_r(1) + i].get_bit(j)); } } + +void YaoGarbleWire::run_tapes(const vector& args) +{ + auto& garbler = YaoGarbler::s(); + if (garbler.continuous()) + garbler.untaint(); + garbler.master.machine.run_tapes(args); +} diff --git a/Yao/YaoGarbleWire.h 
b/Yao/YaoGarbleWire.h index 65feb8da2..20d56ef8f 100644 --- a/Yao/YaoGarbleWire.h +++ b/Yao/YaoGarbleWire.h @@ -66,6 +66,8 @@ class YaoGarbleWire : public YaoWire static void convcbit2s(GC::Processor& processor, const BaseInstruction& instruction); + static void run_tapes(const vector& args); + void randomize(PRNG& prng); void set(Key key, bool mask); diff --git a/Yao/YaoGarbler.cpp b/Yao/YaoGarbler.cpp index 647369a15..b9112c4b9 100644 --- a/Yao/YaoGarbler.cpp +++ b/Yao/YaoGarbler.cpp @@ -94,8 +94,9 @@ void YaoGarbler::post_run() void YaoGarbler::send(Player& P) { #ifdef DEBUG_YAO - cerr << "sending " << gates.size() << " gates and " << - output_masks.size() << " output masks at " << processor.PC << endl; + cerr << "sending " << gates.size() << " bytes for gates and " + << output_masks.size() << " output masks at " << processor.PC + << " in thread " << thread_num << endl; #endif P.send_long(1, YaoCommon::MORE); size_t size = gates.size(); diff --git a/Yao/YaoGarbler.h b/Yao/YaoGarbler.h index 0608336c8..8597182aa 100644 --- a/Yao/YaoGarbler.h +++ b/Yao/YaoGarbler.h @@ -56,7 +56,7 @@ class YaoGarbler: public GC::Thread>, YaoGarbler(int thread_num, YaoGarbleMaster& master); ~YaoGarbler(); - bool continuous() { return master.continuous and master.machine.nthreads == 1; } + bool continuous() { return master.continuous; } void run(GC::Program& program); void run(Player& P, bool continuous); diff --git a/Yao/YaoPlayer.cpp b/Yao/YaoPlayer.cpp index b1e0e0736..f943a9545 100644 --- a/Yao/YaoPlayer.cpp +++ b/Yao/YaoPlayer.cpp @@ -19,7 +19,7 @@ YaoPlayer::YaoPlayer(int argc, const char** argv) 0, // Required? 0, // Number of args expected. 0, // Delimiter if expecting multiple args. - "Evaluate only after garbling (default only with multi-threading).", // Help description. + "Evaluate only after garbling (very limited functionality).", // Help description. "-O", // Flag token. "--oneshot" // Flag token. ); diff --git a/deps/libOTe b/deps/libOTe index db02f8b8d..5d9f9c400 160000 --- a/deps/libOTe +++ b/deps/libOTe @@ -1 +1 @@ -Subproject commit db02f8b8d1e4805fb3bd86f1e06442d8acdc010c +Subproject commit 5d9f9c400c6acda734cbd20b5b8ea02392c0f75e diff --git a/doc/Compiler.rst b/doc/Compiler.rst index 34343c51e..593ddbc25 100644 --- a/doc/Compiler.rst +++ b/doc/Compiler.rst @@ -75,6 +75,7 @@ Compiler.ml module :no-undoc-members: :exclude-members: Tensor :show-inheritance: + :inherited-members: .. autofunction:: approx_sigmoid Compiler.decision_tree module @@ -129,3 +130,10 @@ Compiler.sqrt_oram module :no-undoc-members: :exclude-members: LinearPositionMap, PositionMap, RecursivePositionMap, refresh, shuffle_the_shuffle + + +Compiler.sorting module +----------------------- +.. automodule:: Compiler.sorting + :members: + :no-undoc-members: diff --git a/doc/Doxyfile b/doc/Doxyfile index f82046ebc..d816a9727 100644 --- a/doc/Doxyfile +++ b/doc/Doxyfile @@ -829,7 +829,7 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. 
-INPUT = ../Networking ../Tools/octetStream.h ../Processor/Data_Files.h ../Protocols/Replicated.h ../Protocols/ReplicatedPrep.h ../Protocols/MAC_Check_Base.h ../Processor/Input.h ../ExternalIO/Client.h ../Protocols/ProtocolSet.h ../Protocols/ProtocolSetup.h ../Math/gfp.h ../Math/gfpvar.h ../Math/Z2k.h ../FHE/Ciphertext.h ../FHE/FHE_Keys.h ../FHE/FHE_Params.h ../FHE/Plaintext.h ../Tools/random.h +INPUT = ../Networking ../Tools/octetStream.h ../Processor/Data_Files.h ../Protocols/Replicated.h ../Protocols/ReplicatedPrep.h ../Protocols/MAC_Check_Base.h ../Processor/Input.h ../ExternalIO/Client.h ../Protocols/ProtocolSet.h ../Protocols/ProtocolSetup.h ../Math/gfp.h ../Math/gfpvar.h ../Math/Z2k.h ../FHE/Ciphertext.h ../FHE/FHE_Keys.h ../FHE/FHE_Params.h ../FHE/Plaintext.h ../Tools/random.h ../Math/bigint.h # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses diff --git a/doc/add-protocol.rst b/doc/add-protocol.rst index 7b7199c03..1f741f36a 100644 --- a/doc/add-protocol.rst +++ b/doc/add-protocol.rst @@ -49,11 +49,21 @@ found in ``Protocols/Replicated.h``. Constant sharing and public output allows to execute the following program:: - print_ln('%s', sint(123).reveal()) + print_ln('result: %s', sint(123).reveal()) This allows to check the correct execution of further functionality. + Put the above code in ``Programs/Source/test.mpc`` and run the + following if your protocol works for two parties (otherwise add + more parties and change the ``-N`` argument accordingly):: + + make no-party.x + ./compile.py test + ./no-party.x 0 test -N 2 & ./no-party.x 1 test -N 2 + + This should output ``result: 123``. + 2. Fill in the operator functions in :c:type:`NoShare` and check them:: diff --git a/doc/compilation.rst b/doc/compilation.rst index 01753edde..993f75da6 100644 --- a/doc/compilation.rst +++ b/doc/compilation.rst @@ -1,4 +1,4 @@ -Compilation process +Compilation Process ------------------- The easiest way of using MP-SPDZ is using ``compile.py`` as @@ -38,7 +38,11 @@ The following options influence the computation domain: Specify a concrete prime modulus for computation. This can be used together with :option:`-F`, in which case *integer length* has to be at most the prime length minus two. The security implications of - overflows in the secrets do not go beyond incorrect results. + overflows in the secrets do not go beyond incorrect results. You + can use prime order domains without specifying this option. + Using this option involves algorithms for non-linear computation + which are generally more expensive but allow for integer lengths + that are close to the bit length of the prime. .. cmdoption:: -R --ring= diff --git a/doc/gen-readme.sh b/doc/gen-readme.sh index e7b825975..9e40fbc41 100755 --- a/doc/gen-readme.sh +++ b/doc/gen-readme.sh @@ -1,4 +1,7 @@ #!/bin/sh -echo '# Getting started' > readme.md -sed -e '1 d' ../README.md >> readme.md +echo '# Getting Started' > readme.md +sed -e '1 d' -e 's#(Programs/Source#(../Programs/Source#g' -e 's#(./Dockerfile#(../Dockerfile#' ../README.md >> readme.md + +echo '# Client Interface' > client-interface.md +cat ../ExternalIO/README.md >> client-interface.md diff --git a/doc/index.rst b/doc/index.rst index 648546c89..f072135bc 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,12 +1,18 @@ Welcome to MP-SPDZ's documentation!
=================================== +MP-SPDZ is a framework for multi-party computation, a +privacy-enhancing technology focused on input privacy. Please see +`this gentle introduction `_ for +more information on multi-party computation. + If you're new to MP-SPDZ, consider the following: 1. `Quickstart tutorial `_ -2. `Implemented protocols `_ -3. :ref:`troubleshooting` -4. :ref:`io` lists all the ways of getting data in and out. +2. :ref:`Machine learning quickstart <ml-quickstart>` +3. `Implemented protocols `_ +4. :ref:`troubleshooting` +5. :ref:`io` lists all the ways of getting data in and out. .. toctree:: :maxdepth: 4 @@ -17,9 +23,11 @@ If you're new to MP-SPDZ, consider the following: Compiler instructions low-level + ml-quickstart machine-learning networking io + client-interface non-linear preprocessing add-protocol diff --git a/doc/io.rst b/doc/io.rst index 50128d945..bd6b4db88 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -78,8 +78,8 @@ Clients (Non-computing Parties) :py:func:`Compiler.types.sint.receive_from_client` and :py:func:`Compiler.types.sint.reveal_to_clients` allow -communicating securely with the clients. See `this example -`_ +communicating securely with the clients. See `the relevant section +`_ covering both client code and server-side high-level code. :py:func:`Compiler.types.sint.input_tensor_from_client` and :py:func:`Compiler.types.MultiArray.reveal_to_clients`. The same diff --git a/doc/low-level.rst b/doc/low-level.rst index e8b7a4cc6..89302d97e 100644 --- a/doc/low-level.rst +++ b/doc/low-level.rst @@ -381,3 +381,9 @@ Domain Reference .. doxygenclass:: SignedZ2 :members: + +The following is not used as a domain, but it helps using the above types, +in particular ``gfp_`` and ``gfpvar_``. + +.. doxygenclass:: bigint + :members: diff --git a/doc/machine-learning.rst b/doc/machine-learning.rst index 54764e37f..e7ed6f70c 100644 --- a/doc/machine-learning.rst +++ b/doc/machine-learning.rst @@ -1,15 +1,156 @@ Machine Learning ---------------- -MP-SPDZ supports a limited subset of the Keras interface for machine -learning. This includes the SGD and Adam optimizers and the following -layer types: dense, 2D convolution, 2D max-pooling, and dropout. +The purpose of this document is to demonstrate the machine learning +functionality of MP-SPDZ, software implementing multi-party +computation, one of the most important privacy-enhancing +techniques. Please see `this gentle introduction +`_ for more information on +multi-party computation and the `installation instructions +`_ +on how to install the software. + +MP-SPDZ supports a number of machine learning algorithms such as +logistic and linear regression, decision trees, and some common deep +learning functionality. The latter includes the SGD and Adam +optimizers and the following layer types: dense, 2D convolution, 2D +max-pooling, and dropout. The machine learning code only works with arithmetic machines, that is, you cannot compile it with ``-B``. -In the following we will walk through the example code in -``keras_mnist_dense.mpc``, which trains a dense neural network for +This document explains how to input data, how to train a model, and +how to use an existing model for prediction. + + +Data Input +~~~~~~~~~~ + +It's easiest to input data if it's available during compilation, +either centrally or per party. Another way is to only define the data +size in the high-level code and put the data independently into the +right files used by the virtual machine.
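+ +As a minimal sketch of the second approach, you can declare only the +shape in the high-level code and have the values read at run time (the +shape here is illustrative; see Independent Data Input below for the +expected file format):: + + data = sfix.Tensor([1000, 10]) + data.input_from(0)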
+ + +Integrated Data Input +===================== + +If the data is available during compilation, for example as a PyTorch +or numpy tensor, you can use +:py:func:`Compiler.types.sfix.input_tensor_via` and +:py:func:`Compiler.types.sint.input_tensor_via`. Consider the +following code from ``breast_logistic.mpc`` (requiring +`scikit-learn `_):: + + from sklearn.datasets import load_breast_cancer + from sklearn.model_selection import train_test_split + + X, y = load_breast_cancer(return_X_y=True) + + # normalize column-wise + X /= X.max(axis=0) + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + + X_train = sfix.input_tensor_via(0, X_train) + y_train = sint.input_tensor_via(0, y_train) + +This downloads the Wisconsin Breast Cancer dataset, normalizes the +sample data, splits it into a training and a test set, and then +converts it to the relevant MP-SPDZ data structures. Under the +hood, the data is stored in ``Player-Data/Input-Binary-P0-0``, which +is where binary-encoded inputs for player 0 are read from. You +therefore have to copy said file if you execute the program in another place +than where you compiled it. + +MP-SPDZ also allows splitting the data input between parties, for +example horizontally:: + + a = sfix.input_tensor_via(0, X_train[len(X_train) // 2:]) + b = sfix.input_tensor_via(1, X_train[:len(X_train) // 2]) + X_train = a.concat(b) + + a = sint.input_tensor_via(0, y_train[len(y_train) // 2:]) + b = sint.input_tensor_via(1, y_train[:len(y_train) // 2]) + y_train = a.concat(b) + +The concatenation creates a unified secret tensor that can be used for +training over the whole dataset. Similarly, you can split a dataset +vertically:: + + a = sfix.input_tensor_via(0, X_train[:,:X_train.shape[1] // 2]) + b = sfix.input_tensor_via(1, X_train[:,X_train.shape[1] // 2:]) + X_train = a.concat_columns(b) + +The three approaches in this section can be run as follows:: + + Scripts/compile-run.py -E ring breast_logistic + Scripts/compile-run.py -E ring breast_logistic horizontal + Scripts/compile-run.py -E ring breast_logistic vertical + +In the latter two variants, the labels are all input via party 0. + +Finally, MP-SPDZ also facilitates inputting data that is only +available party by party. Party 0 can run:: + + a = sfix.input_tensor_via(0, X_train[:,:X_train.shape[1] // 2]) + b = sfix.input_tensor_via(1, shape=X_train[:,X_train.shape[1] // 2:].shape) + X_train = a.concat_columns(b) + y_train = sint.input_tensor_via(0, y_train) + +while party 1 runs:: + + a = sfix.input_tensor_via(0, shape=X_train[:,:X_train.shape[1] // 2].shape) + b = sfix.input_tensor_via(1, X_train[:,X_train.shape[1] // 2:]) + X_train = a.concat_columns(b) + y_train = sint.input_tensor_via(0, shape=y_train.shape) + +Note that the respective party only accesses the shape of data +they don't input. + +You can run this case by running the following on one hand: + +.. code-block:: console + + ./compile.py breast_logistic party0 + ./semi-party.x 0 breast_logistic-party0 + +and on the other (but on the same host): + +.. code-block:: console + + ./compile.py breast_logistic party1 + ./semi-party.x 1 breast_logistic-party1 + +The compilation will output a hash at the end, which has to agree +between the parties. Otherwise the virtual machine will abort with an +error message. To run the two parties on different hosts, use the +:ref:`networking options <networking>`. + + +Data preprocessing +"""""""""""""""""" + +Sometimes it's necessary to preprocess data.
We're using the following +code from ``torch_mnist_dense.mpc`` to demonstrate this:: + + ds = torchvision.datasets.MNIST(root='/tmp', train=train, download=True) + # normalize to [0,1] before input + samples = sfix.input_tensor_via(0, ds.data / 255) + labels = sint.input_tensor_via(0, ds.targets, one_hot=True) + +This downloads either the training or the test set of MNIST +(depending on :py:obj:`train`) and then processes it to make it +usable. The sample data is normalized from an 8-bit integer to the +interval :math:`[0,1]` by dividing by 255. This is done within PyTorch +for efficiency. Then, the labels are encoded as one-hot vectors +because this is necessary for multi-label training in MP-SPDZ. + + +Independent Data Input +====================== + +The example code in +``keras_mnist_dense.mpc`` trains a dense neural network for MNIST. It starts by defining tensors to hold data:: training_samples = sfix.Tensor([60000, 28, 28]) @@ -28,8 +169,122 @@ is used by ``convert.sh`` in `the preparation code test_labels.input_from(0) test_samples.input_from(0) -This is followed by Keras-like code setting up the model and training -it:: +The virtual machine then expects the data as whitespace-separated text +in ``Player-Data/Input-P0-0``. If you use ``binary=True`` with +:py:func:`input_from`, the input is expected in +``Player-Data/Input-Binary-P0-0``, value by value as single-precision +float or 64-bit integer in the machine byte order (most likely +little-endian these days). + + +Training +~~~~~~~~ + +There are a number of interfaces for different algorithms. + + +Logistic regression with SGD +============================ + +This is available via :py:class:`~Compiler.ml.SGDLogistic`. We will +use ``breast_logistic.mpc`` as an example. + +After inputting the data as above, you can call the following:: + + log = ml.SGDLogistic(20, 2, program) + log.fit(X_train, y_train) + +This trains a logistic regression model in secret for 20 epochs with +mini-batches of size 2. Adding the :py:obj:`program` object as a +parameter makes the instance use further command-line arguments. Most notably, you can +add ``approx`` to use a three-piece approximate sigmoid function: + +.. code-block:: console + + Scripts/compile-emulate.py breast_logistic approx + +Omitting it invokes the default sigmoid function. + +To check accuracy during training, you can call the following instead +of :py:func:`~Compiler.ml.SGDLogistic.fit`:: + + log.fit_with_testing(X_train, y_train, X_test, y_test) + +This outputs losses and accuracy for both the training and test set +after every epoch. + +You can use :py:func:`~Compiler.ml.SGDLogistic.predict` to predict +labels and :py:func:`~Compiler.ml.SGDLogistic.predict_proba` to +predict probabilities. The following outputs the correctness (0 for +correct, :math:`\pm 1` for incorrect) and a measure of how far off +the probability estimate is:: + + print_ln('%s', (log.predict(X_test) - y_test.get_vector()).reveal()) + print_ln('%s', (log.predict_proba(X_test) - y_test.get_vector()).reveal()) + + +Linear regression with SGD +========================== + +This is available via :py:class:`~Compiler.ml.SGDLinear`. It +implements an interface similar to logistic regression. The main +difference is that there is only +:py:func:`~Compiler.ml.SGDLinear.predict` for prediction as there is +no notion of labels in this case. See ``diabetes.mpc`` for an example +of linear regression.
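+ +As a rough sketch, assuming the constructor takes the same arguments +as :py:class:`~Compiler.ml.SGDLogistic` above (``diabetes.mpc`` is the +definitive example), usage could look as follows:: + + # epochs and mini-batch size as for SGDLogistic + lin = ml.SGDLinear(100, 2, program) + lin.fit(X_train, y_train) + print_ln('%s', (lin.predict(X_test) - y_test.get_vector()).reveal())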
+ + +PyTorch interface +================= + +MP-SPDZ supports importing sequential models from PyTorch as shown in +this code snippet in ``torch_mnist_dense.mpc``:: + + import torch.nn as nn + + net = nn.Sequential( + nn.Flatten(), + nn.Linear(28 * 28, 128), + nn.ReLU(), + nn.Linear(128, 128), + nn.ReLU(), + nn.Linear(128, 10) + ) + + from Compiler import ml + + ml.set_n_threads(int(program.args[2])) + + layers = ml.layers_from_torch(net, training_samples.shape, 128) + + optimizer = ml.SGD(layers) + optimizer.fit( + training_samples, + training_labels, + epochs=int(program.args[1]), + batch_size=128, + validation_data=(test_samples, test_labels), + program=program + ) + +This trains a network with three dense layers on MNIST using SGD, +softmax, and cross-entropy loss. The number of epochs and the number of threads are +taken from the command line. For example, the following trains the +network for 10 epochs using 4 threads:: + + Scripts/compile-emulate.py torch_mnist_dense 10 4 + +See ``Programs/Source/torch_*.mpc`` for further examples of the +PyTorch functionality, :py:func:`~Compiler.ml.Optimizer.fit` for +further training options, and :py:class:`~Compiler.ml.Adam` for an +alternative optimizer. + + +Keras interface +=============== + +The following Keras-like code sets up a model with three dense layers +and then trains it:: from Compiler import ml tf = ml @@ -55,28 +310,209 @@ it:: validation_data=(test_samples, test_labels) ) -Lastly, the model is stored on disk in secret-shared form:: - for var in model.trainable_variables: - var.write_to_file() +Decision trees +============== +MP-SPDZ can train decision trees for binary labels by using the +algorithm by `Hamada et al.`_ The following example in +``breast_tree.mpc`` trains a tree of height five before outputting the +difference between the prediction on a test set and the ground truth:: -Prediction -~~~~~~~~~~ + from Compiler.decision_tree import TreeClassifier + tree = TreeClassifier(max_depth=5) + tree.fit(X_train, y_train) + print_ln('%s', (tree.predict(X_test) - y_test.get_vector()).reveal()) + +You can run the example as follows: + +.. code-block:: console + + Scripts/compile-emulate.py breast_tree + +It is also possible to output the accuracy after every level:: + + tree.fit_with_testing(X_train, y_train, X_test, y_test) + +You can output the trained tree as follows:: + + tree.output() + +The format of the output follows the description of `Hamada et al.`_ + +MP-SPDZ by default uses probabilistic rounding for fixed-point +division, which is used to compute Gini coefficients in decision tree +training. This has the effect that the tree isn't deterministic. You +can switch to deterministic rounding as follows:: + + sfix.round_nearest = True + +The ``breast_tree.mpc`` example uses the following code to allow switching on +the command line:: + + sfix.set_precision_from_args(program) + +Nearest rounding can then be activated as follows: + +.. code-block:: console + + Scripts/compile-emulate.py breast_tree nearest + +.. _`Hamada et al.`: https://arxiv.org/abs/2112.12906 + + +Data preparation +"""""""""""""""" + +MP-SPDZ currently supports continuous and binary attributes but not +discrete non-binary attributes.
However, such attributes can be +converted as follows using the `pandas `_ +library:: + + import pandas + from sklearn.model_selection import train_test_split + from Compiler import decision_tree + + data = pandas.read_csv( + 'https://datahub.io/machine-learning/adult/r/adult.csv') -The example code in ``keras_mnist_dense_predict.mpc`` uses the model -stored above for prediction. Much of the setup is the same, but + data, attr_types = decision_tree.preprocess_pandas(data) + + # label is last column + X = data[:,:-1] + y = data[:,-1] + + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) + +This downloads the adult dataset and converts discrete attributes to +binary using one-hot encoding. See ``easy_adult.mpc`` for the full +example. :py:obj:`attr_types` has to be used to indicate the +attribute types during training:: + + tree.fit(X_train, y_train, attr_types=attr_types) + + +Loading pre-trained models +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is possible to import pre-trained models from PyTorch as shown in +``torch_mnist_lenet_predict.mpc``:: + + net = nn.Sequential( + nn.Conv2d(1, 20, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Conv2d(20, 50, 5), + nn.ReLU(), + nn.MaxPool2d(2), + nn.Flatten(), + nn.ReLU(), + nn.Linear(800, 500), + nn.ReLU(), + nn.Linear(500, 10) + ) + + # train for a bit + transform = torchvision.transforms.Compose( + [torchvision.transforms.ToTensor()]) + ds = torchvision.datasets.MNIST(root='/tmp', transform=transform, train=True) + optimizer = torch.optim.Adam(net.parameters(), amsgrad=True) + criterion = nn.CrossEntropyLoss() + + for i, data in enumerate(torch.utils.data.DataLoader(ds, batch_size=128)): + inputs, labels = data + optimizer.zero_grad() + outputs = net(inputs) + loss = criterion(outputs, labels) + loss.backward() + optimizer.step() + +This trains LeNet on MNIST for one epoch. The model can then be input +and used in MP-SPDZ:: + + from Compiler import ml + layers = ml.layers_from_torch(net, training_samples.shape, 128, input_via=0) + optimizer = ml.Optimizer(layers) + n_correct, loss = optimizer.reveal_correctness(test_samples, test_labels, 128, running=True) + print_ln('Secure accuracy: %s/%s', n_correct, len(test_samples)) + +This outputs the accuracy of the network. + + +Storing and loading models +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Both the Keras interface and the native +:py:class:`~Compiler.ml.Optimizer` class support an interface to +iterate through all model parameters. The following code from +``torch_mnist_dense.mpc`` uses it to store the model on disk in +secret-shared form:: + + for var in optimizer.trainable_variables: + var.write_to_file() + +The example code in ``torch_mnist_dense_test.mpc`` then uses the +model stored above for prediction. Much of the setup is the same, but instead of training it reads the model from disk:: - model.build(test_samples.sizes) + optimizer = ml.Optimizer(layers) start = 0 - for var in model.trainable_variables: + for var in optimizer.trainable_variables: start = var.read_from_file(start) -Then it runs the prediction:: + +Then it runs the accuracy test:: - guesses = model.predict(test_samples) + n_correct, loss = optimizer.reveal_correctness(test_samples, test_labels, 128) + print_ln('Accuracy: %s/%s', n_correct, len(test_samples)) Using ``var.input_from(player)`` instead, the model would be input privately by a party.
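+ +For example, a minimal sketch of private model input, reusing the +parameter iteration above with party 0 as the model owner:: + + for var in optimizer.trainable_variables: + var.input_from(0) + +Party 0 would then supply the parameters via its input file as +described under Independent Data Input.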
+ + +Exporting models +~~~~~~~~~~~~~~~~ + +Models can be exported as follows:: + + optimizer.reveal_model_to_binary() + +if :py:obj:`optimizer` is an instance of +:py:class:`Compiler.ml.Optimizer`. The model parameters are then +stored in ``Player-Data/Binary-Output-P<playerno>-0``. They can be +imported for use in PyTorch:: + + f = open('Player-Data/Binary-Output-P0-0') + + state = net.state_dict() + + for name in state: + shape = state[name].shape + size = numpy.prod(shape) + var = numpy.fromfile(f, 'double', count=size) + var = var.reshape(shape) + state[name] = torch.Tensor(var) + + net.load_state_dict(state) + +if :py:obj:`net` is a PyTorch module with the correct meta-parameters. +This demonstrates that the parameters are stored with double precision +in the canonical order. + +There are a number of scripts in ``Scripts``, namely +``torch_cifar_alex_import.py``, ``torch_mnist_dense_import.py``, and +``torch_mnist_lenet_import.py``, which import the models output by +``torch_alex_test.mpc``, ``torch_mnist_dense.mpc``, and +``torch_mnist_lenet_predict.mpc``, respectively. For example, you can run: + +.. code-block:: console + + $ Scripts/compile-emulate.py torch_mnist_lenet_predict + ... + Secure accuracy: 9822/10000 + ... + $ Scripts/torch_mnist_lenet_import.py + Test accuracy of the network: 98.22 % + +The accuracy values might vary as the model is freshly trained, but +they should match. diff --git a/doc/ml-quickstart.rst b/doc/ml-quickstart.rst new file mode 100644 index 000000000..f6114378c --- /dev/null +++ b/doc/ml-quickstart.rst @@ -0,0 +1,92 @@ +.. _ml-quickstart: + +Machine Learning Quickstart +--------------------------- + +This document is a short introduction to running privacy-preserving +logistic regression in MP-SPDZ. It assumes that you have the framework +already installed as explained in the `installation instructions +`_. +For more information on how to run machine learning algorithms in MP-SPDZ, +see the `full machine learning section +`_. + +The easiest way to use MP-SPDZ is to put Python code in an ``.mpc`` file in +``Programs/Source``, for example ``Programs/Source/foo.mpc``. Put the +following code there, which trains and evaluates a model on a tiny inline dataset:: + + X = sfix.input_tensor_via(0, [[1, 2, 3], # 2 samples + [11, 12, 13]]) + y = sint.input_tensor_via(0, [0, 1]) # 2 labels + + from Compiler import ml + log = ml.SGDLogistic(100) + log.fit(X, y) + + print_ln('%s', log.predict(X).reveal()) + +The first two lines make the data available to the secure +computation. The next lines create a logistic regression instance and +train it (for one hundred epochs). Finally, the last line uses the +instance for prediction and outputs the results. + +After adding all the above code to ``Programs/Source/foo.mpc``, you +can run it either insecurely: + +.. code-block:: console + + Scripts/compile-emulate.py foo + +or securely with three parties on the same machine: + +.. code-block:: console + + Scripts/compile-run.py -E ring foo + +The first call should give the following output: + ..
code-block:: console + + $ Scripts/compile-emulate.py foo + Default bit length: 63 + Default security parameter: 40 + Compiling file Programs/Source/foo.mpc + Writing binary data to Player-Data/Input-Binary-P0-0 + Setting learning rate to 0.01 + Using SGD + Initializing dense weights in [-1.224745,1.224745] + Writing to Programs/Bytecode/foo-multithread-1.bc + 2 runs per epoch + Writing to Programs/Bytecode/foo-multithread-3.bc + Writing to Programs/Bytecode/foo-multithread-4.bc + Writing to Programs/Bytecode/foo-multithread-5.bc + Initializing dense weights in [-1.224745,1.224745] + Writing to Programs/Bytecode/foo-multithread-7.bc + Writing to Programs/Bytecode/foo-multithread-8.bc + Writing to Programs/Bytecode/foo-multithread-9.bc + Writing to Programs/Schedules/foo.sch + Writing to Programs/Bytecode/foo-0.bc + Hash: 33f8d22d99960897f41fb2da31e7f5a0501d2e1071789e52d73b4043e5343831 + Program requires at most: + 8 integer inputs from player 0 + 61054 integer bits + 190109 integer triples + 200 matrix multiplications (1x3 * 3x1) + 200 matrix multiplications (3x1 * 1x1) + 1 matrix multiplications (2x3 * 3x1) + 28406 virtual machine rounds + Using security parameter 40 + Trying to run 64-bit computation + Using SGD + done with epoch 99 + [0, 1] + The following benchmarks are including preprocessing (offline phase). + Time = 0.0250086 seconds + +See `the documentation +`_ +for further +options such as different protocols or running remotely and `the +machine learning section +`_ for +other machine learning methods. diff --git a/doc/networking.rst b/doc/networking.rst index c7e031f10..1ec9e1581 100644 --- a/doc/networking.rst +++ b/doc/networking.rst @@ -1,3 +1,5 @@ +.. _networking: + Networking ---------- diff --git a/doc/troubleshooting.rst b/doc/troubleshooting.rst index 6a32bd37e..f2295bc9e 100644 --- a/doc/troubleshooting.rst +++ b/doc/troubleshooting.rst @@ -25,6 +25,16 @@ lists only exists at compile time. Consider using :py:class:`~Compiler.types.Array`. + +Local variable referenced before assignment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This error can occur if you try to reassign a variable in a run-time +loop like :py:func:`~Compiler.library.for_range`. Use +:py:func:`~Compiler.program.Tape.Register.update` instead of assignment. See +:py:func:`~Compiler.library.for_range` for an example. +You can also use :py:func:`~Compiler.types.sint.iadd` instead of ``+=``. + + ``compile.py`` takes too long or runs out of memory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -35,6 +45,16 @@ resulting in potentially too much virtual machine code. Consider using version. + +Incorrect results when using :py:class:`~Compiler.types.sfix` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is most likely caused by an overflow of the precision +parameters because the default choice only accommodates numbers up +to around 16,000. See :py:class:`~Compiler.types.sfix` for an +introduction and :py:func:`~Compiler.types.sfix.set_precision` for how +to change the precision. + + Order of memory instructions not preserved ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~