diff --git a/include/helib/EncryptedArray.h b/include/helib/EncryptedArray.h
index 9359feb19..d34b09ab4 100644
--- a/include/helib/EncryptedArray.h
+++ b/include/helib/EncryptedArray.h
@@ -2651,7 +2651,12 @@ inline void totalSums(Ctxt& ctxt)
 
 //! @brief Map all non-zero slots to 1, leaving zero slots as zero.
 //! Assumes that r=1, and that all the slots contain elements from GF(p^d).
-void mapTo01(const EncryptedArray& ea, Ctxt& ctxt);
+//! @param ea The encrypted array; its degree d fixes the slot field GF(p^d).
+//! @param ctxt The ciphertext to transform, updated in place.
+//! @param multithread If `true` (the default) and NTL reports more than one
+//! available thread, the d - 1 Frobenius automorphisms are computed in
+//! parallel; otherwise the single-threaded O(log d) variant is used.
+void mapTo01(const EncryptedArray& ea, Ctxt& ctxt, bool multithread = true);
 // Implemented in eqtesting.cpp. We compute
 // x^{p^d-1} = x^{(1+p+...+p^{d-1})*(p-1)}
 // by setting y=x^{p-1} and then outputting y * y^p * ... * y^{p^{d-1}},
diff --git a/misc/psi/io/io.h b/misc/psi/io/io.h
index ee0059bf7..abfed3382 100644
--- a/misc/psi/io/io.h
+++ b/misc/psi/io/io.h
@@ -76,11 +76,14 @@ helib::Database readDbFromFile(const std::string& databaseFilePath,
       }
     }
   } else { // Ctxt query
-    for (long i = 0; i < nrow; ++i) {
-      for (long j = 0; j < ncol; ++j) {
-        reader.value().readDatum(data(i, j), i, j);
-      }
+    NTL_EXEC_RANGE(nrow * ncol, first, last)
+    Reader threadReader(reader.value());
+    for (long i = first; i < last; ++i) {
+      long row = i / ncol;
+      long col = i % ncol;
+      threadReader.readDatum(data(row, col), row, col);
     }
+    NTL_EXEC_RANGE_END
   }
 
   return helib::Database(data, contextp);
@@ -128,11 +131,14 @@ helib::Matrix readQueryFromFile(const std::string& queryFilePath,
     }
   } else { // Ctxt query
     // Read in ctxts
-    for (long i = 0; i < nrow; ++i) {
-      for (long j = 0; j < ncol; ++j) {
-        reader.value().readDatum(query(i, j), i, j);
-      }
+    NTL_EXEC_RANGE(nrow * ncol, first, last)
+    Reader threadReader(reader.value());
+    for (long i = first; i < last; ++i) {
+      long row = i / ncol;
+      long col = i % ncol;
+      threadReader.readDatum(query(row, col), row, col);
     }
+    NTL_EXEC_RANGE_END
     if (ncol == 1) { // Transpose to make row vector
       query.transpose();
     }
diff --git a/src/eqtesting.cpp b/src/eqtesting.cpp
index b310190d5..d11bb1786 100644
--- 
a/src/eqtesting.cpp +++ b/src/eqtesting.cpp @@ -9,6 +9,21 @@ * See the License for the specific language governing permissions and * limitations under the License. See accompanying LICENSE file. */ + +/* Copyright (C) 2022 Intel Corporation + * SPDX-License-Identifier: Apache-2.0 + * + * Modifying HElib to optimize the 01 map. + * Contributions include + * Modified: + * mapTo01 + * added parallelism to existing logic for norm calculation + * added alternative logic for norm calculation which uses log(d) + * automorphisms on a single core + * added an additional optional argument `multithread` which determines + * which version to run + * + */ /** * @file eqtesting.cpp * @brief Useful functions for equality testing... @@ -17,6 +32,7 @@ #include #include #include +#include #include @@ -29,10 +45,7 @@ namespace helib { // and then outputting y * y^p * ... * y^{p^{d-1}}, with exponentiation to // powers of p done via Frobenius. -// FIXME: the computation of the "norm" y * y^p * ... * y^{p^{d-1}} -// can be done using O(log d) automorphisms, rather than O(d). 
-
-void mapTo01(const EncryptedArray& ea, Ctxt& ctxt)
+void mapTo01(const EncryptedArray& ea, Ctxt& ctxt, bool multithread)
 {
   long p = ctxt.getPtxtSpace();
   if (p != ea.getPAlgebra().getP()) // ptxt space is p^r for r>1
@@ -40,13 +53,45 @@ void mapTo01(const EncryptedArray& ea, Ctxt& ctxt)
 
   if (p > 2)
     ctxt.power(p - 1); // set y = x^{p-1}
-
   long d = ea.getDegree();
-  if (d > 1) { // compute the product of the d automorphisms
-    std::vector<Ctxt> v(d, ctxt);
-    for (long i = 1; i < d; i++)
-      v[i].frobeniusAutomorph(i);
-    totalProduct(ctxt, v);
+  if (d <= 1) // the norm of y is y itself, so nothing is left to do
+    return;
+
+  // When multithread is true and NTL reports more than one available thread,
+  // the d - 1 automorphisms are computed in parallel; otherwise the
+  // sequential O(log d) variant below is used.
+  // TODO: investigate this trade off more thoroughly
+  // Computing in parallel over t threads has runtime approximately
+  // (d - 1)/t, whereas single thread has runtime approx log(d)
+  if ((NTL::AvailableThreads() > 1) && multithread) {
+    // Compute the d - 1 non-trivial Frobenius automorphisms in parallel;
+    // v[0] is left as y itself.
+    std::vector<Ctxt> v(d, ctxt);
+    NTL_EXEC_RANGE(d - 1, first, last)
+    for (long i = first; i < last; i++)
+      v[i + 1].frobeniusAutomorph(i + 1);
+    NTL_EXEC_RANGE_END
+    // and compute the product of the d automorphisms
+    totalProduct(ctxt, v);
+  } else {
+    // Computation of the "norm" y * y^p * ... * y^{p^{d-1}}
+    // using O(log d) automorphisms, rather than O(d).
+    // Invariant: ctxt is the product of the first e automorphisms of y,
+    // and e tracks the leading bits of d (double, then add one if set).
+    long e = 1;
+    long b = NTL::NumBits(d);
+    Ctxt orig = ctxt;
+    for (long i = b - 2; i >= 0; i--) {
+      Ctxt tmp = ctxt;
+      tmp.frobeniusAutomorph(e);
+      ctxt *= tmp; // now covers 2e automorphisms
+      e *= 2;
+      if (NTL::bit(d, i)) {
+        ctxt.frobeniusAutomorph(1);
+        ctxt *= orig; // now covers e + 1 automorphisms
+        e++;
+      }
+    }
   }
 }
 