Skip to content

Commit

Permalink
Updates - Frobenius automorphism and multithread reader for PSI I/O
Browse files Browse the repository at this point in the history
Optimizations of Frobenius automorphisms
Multi-threaded reader for PSI iO
  • Loading branch information
faberga authored Dec 6, 2022
2 parents 4ae5466 + fc1dc72 commit d7be6f0
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 19 deletions.
2 changes: 1 addition & 1 deletion include/helib/EncryptedArray.h
Original file line number Diff line number Diff line change
Expand Up @@ -2651,7 +2651,7 @@ inline void totalSums(Ctxt& ctxt)

//! @brief Map all non-zero slots to 1, leaving zero slots as zero.
//! Assumes that r=1, and that all the slots contain elements from GF(p^d).
void mapTo01(const EncryptedArray& ea, Ctxt& ctxt);
void mapTo01(const EncryptedArray& ea, Ctxt& ctxt, bool multithread = true);
// Implemented in eqtesting.cpp. We compute
// x^{p^d-1} = x^{(1+p+...+p^{d-1})*(p-1)}
// by setting y=x^{p-1} and then outputting y * y^p * ... * y^{p^{d-1}},
Expand Down
22 changes: 14 additions & 8 deletions misc/psi/io/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,14 @@ helib::Database<TXT> readDbFromFile(const std::string& databaseFilePath,
}
}
} else { // Ctxt query
for (long i = 0; i < nrow; ++i) {
for (long j = 0; j < ncol; ++j) {
reader.value().readDatum(data(i, j), i, j);
}
NTL_EXEC_RANGE(nrow * ncol, first, last)
Reader<TXT> threadReader(reader.value());
for (long i = first; i < last; ++i) {
long row = i / ncol;
long col = i % ncol;
threadReader.readDatum(data(row, col), row, col);
}
NTL_EXEC_RANGE_END
}

return helib::Database<TXT>(data, contextp);
Expand Down Expand Up @@ -128,11 +131,14 @@ helib::Matrix<TXT> readQueryFromFile(const std::string& queryFilePath,
}
} else { // Ctxt query
// Read in ctxts
for (long i = 0; i < nrow; ++i) {
for (long j = 0; j < ncol; ++j) {
reader.value().readDatum(query(i, j), i, j);
}
NTL_EXEC_RANGE(nrow * ncol, first, last)
Reader<TXT> threadReader(reader.value());
for (long i = first; i < last; ++i) {
long row = i / ncol;
long col = i % ncol;
threadReader.readDatum(query(row, col), row, col);
}
NTL_EXEC_RANGE_END
if (ncol == 1) { // Transpose to make row vector
query.transpose();
}
Expand Down
59 changes: 49 additions & 10 deletions src/eqtesting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,21 @@
* See the License for the specific language governing permissions and
* limitations under the License. See accompanying LICENSE file.
*/

/* Copyright (C) 2022 Intel Corporation
* SPDX-License-Identifier: Apache-2.0
*
* Modifying HElib to optimize the 01 map.
* Contributions include
* Modified:
* mapTo01
* added parallelism to existing logic for norm calculation
* added alternative logic for norm calculation which uses log(d)
* automorphisms on a single core
* added an additional optional argument `multithread` which determines
* which version to run
*
*/
/**
* @file eqtesting.cpp
* @brief Useful functions for equality testing...
Expand All @@ -17,6 +32,7 @@
#include <helib/timing.h>
#include <helib/EncryptedArray.h>
#include <helib/Ptxt.h>
#include <NTL/BasicThreadPool.h>

#include <cstdio>

Expand All @@ -29,24 +45,47 @@ namespace helib {
// and then outputting y * y^p * ... * y^{p^{d-1}}, with exponentiation to
// powers of p done via Frobenius.

// FIXME: the computation of the "norm" y * y^p * ... * y^{p^{d-1}}
// can be done using O(log d) automorphisms, rather than O(d).

void mapTo01(const EncryptedArray& ea, Ctxt& ctxt)
void mapTo01(const EncryptedArray& ea, Ctxt& ctxt, bool multithread)
{
long p = ctxt.getPtxtSpace();
if (p != ea.getPAlgebra().getP()) // ptxt space is p^r for r>1
throw LogicError("mapTo01 not implemented for r>1");

if (p > 2)
ctxt.power(p - 1); // set y = x^{p-1}

long d = ea.getDegree();
if (d > 1) { // compute the product of the d automorphisms
std::vector<Ctxt> v(d, ctxt);
for (long i = 1; i < d; i++)
v[i].frobeniusAutomorph(i);
totalProduct(ctxt, v);
// TODO: investigate this trade off more thoroughly
// Computing in parallel over t threads has runtime approximately
// (d - 1)/t, whereas single thread has runtime approx log(d)
if ((NTL::AvailableThreads() > 1) && multithread) {
// Compute O(d) Frobenius automorphisms in parallel
if (d > 1) {
// compute the d - 1 automorphisms in parallel
std::vector<Ctxt> v(d, ctxt);
NTL_EXEC_RANGE(d - 1, first, last)
for (long i = first; i < last; i++)
v[i + 1].frobeniusAutomorph(i + 1);
NTL_EXEC_RANGE_END
// and compute the product of the d automorphisms
totalProduct(ctxt, v);
}
} else {
// Compute of the "norm" y * y^p * ... * y^{p^{d-1}}
// using O(log d) automorphisms, rather than O(d).
long e = 1;
long b = NTL::NumBits(d);
Ctxt orig = ctxt;
for (long i = b - 2; i >= 0; i--) {
Ctxt tmp = ctxt;
tmp.frobeniusAutomorph(e);
ctxt *= tmp;
e *= 2;
if (NTL::bit(d, i)) {
ctxt.frobeniusAutomorph(1);
ctxt *= orig;
e++;
}
}
}
}

Expand Down

0 comments on commit d7be6f0

Please sign in to comment.