Skip to content

Commit

Permalink
Improve multi-thread mode
Browse files Browse the repository at this point in the history
  • Loading branch information
xevisalle committed Nov 4, 2023
1 parent bd24ced commit b028643
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 255 deletions.
13 changes: 6 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ LIBMAC = /opt/homebrew/lib/libgmp.a /opt/homebrew/opt/libomp/lib/libomp.a /opt/h
LIBCROSS = $(MCLPATH)/lib/libmclbn384_256.a $(MCLPATH)/lib/libmcl.a $(GMPPATH)/lib/libgmp.a -I $(MCLPATH)/include -I $(GMPPATH)/include -lstdc++
SRC = $(shell pwd)/src/*.c $(shell pwd)/circuits/*.c $(shell pwd)/src/*.h

MULEXP = MCL_MULEXP
CURVE = BN128
ARCH = None
MULTI = off
Expand All @@ -22,22 +21,22 @@ endif

zpie: $(SRC)
ifeq ($(ARCH), x86)
$(CC) -m32 $(COMMON) $(LIBCROSS) -D $(MULEXP) -D $(CURVE) $(MULTI_SET)
$(CC) -m32 $(COMMON) $(LIBCROSS) -D $(CURVE) $(MULTI_SET)

else ifeq ($(ARCH), x86_64)
$(CC) -m64 $(COMMON) $(LIBCROSS) -D $(MULEXP) -D $(CURVE) $(MULTI_SET)
$(CC) -m64 $(COMMON) $(LIBCROSS) -D $(CURVE) $(MULTI_SET)

else ifeq ($(ARCH), aarch64)
$(CAARCH64) $(COMMON) $(LIBCROSS) -D $(MULEXP) -D $(CURVE) $(MULTI_SET)
$(CAARCH64) $(COMMON) $(LIBCROSS) -D $(CURVE) $(MULTI_SET)

else ifeq ($(ARCH), arm)
$(CARM) $(COMMON) $(LIBCROSS) -D $(MULEXP) -D $(CURVE) $(MULTI_SET)
$(CARM) $(COMMON) $(LIBCROSS) -D $(CURVE) $(MULTI_SET)

else ifeq ($(shell uname), Darwin)
$(CC) $(COMMON) $(LIBMAC) -D $(MULEXP) -D $(CURVE) $(MULTI_SET)
# Darwin build: link against the static libraries listed in LIBMAC.
# A space must separate $(LIBMAC) from -D, otherwise the last library path
# and the curve define are fused into a single bogus argument.
$(CC) $(COMMON) $(LIBMAC) -D $(CURVE) $(MULTI_SET)

else
$(CC) $(COMMON) $(LIB) -D $(MULEXP) -D $(CURVE) $(MULTI_SET)
$(CC) $(COMMON) $(LIB) -D $(CURVE) $(MULTI_SET)

endif
test:
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ ZPiE needs [GMP](https://gmplib.org/) and [MCL](https://github.com/herumi/mcl).
sudo apt install libgmp-dev libcunit1-dev
git clone https://github.com/herumi/mcl
cd mcl
make -j4
make -j8
```

To use multi-threaded execution, compile MCL with `make -j8 MCL_USE_OMP=1` instead.

## Test
ZPiE can be tested as follows:

Expand Down Expand Up @@ -57,7 +59,7 @@ BN128 (default)
BLS12_381
```

We can specify to run the code in multi-thread mode:
The code can also be run in multi-threaded mode, provided MCL was compiled with `MCL_USE_OMP=1`:

```
make bench MULTI=on
Expand Down
7 changes: 1 addition & 6 deletions src/common/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,7 @@ void bos_coster_bp(mclBnG1 *chunk, mclBnG1 *points, mclBnFr *scalars, int heapsi

static inline void mult_exp(mclBnG1 *chunk, mclBnG1 *points, mclBnFr *scalars, int heapsize)
{
#ifdef BOSCOSTER_MULEXP
if ((heapsize > 32) && ((heapsize != 0) && ((heapsize & (heapsize - 1)) == 0))) bos_coster_bp(chunk, points, scalars, heapsize);
else mclBnG1_mulVec(chunk, points, scalars, heapsize);
#elif MCL_MULEXP
mclBnG1_mulVec(chunk, points, scalars, heapsize);
#endif
mclBnG1_mulVec(chunk, points, scalars, heapsize);
}

char *to_hex(const unsigned char *array, size_t length)
Expand Down
248 changes: 8 additions & 240 deletions src/gro16/prover.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,158 +96,6 @@ void mul_exp(struct mulExpResult *result, mpz_t *uwProof, proving_key pk)
{
int n = mpz_get_ui(pk.Ne);

int totTh = 16;

for (int i = nConst; i < nPublic + nConst; i++)
{
mpz_set(uwProof[i-nConst], uw[i]);
}

#ifdef MULTI_SET
mclBnG1 htdeltaTh[totTh];

#pragma omp parallel num_threads(totTh)
{
mpz_t *exp[n/totTh];
mclBnFr frFactor;
int th = omp_get_thread_num();

for (int i = th*(n/totTh); i < (th+1)*(n/totTh); i++)
{
fr_to_mpz(&wM[i], &AsFr[i]);
exp[i-(th*(n/totTh))] = &wM[i];
}

bos_coster(exp, n/totTh, 1, &pk);
mpz_to_fr(&frFactor, exp[0]);
mclBnG1_mul(&htdeltaTh[th], &pk.xt1_rand[exp[0]-wM], &frFactor);
}

mclBnG1_add(&result->htdelta, &htdeltaTh[0], &htdeltaTh[1]);

for (int i = 2; i < totTh; i++)
{
mclBnG1_add(&result->htdelta, &result->htdelta, &htdeltaTh[i]);
}

mclBnG1 thA1[totTh];
mclBnG1 thB1[totTh];
mclBnG2 thB2[totTh];
mclBnG1 thC1[totTh];

#pragma omp parallel num_threads(totTh)
{
int sizeM;
int end;
int th = omp_get_thread_num();

if (th == totTh-1)
{
sizeM = (M - (totTh*(M/totTh))) + (M/totTh);
end = M;
}
else
{
sizeM = M/totTh;
end = (th+1)*(M/totTh);
}

mclBnFr frFactor;
mpz_t *exp[sizeM];

int start = th*(M/totTh);

for (int i = start; i < end; i++)
{
exp[i-start] = &uw[i];
}

bos_coster(exp, sizeM, 0, &pk);

mpz_to_fr(&frFactor, exp[0]);
mclBnG1_mul(&thA1[th], &pk.A1[exp[0]-uw], &frFactor);
mclBnG1_mul(&thB1[th], &pk.B1[exp[0]-uw], &frFactor);
mclBnG2_mul(&thB2[th], &pk.B2[exp[0]-uw], &frFactor);
mclBnG1_mul(&thC1[th], &pk.pk1[exp[0]-uw], &frFactor);
}

mclBnG1_add(&result->uwA1, &thA1[0], &thA1[1]);
mclBnG1_add(&result->uwB1, &thB1[0], &thB1[1]);
mclBnG2_add(&result->uwB2, &thB2[0], &thB2[1]);
mclBnG1_add(&result->uwC1, &thC1[0], &thC1[1]);

for (int i = 2; i < totTh; i++)
{
mclBnG1_add(&result->uwA1, &result->uwA1, &thA1[i]);
mclBnG1_add(&result->uwB1, &result->uwB1, &thB1[i]);
mclBnG2_add(&result->uwB2, &result->uwB2, &thB2[i]);
mclBnG1_add(&result->uwC1, &result->uwC1, &thC1[i]);
}

#else
mpz_t *exp[n];
mclBnFr frFactor;
proving_key bpk;
bpk.xt1 = (mclBnG1*) malloc((n) * sizeof(mclBnG1));

for (int i = 0; i < n; i++)
{
fr_to_mpz(&wM[i], &AsFr[i]);
exp[i] = &wM[i];
mclBnG1_add(&bpk.xt1[i], &bpk.xt1[i], &pk.xt1_rand[i]);
}

bos_coster(exp, n, 1, &bpk);
mpz_to_fr(&frFactor, exp[0]);
mclBnG1_mul(&result->htdelta, &bpk.xt1[exp[0]-wM], &frFactor);

/*mpz_t *expM[M];
bpk.A1 = (mclBnG1*) malloc((M) * sizeof(mclBnG1));
bpk.B1 = (mclBnG1*) malloc((M) * sizeof(mclBnG1));
bpk.B2 = (mclBnG2*) malloc((M) * sizeof(mclBnG2));
bpk.pk1 = (mclBnG1*) malloc((M) * sizeof(mclBnG1));
#pragma omp parallel for
for (int i = 0; i < M; i++)
{
expM[i] = &uw[i];
mclBnG1_add(&bpk.A1[i], &bpk.A1[i], &pk.A1[i]);
mclBnG1_add(&bpk.B1[i], &bpk.B1[i], &pk.B1[i]);
mclBnG2_add(&bpk.B2[i], &bpk.B2[i], &pk.B2[i]);
mclBnG1_add(&bpk.pk1[i], &bpk.pk1[i], &pk.pk1[i]);
}
bos_coster(expM, M, 0, &bpk);
mpz_to_fr(&frFactor, expM[0]);
mclBnG1_mul(&result->uwA1, &bpk.A1[expM[0]-uw], &frFactor);
mclBnG1_mul(&result->uwB1, &bpk.B1[expM[0]-uw], &frFactor);
mclBnG2_mul(&result->uwB2, &bpk.B2[expM[0]-uw], &frFactor);
mclBnG1_mul(&result->uwC1, &bpk.pk1[expM[0]-uw], &frFactor);*/

// to be replaced ---->
mclBnFr uwFactor[M];
mclBnFr uwFactorPublic[M-(nPublic + nConst)];

#pragma omp parallel for
for (int i = 0; i < M; i++)
{
mpz_to_fr(&uwFactor[i], &uw[i]);
if(i >= (nPublic + nConst)) mpz_to_fr(&uwFactorPublic[i-(nPublic + nConst)], &uw[i]);
}

mclBnG1_mulVec(&result->uwA1, pk.A1, uwFactor, M);
mclBnG1_mulVec(&result->uwB1, pk.B1, uwFactor, M);
mclBnG2_mulVec(&result->uwB2, pk.B2, uwFactor, M);
mclBnG1_mulVec(&result->uwC1, pk.pk1, uwFactorPublic, M-(nPublic + nConst));
// <------ to be replaced
#endif
}

void mcl_mul_exp(struct mulExpResult *result, mpz_t *uwProof, proving_key pk)
{
int n = mpz_get_ui(pk.Ne);

mclBnFr uwFactor[M];
mclBnFr uwFactorPublic[M-(nPublic + nConst)];

Expand All @@ -263,75 +111,13 @@ void mcl_mul_exp(struct mulExpResult *result, mpz_t *uwProof, proving_key pk)
if(i >= (nPublic + nConst)) mpz_to_fr(&uwFactorPublic[i-(nPublic + nConst)], &uw[i]);
}

#pragma omp parallel num_threads(5)
{
switch (get_thread())
{
case 0: mclBnG1_mulVec(&result->uwA1, pk.A1, uwFactor, M); break;
case 1: mclBnG1_mulVec(&result->uwB1, pk.B1, uwFactor, M); break;
case 2: mclBnG2_mulVec(&result->uwB2, pk.B2, uwFactor, M); break;
case 3: mclBnG1_mulVec(&result->uwC1, pk.pk1, uwFactorPublic, M-(nPublic + nConst)); break;
case 4: mclBnG1_mulVec(&result->htdelta, pk.xt1_rand, AsFr, n); break;
case 99:
mclBnG1_mulVec(&result->uwA1, pk.A1, uwFactor, M);
mclBnG1_mulVec(&result->uwB1, pk.B1, uwFactor, M);
mclBnG2_mulVec(&result->uwB2, pk.B2, uwFactor, M);
mclBnG1_mulVec(&result->uwC1, pk.pk1, uwFactorPublic, M-(nPublic + nConst));
mclBnG1_mulVec(&result->htdelta, pk.xt1_rand, AsFr, n);
break;
}
}
}
int num_threads = get_nprocs();

void naive_mul_exp(struct mulExpResult *result, mpz_t *uwProof, proving_key pk)
{
int n = mpz_get_ui(pk.Ne);

mclBnFr frFactor[M];
for (int i = nConst; i < (nPublic + nConst); i++)
{
mpz_set(uwProof[i-nConst], uw[i]);
}

#pragma omp parallel for
for (int i = 0; i < M; i++)
{
mpz_to_fr(&frFactor[i], &uw[i]);
// Auw = Auw + u[i] * s1.A[i];
mclBnG1_mul(&pk.A1[i], &pk.A1[i], &frFactor[i]);
// B1uw = B1uw + u[i] * s1.B[i];
mclBnG1_mul(&pk.B1[i], &pk.B1[i], &frFactor[i]);
// B2uw = B2uw + u[i] * s2.B[i];
mclBnG2_mul(&pk.B2[i], &pk.B2[i], &frFactor[i]);
// Cw = Cw + w[i] * s1.pk[i];
if(i < M-(nPublic + nConst)) mclBnG1_mul(&pk.pk1[i], &pk.pk1[i], &frFactor[i]);
}

mclBnG1_clear(&result->uwA1);
mclBnG1_clear(&result->uwB1);
mclBnG2_clear(&result->uwB2);
mclBnG1_clear(&result->uwC1);

for (int i = M; i--;)
{
mclBnG1_add(&result->uwA1, &result->uwA1, &pk.A1[i]);
mclBnG1_add(&result->uwB1, &result->uwB1, &pk.B1[i]);
mclBnG2_add(&result->uwB2, &result->uwB2, &pk.B2[i]);
if(i < M-(nPublic + nConst)) mclBnG1_add(&result->uwC1, &result->uwC1, &pk.pk1[i]);
}

#pragma omp parallel for
for (int i = 0; i < n; i++)
{
mclBnG1_mul(&pk.xt1_rand[i], &pk.xt1_rand[i], &AsFr[i]);
}

mclBnG1_clear(&result->htdelta);

for (int i = n; i--;)
{
mclBnG1_add(&result->htdelta, &result->htdelta, &pk.xt1_rand[i]);
}
mclBnG1_mulVecMT(&result->uwA1, pk.A1, uwFactor, M, num_threads);
mclBnG1_mulVecMT(&result->uwB1, pk.B1, uwFactor, M, num_threads);
mclBnG2_mulVecMT(&result->uwB2, pk.B2, uwFactor, M, num_threads);
mclBnG1_mulVecMT(&result->uwC1, pk.pk1, uwFactorPublic, M-(nPublic + nConst), num_threads);
mclBnG1_mulVecMT(&result->htdelta, pk.xt1_rand, AsFr, n, num_threads);
}

void prove(int *circuit, mclBnG1 *piA, mclBnG2 *piB2, mclBnG1 *piC, mpz_t *uwProof, proving_key pk)
Expand Down Expand Up @@ -363,31 +149,13 @@ void prove(int *circuit, mclBnG1 *piA, mclBnG2 *piB2, mclBnG1 *piC, mpz_t *uwPro
clock_gettime(CLOCK_MONOTONIC, &begin);

struct mulExpResult result;

#ifdef AUTO_MULEXP
if(M > 1000) mul_exp(&result, uwProof, pk);
else mcl_mul_exp(&result, uwProof, pk);
#elif BOSCOSTER_MULEXP
mul_exp(&result, uwProof, pk);
#elif NAIVE_MULEXP
naive_mul_exp(&result, uwProof, pk);
#elif MCL_MULEXP
mcl_mul_exp(&result, uwProof, pk);
#endif
mul_exp(&result, uwProof, pk);

clock_gettime(CLOCK_MONOTONIC, &end);
elapsed = (end.tv_sec - begin.tv_sec);
elapsed += (end.tv_nsec - begin.tv_nsec) / 1000000000.0;
if (bench) printf(" |--- G1, G2 multiexponentiations: [%fs]\n", elapsed);
#ifdef MULTI
#elif BOSCOSTER_MULEXP
if (bench) printf(" |--- Bos-Coster: [%fs]\n", elapsedBosCoster);
if (bench) printf(" |--- Heap sorting: [%fs]\n", elapsedSort);
#elif AUTO_MULEXP
if (bench) printf(" |--- Bos-Coster: [%fs]\n", elapsedBosCoster);
if (bench) printf(" |--- Heap sorting: [%fs]\n", elapsedSort);
#endif

if (bench) printf(" |--- G1, G2 multiexponentiations: [%fs]\n", elapsed);
log_message("Computing piA, piB1, piB2, piC, htdelta...");

mclBnG1 piB1;
Expand Down

0 comments on commit b028643

Please sign in to comment.