From 77025cc572dc96a90febda5cd70522a102c35c1f Mon Sep 17 00:00:00 2001 From: Heng Zhang Date: Tue, 25 Oct 2022 21:22:43 +0800 Subject: [PATCH] feat: FIELD_eval_h_lookups --- ec-gpu-gen/Cargo.toml | 3 +- ec-gpu-gen/src/cl/fft.cl | 65 ++++++++++++++++++++++++++++++++++++++-- ec-gpu-gen/src/fft.rs | 7 +++-- 3 files changed, 70 insertions(+), 5 deletions(-) diff --git a/ec-gpu-gen/Cargo.toml b/ec-gpu-gen/Cargo.toml index fabde4b..d26fdbc 100644 --- a/ec-gpu-gen/Cargo.toml +++ b/ec-gpu-gen/Cargo.toml @@ -11,7 +11,7 @@ license = "MIT/Apache-2.0" [dependencies] bitvec = "1.0.1" crossbeam-channel = "0.5.1" -ec-gpu = "0.2.0" +ec-gpu = { path = "../ec-gpu" } execute = "0.2.9" ff = { version = "0.12.0", default-features = false } group = "0.12.0" @@ -24,6 +24,7 @@ rust-gpu-tools = { version = "0.6.1", default-features = false, optional = true sha2 = "0.10" thiserror = "1.0.30" yastl = "0.1.2" +ark-std = { version = "0.3", features = ["print-trace"] } [dev-dependencies] # NOTE vmx 2022-07-07: Using the `__private_bench` feature of `blstrs` is just diff --git a/ec-gpu-gen/src/cl/fft.cl b/ec-gpu-gen/src/cl/fft.cl index 856c7ba..a221145 100644 --- a/ec-gpu-gen/src/cl/fft.cl +++ b/ec-gpu-gen/src/cl/fft.cl @@ -1,9 +1,9 @@ /* * FFT algorithm is inspired from: http://www.bealto.com/gpu-fft_group-1.html */ -KERNEL void FIELD_radix_fft(GLOBAL FIELD* x, // Source buffer +KERNEL void FIELD_radix_fft(const GLOBAL FIELD* x, // Source buffer GLOBAL FIELD* y, // Destination buffer - GLOBAL FIELD* pq, // Precalculated twiddle factors + const GLOBAL FIELD* pq, // Precalculated twiddle factors GLOBAL FIELD* omegas, // [omega, omega^2, omega^4, ...] LOCAL FIELD* u_arg, // Local buffer to store intermediary values uint n, // Number of elements @@ -74,3 +74,64 @@ KERNEL void FIELD_mul_by_field(GLOBAL FIELD* elements, const uint gid = GET_GLOBAL_ID(); elements[gid] = FIELD_mul(elements[gid], field); } + +KERNEL void FIELD_eval_h_lookups( + GLOBAL FIELD* value, + GLOBAL FIELD* table, + GLOBAL FIELD* permuted_input_coset, + GLOBAL FIELD* permuted_table_coset, + GLOBAL FIELD* product_coset, + GLOBAL FIELD* l0, + GLOBAL FIELD* l_last, + GLOBAL FIELD* l_active_row, + GLOBAL FIELD* y_beta_gamma, + uint32_t rot_scale, + uint32_t size +) { + uint gid = GET_GLOBAL_ID(); + uint idx = gid; + + uint32_t r_next = (idx + rot_scale) & (size - 1); + uint32_t r_prev = (idx + size - rot_scale) & (size - 1); + + // l_0(X) * (1 - z(X)) = 0 + value[idx] = FIELD_mul(value[idx], y_beta_gamma[0]); + FIELD tmp = FIELD_sub(FIELD_ONE, product_coset[idx]); + tmp = FIELD_mul(tmp, l0[idx]); + value[idx] = FIELD_add(value[idx], tmp); + + // l_last(X) * (z(X)^2 - z(X)) = 0 + value[idx] = FIELD_mul(value[idx], y_beta_gamma[0]); + tmp = FIELD_sqr(product_coset[idx]); + tmp = FIELD_sub(tmp, product_coset[idx]); + tmp = FIELD_mul(tmp, l_last[idx]); + value[idx] = FIELD_add(value[idx], tmp); + + // (1 - (l_last(X) + l_blind(X))) * ( + // z(\omega X) (a'(X) + \beta) (s'(X) + \gamma) + // - z(X) (\theta^{m-1} a_0(X) + ... + a_{m-1}(X) + \beta) + // (\theta^{m-1} s_0(X) + ... + s_{m-1}(X) + \gamma) + // ) = 0 + value[idx] = FIELD_mul(value[idx], y_beta_gamma[0]); + tmp = FIELD_add(permuted_input_coset[idx], y_beta_gamma[1]); + FIELD tmp2 = FIELD_add(permuted_table_coset[idx], y_beta_gamma[2]); + tmp = FIELD_mul(tmp, tmp2); + tmp = FIELD_mul(tmp, product_coset[r_next]); + tmp2 = FIELD_mul(product_coset[idx], table[idx]); + tmp = FIELD_sub(tmp, tmp2); + tmp = FIELD_mul(tmp, l_active_row[idx]); + value[idx] = FIELD_add(value[idx], tmp); + + // l_0(X) * (a'(X) - s'(X)) = 0 + value[idx] = FIELD_mul(value[idx], y_beta_gamma[0]); + tmp2 = FIELD_sub(permuted_input_coset[idx], permuted_table_coset[idx]); + tmp = FIELD_mul(tmp2, l0[idx]); + value[idx] = FIELD_add(value[idx], tmp); + + // (1 - (l_last + l_blind)) * (a′(X) − s′(X))⋅(a′(X) − a′(\omega^{-1} X)) = 0 + value[idx] = FIELD_mul(value[idx], y_beta_gamma[0]); + tmp = FIELD_sub(permuted_input_coset[idx], permuted_input_coset[r_prev]); + tmp = FIELD_mul(tmp, tmp2); + tmp = FIELD_mul(tmp, l_active_row[idx]); + value[idx] = FIELD_add(value[idx], tmp); +} diff --git a/ec-gpu-gen/src/fft.rs b/ec-gpu-gen/src/fft.rs index d3d9ded..9efac8f 100644 --- a/ec-gpu-gen/src/fft.rs +++ b/ec-gpu-gen/src/fft.rs @@ -1,6 +1,7 @@ use std::cmp; use std::sync::{Arc, RwLock}; +use ark_std::{start_timer, end_timer}; use ec_gpu::GpuName; use ff::Field; use log::{error, info}; @@ -18,7 +19,7 @@ pub struct SingleFftKernel<'a, F> where F: Field + GpuName, { - program: Program, + pub program: Program, /// An optional function which will be called at places where it is possible to abort the FFT /// calculations. If it returns true, the calculation will be aborted with an /// [`EcError::Aborted`]. @@ -77,7 +78,9 @@ impl<'a, F: Field + GpuName> SingleFftKernel<'a, F> { } let omegas_buffer = program.create_buffer_from_slice(&omegas)?; + //let timer = start_timer!(|| format!("copy {}", log_n)); program.write_from_buffer(&mut src_buffer, &*input)?; + //end_timer!(timer); // Specifies log2 of `p`, (http://www.bealto.com/gpu-fft_group-1.html) let mut log_p = 0u32; // Each iteration performs a FFT round @@ -130,7 +133,7 @@ pub struct FftKernel<'a, F> where F: Field + GpuName, { - kernels: Vec>, + pub kernels: Vec>, } impl<'a, F> FftKernel<'a, F>