Skip to content

Commit

Permalink
test(benchmark): add CosetLDEBatch benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
batzor committed Aug 13, 2024
1 parent 482a319 commit 2d3611e
Show file tree
Hide file tree
Showing 9 changed files with 96 additions and 15 deletions.
6 changes: 5 additions & 1 deletion Cargo.Bazel.lock
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"checksum": "13a3e635e211fd5829278f0398081863ed1429e454e4c51cbab29f530430fca1",
"checksum": "68c114157b91e9302354f99f6882db00164b615bcde824a10c0647ee13ffb8a5",
"crates": {
"addchain 0.2.0": {
"name": "addchain",
Expand Down Expand Up @@ -13499,6 +13499,10 @@
"id": "p3-dft 0.1.3-succinct",
"target": "p3_dft"
},
{
"id": "p3-field 0.1.3-succinct",
"target": "p3_field"
},
{
"id": "p3-matrix 0.1.3-succinct",
"target": "p3_matrix"
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions benchmark/fft_batch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ CPU Caches:
L2 Unified 4096 KiB (x12)
```

### FFTBatch

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor plonky3 -p baby_bear
```
Expand Down Expand Up @@ -48,3 +50,39 @@ bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/f
| 23 | 1.74514 | **1.138950** |

![image](/benchmark/fft_batch/fft_batch_mac_m3.png)

### CosetLDEBatch

```shell
bazel run -c opt --//:has_openmp --//:has_rtti --//:has_matplotlib //benchmark/fft_batch:fft_batch_benchmark -- -k 16 -k 17 -k 18 -k 19 -k 20 -k 21 -k 22 -k 23 --vendor plonky3 -p baby_bear --run_coset_lde
```

#### On Intel i9-13900K

| Exponent | Tachyon | Plonky3 |
| :------- | ------------ | -------- |
| 16 | **0.008185** | 0.013976 |
| 17 | **0.016475** | 0.037277 |
| 18 | **0.044939** | 0.085922 |
| 19 | **0.107282** | 0.2239 |
| 20 | **0.229844** | 0.443987 |
| 21 | **0.557652** | 0.789448 |
| 22 | **1.205040** | 1.63763 |
| 23 | **2.309380** | 3.89975 |

![image](/benchmark/fft_batch/coset_lde_batch_ubuntu_i9.png)

#### On Mac M3 Pro

| Exponent | Tachyon      | Plonky3      |
| :------- | ------------ | ------------ |
| 16       | **0.013529** | 0.014598     |
| 17       | **0.028898** | 0.033878     |
| 18       | 0.061598     | **0.054566** |
| 19       | 0.131265     | **0.101513** |
| 20       | 0.260599     | **0.219394** |
| 21       | 0.553045     | **0.469239** |
| 22       | 1.23732      | **1.002570** |
| 23       | 3.23449      | **2.420740** |

![image](/benchmark/fft_batch/coset_lde_batch_mac_m3.png)
Binary file added benchmark/fft_batch/coset_lde_batch_mac_m3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added benchmark/fft_batch/coset_lde_batch_ubuntu_i9.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
33 changes: 21 additions & 12 deletions benchmark/fft_batch/fft_batch_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ extern "C" void* run_fft_batch_plonky3_baby_bear(uint64_t* duration,
uint32_t n_log,
size_t batch_size);

// Runs Plonky3's coset_lde_batch over BabyBear. The symbol is exported with C
// linkage from benchmark/fft_batch/plonky3/src/lib.rs.
//
// |data| is read as a row-major matrix with |batch_size| columns and
// (1 << |n_log|) rows. The elapsed time of the transform, in microseconds, is
// written to |duration|, and a pointer to the resulting matrix is returned.
//
// NOTE(review): the Rust side adopts |data| through Vec::from_raw_parts, i.e.
// it takes ownership of the buffer — confirm the caller does not free it
// again afterwards.
extern "C" void* run_coset_lde_batch_plonky3_baby_bear(uint64_t* duration,
math::BabyBear* data,
uint32_t n_log,
size_t batch_size);

template <typename F>
void CheckResults(bool check_results,
const std::vector<math::RowMajorMatrix<F>>& results,
Expand Down Expand Up @@ -62,20 +67,24 @@ void Run(const tachyon::FFTBatchConfig& config) {
runner.set_inputs(absl::MakeSpan(inputs));

std::vector<math::RowMajorMatrix<F>> results;
if (config.run_coset_lde()) {
NOTIMPLEMENTED() << "CosetLDEBatch not implemented yet";
} else {
runner.Run("tachyon", results, true);
for (const benchmark::Vendor vendor : config.vendors()) {
std::vector<math::RowMajorMatrix<F>> results_vendor;
switch (vendor.value()) {
case benchmark::Vendor::kPlonky3:
runner.Run("tachyon", config.run_coset_lde(), results, true);
for (const benchmark::Vendor vendor : config.vendors()) {
std::vector<math::RowMajorMatrix<F>> results_vendor;
switch (vendor.value()) {
case benchmark::Vendor::kPlonky3:
if (config.run_coset_lde()) {
// clang-format off
runner.RunExternal(vendor.ToString(), run_coset_lde_batch_plonky3_baby_bear,
results_vendor);
// clang-format on
} else {
runner.RunExternal(vendor.ToString(), run_fft_batch_plonky3_baby_bear,
results_vendor);
CheckResults(config.check_results(), results, results_vendor);
default:
NOTIMPLEMENTED() << "Unsupported vendor";
}
}
CheckResults(config.check_results(), results, results_vendor);
break;
default:
NOTIMPLEMENTED() << "Unsupported vendor";
}
}

Expand Down
8 changes: 6 additions & 2 deletions benchmark/fft_batch/fft_batch_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class FFTBatchRunner {
domains_ = domains;
}

void Run(std::string_view vendor,
void Run(std::string_view vendor, bool run_coset_lde,
std::vector<math::RowMajorMatrix<F>>& results,
bool should_record = true) {
results.clear();
Expand All @@ -49,7 +49,11 @@ class FFTBatchRunner {
for (size_t i = 0; i < domains_.size(); ++i) {
math::RowMajorMatrix<F> matrix = inputs_[i];
base::TimeTicks start = base::TimeTicks::Now();
domains_[i]->FFTBatch(matrix);
if (run_coset_lde) {
domains_[i]->CosetLDEBatch(matrix, 0, F::Zero());
} else {
domains_[i]->FFTBatch(matrix);
}
if (should_record) {
reporter_->AddTime(vendor,
(base::TimeTicks::Now() - start).InSecondsF());
Expand Down
1 change: 1 addition & 0 deletions benchmark/fft_batch/plonky3/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ publish = false
ff = { version = "0.13", features = ["derive", "derive_bits"] }
p3-baby-bear= "0.1.3-succinct"
p3-dft = "0.1.3-succinct"
p3-field = "0.1.3-succinct"
p3-matrix = "0.1.3-succinct"
rand = "0.8.4"
tachyon_rs = { path = "../../../tachyon/rs" }
24 changes: 24 additions & 0 deletions benchmark/fft_batch/plonky3/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use p3_baby_bear::BabyBear;
use p3_dft::{Radix2DitParallel, TwoAdicSubgroupDft};
use p3_field::AbstractField;
use p3_matrix::{dense::RowMajorMatrix, Matrix};
use std::time::Instant;

Expand All @@ -24,3 +25,26 @@ pub extern "C" fn run_fft_batch_plonky3_baby_bear(
}
Box::into_raw(Box::new(dft_result)) as *mut RowMajorMatrix<BabyBear>
}

/// Times Plonky3's `coset_lde_batch` on a BabyBear matrix handed over through the C ABI.
///
/// `data` is interpreted as a row-major matrix with `batch_size` columns and
/// `1 << n_log` rows. The transform is invoked with `0` and `BabyBear::zero()`
/// as its second and third arguments. The elapsed time, in microseconds, is
/// written through `duration`, and the resulting row-major matrix is returned
/// as a raw pointer obtained from `Box::into_raw`.
///
/// NOTE(review): ownership of the `data` buffer moves into a `Vec` via
/// `Vec::from_raw_parts`; presumably the caller must allocate it compatibly
/// and must not free it afterwards — confirm against the C++ runner.
#[no_mangle]
pub extern "C" fn run_coset_lde_batch_plonky3_baby_bear(
    duration: *mut u64,
    data: *mut BabyBear,
    n_log: u32,
    batch_size: usize,
) -> *mut RowMajorMatrix<BabyBear> {
    let rows = 1 << n_log;
    let len = rows * batch_size;
    // SAFETY (assumed, not checkable from here): `data` points to `len`
    // initialized `BabyBear` elements whose allocation may be adopted by a `Vec`.
    let elements: Vec<BabyBear> = unsafe { Vec::from_raw_parts(data, len, len) };

    let input = RowMajorMatrix::<BabyBear>::new(elements, batch_size);
    let dft = Radix2DitParallel::default();

    // Only the shift construction and the transform itself sit inside the timed region.
    let timer = Instant::now();
    let shift = BabyBear::zero();
    let extended = dft.coset_lde_batch(input, 0, shift).to_row_major_matrix();
    let elapsed_micros = timer.elapsed().as_micros() as u64;

    // SAFETY (assumed): `duration` is a valid, writable pointer to a `u64`.
    unsafe {
        duration.write(elapsed_micros);
    }

    // Hand the result to the caller as an owning raw pointer.
    let boxed = Box::new(extended);
    Box::into_raw(boxed) as *mut RowMajorMatrix<BabyBear>
}

0 comments on commit 2d3611e

Please sign in to comment.