From 7ac0b693e97ace76fcdcca22c56e9f6b64513224 Mon Sep 17 00:00:00 2001 From: Mathieu Poumeyrol Date: Wed, 4 Oct 2023 17:05:56 +0200 Subject: [PATCH] missing file --- linalg/benches/leaky_relu.rs | 62 ++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 linalg/benches/leaky_relu.rs diff --git a/linalg/benches/leaky_relu.rs b/linalg/benches/leaky_relu.rs new file mode 100644 index 0000000000..9c70553f5b --- /dev/null +++ b/linalg/benches/leaky_relu.rs @@ -0,0 +1,62 @@ +use criterion::*; +use tract_data::prelude::*; + +use tract_linalg::element_wise::ElementWiseKer; + +fn leaky_relu_f16(c: &mut Criterion) { + let mut group = c.benchmark_group("leaky_relu_f16"); + group.throughput(Throughput::Elements(1024)); + let mut input = unsafe { Tensor::uninitialized_aligned::(&[1024], 16).unwrap() }; + let input = input.as_slice_mut::().unwrap(); + let alpha = f16::from_f32(0.1); + group.bench_function("rust", |b| b.iter(|| rust_fp16(input, alpha))); + group.bench_function("rust_with_f16", |b| b.iter(|| unsafe { rust_with_fp16(input, alpha) })); + group.bench_function("linalg", |b| b.iter(|| linalg16(input, alpha))); +} + +#[inline(never)] +fn rust_fp16(input: &mut [f16], alpha: f16) { + for x in input { + *x = if *x > f16::ZERO { *x } else { *x * alpha } + } +} + +#[target_feature(enable = "fp16")] +#[inline(never)] +unsafe fn rust_with_fp16(input: &mut [f16], alpha: f16) { + for x in input { + *x = if *x > f16::ZERO { *x } else { *x * alpha } + } +} + +#[inline(never)] +fn linalg16(input: &mut [f16], alpha: f16) { + (tract_linalg::ops().leaky_relu_f16)().run_with_params(input, alpha).unwrap(); +} + +fn leaky_relu_f32(c: &mut Criterion) { + let mut group = c.benchmark_group("leaky_relu_f32"); + group.throughput(Throughput::Elements(1024)); + let mut input = unsafe { Tensor::uninitialized_aligned::(&[1024], 16).unwrap() }; + let input = input.as_slice_mut::().unwrap(); + let alpha = 0.1f32; + group.bench_function("rust", |b| b.iter(|| rust_fp32(input, alpha))); + group.bench_function("linalg", |b| b.iter(|| linalg32(input, alpha))); + group.bench_function("linalg-asm", |b| b.iter(|| tract_linalg::arm64::arm64simd_leaky_relu_f32_8n::run(input, alpha))); +} + +#[inline(never)] +fn rust_fp32(input: &mut [f32], alpha: f32) { + for x in input { + *x = if *x > 0.0 { *x } else { *x * alpha } + } +} + +#[inline(never)] +fn linalg32(input: &mut [f32], alpha: f32) { + (tract_linalg::ops().leaky_relu_f32)().run_with_params(input, alpha).unwrap(); +} + + +criterion_group!(benches, leaky_relu_f32, leaky_relu_f16); +criterion_main!(benches);