This repository has been archived by the owner on Jul 16, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 90
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from markdryan/benchmarking
Add benchmark code and fix various issues
- Loading branch information
Showing
329 changed files
with
7,395 additions
and
487 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
# Intel | ||
- [email protected] | ||
- [email protected] | ||
- [email protected] | ||
- [email protected] | ||
- [email protected] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,10 @@ | ||
add_executable(avx_ex1_tests ex1_test.cpp transform_sse.c transform_avx.c) | ||
set(avx_ex1_srcs transform_sse.c transform_avx.c) | ||
add_executable(avx_ex1_tests ex1_test.cpp ${avx_ex1_srcs}) | ||
target_link_libraries(avx_ex1_tests gtest_main) | ||
|
||
# Optional benchmark target: defined only when benchmark_FOUND is true. It builds ex1_bench.cpp together with the sources listed in avx_ex1_srcs and links against benchmark::benchmark. | ||
IF( benchmark_FOUND ) | ||
add_executable(avx_ex1_bench ex1_bench.cpp ${avx_ex1_srcs}) | ||
target_link_libraries(avx_ex1_bench benchmark::benchmark) | ||
ENDIF() | ||
|
||
add_test(NAME avx_ex1_test COMMAND avx_ex1_tests) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Copyright (C) 2021 by Intel Corporation | ||
* | ||
* Permission to use, copy, modify, and/or distribute this software for any | ||
* purpose with or without fee is hereby granted. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH | ||
* REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY | ||
* AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, | ||
* INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | ||
* LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | ||
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | ||
* PERFORMANCE OF THIS SOFTWARE. | ||
*/ | ||
|
||
#include <benchmark/benchmark.h> | ||
#include <xmmintrin.h> | ||
|
||
#include "transform_avx.h" | ||
#include "transform_sse.h" | ||
|
||
/*
 * Benchmark for transform_sse().
 *
 * Allocates an input and an output buffer of state.range(0) floats, both
 * 16-byte aligned, fills the input with 1.0f and calls transform_sse() on
 * them once per benchmark iteration. Throughput is reported as the size of
 * the input buffer multiplied by the number of iterations.
 *
 * If either allocation fails the benchmark is reported as skipped with an
 * error instead of writing through a null pointer.
 */
static void BM_transform_sse(benchmark::State &state)
{
    // state.range() yields int64_t; the narrowing to int is made explicit.
    const int len = static_cast<int>(state.range(0));
    const auto bytes = len * sizeof(float);

    // Dynamic memory allocation with 16byte alignment
    float *pInVector = static_cast<float *>(_mm_malloc(bytes, 16));
    float *pOutVector = static_cast<float *>(_mm_malloc(bytes, 16));
    if (!pInVector || !pOutVector) {
        state.SkipWithError("_mm_malloc failed");
        if (pInVector)
            _mm_free(pInVector);
        if (pOutVector)
            _mm_free(pOutVector);
        return;
    }

    // init data
    for (int i = 0; i < len; i++)
        pInVector[i] = 1.0f;
    const float cos_teta = 0.8660254037f;
    const float sin_teta = 0.5f;

    for (auto _ : state) {
        transform_sse(sin_teta, cos_teta, pInVector, pOutVector, len);
        // Compiler barrier: the output buffer is never read here, so make
        // sure the stores cannot be treated as dead by the optimizer.
        benchmark::ClobberMemory();
    }
    state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(len) *
                            int64_t(sizeof(pInVector[0])));

    _mm_free(pInVector);
    _mm_free(pOutVector);
}
|
||
/*
 * Benchmark for transform_avx().
 *
 * Allocates an input and an output buffer of state.range(0) floats, both
 * 32-byte aligned, fills the input with 1.0f and calls transform_avx() on
 * them once per benchmark iteration. Throughput is reported as the size of
 * the input buffer multiplied by the number of iterations.
 *
 * If either allocation fails the benchmark is reported as skipped with an
 * error instead of writing through a null pointer.
 */
static void BM_transform_avx(benchmark::State &state)
{
    // state.range() yields int64_t; the narrowing to int is made explicit.
    const int len = static_cast<int>(state.range(0));
    const auto bytes = len * sizeof(float);

    // Dynamic memory allocation with 32byte alignment
    float *pInVector = static_cast<float *>(_mm_malloc(bytes, 32));
    float *pOutVector = static_cast<float *>(_mm_malloc(bytes, 32));
    if (!pInVector || !pOutVector) {
        state.SkipWithError("_mm_malloc failed");
        if (pInVector)
            _mm_free(pInVector);
        if (pOutVector)
            _mm_free(pOutVector);
        return;
    }

    // init data
    for (int i = 0; i < len; i++)
        pInVector[i] = 1.0f;
    const float cos_teta = 0.8660254037f;
    const float sin_teta = 0.5f;

    for (auto _ : state) {
        transform_avx(sin_teta, cos_teta, pInVector, pOutVector, len);
        // Compiler barrier: the output buffer is never read here, so make
        // sure the stores cannot be treated as dead by the optimizer.
        benchmark::ClobberMemory();
    }
    state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(len) *
                            int64_t(sizeof(pInVector[0])));

    _mm_free(pInVector);
    _mm_free(pOutVector);
}
|
||
BENCHMARK(BM_transform_sse) | ||
->Arg(1 << 6) | ||
->Arg(1 << 8) | ||
->Arg(1 << 10) | ||
->Arg(1 << 12) | ||
->Arg(1 << 14) | ||
->Arg(1 << 16) | ||
->Arg(1 << 18); | ||
BENCHMARK(BM_transform_avx) | ||
->Arg(1 << 6) | ||
->Arg(1 << 8) | ||
->Arg(1 << 10) | ||
->Arg(1 << 12) | ||
->Arg(1 << 14) | ||
->Arg(1 << 16) | ||
->Arg(1 << 18); | ||
BENCHMARK_MAIN(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,14 @@ | ||
set(avx_ex10_srcs ex10_test.cpp saxpy32.c) | ||
if(CMAKE_CXX_COMPILER_ID MATCHES Clang OR CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES AppleClang) | ||
set(avx_ex10_srcs ${avx_ex10_srcs} saxpy32.s) | ||
set(avx_ex10_ass saxpy32.s) | ||
elseif(MSVC) | ||
set(avx_ex10_srcs ${avx_ex10_srcs} saxpy32.asm) | ||
set(avx_ex10_ass saxpy32.asm) | ||
endif() | ||
add_executable(avx_ex10_tests ${avx_ex10_srcs}) | ||
add_executable(avx_ex10_tests ex10_test.cpp saxpy32.c ${avx_ex10_ass}) | ||
target_link_libraries(avx_ex10_tests gtest_main) | ||
|
||
# Optional benchmark target: defined only when benchmark_FOUND is true. It builds ex10_bench.cpp together with the assembly source held in avx_ex10_ass and links against benchmark::benchmark. | ||
IF( benchmark_FOUND ) | ||
add_executable(avx_ex10_bench ex10_bench.cpp ${avx_ex10_ass}) | ||
target_link_libraries(avx_ex10_bench benchmark::benchmark) | ||
ENDIF() | ||
|
||
add_test(NAME avx_ex10_test COMMAND avx_ex10_tests) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
/* | ||
* Copyright (C) 2022 by Intel Corporation | ||
* | ||
* Permission to use, copy, modify, and/or distribute this software for any | ||
* purpose with or without fee is hereby granted. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH | ||
* REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY | ||
* AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, | ||
* INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | ||
* LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | ||
* OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | ||
* PERFORMANCE OF THIS SOFTWARE. | ||
*/ | ||
|
||
#include <benchmark/benchmark.h> | ||
#include <xmmintrin.h> | ||
|
||
#include "saxpy32.h" | ||
|
||
/*
 * Fills the two source arrays with deterministic test data:
 * src[k] = 2 * k and src2[k] = 3 * k for every k in [0, len).
 * Nothing is written when len is zero or negative.
 */
static void init_sources(float *src, float *src2, int len)
{
    int idx = 0;
    while (idx < len) {
        src[idx] = 2.0f * idx;
        src2[idx] = 3.0f * idx;
        ++idx;
    }
}
|
||
/*
 * Benchmark for saxpy32() with all three buffers 32-byte aligned.
 *
 * Each buffer holds state.range(0) floats; the two sources are filled by
 * init_sources(). saxpy32() receives the length in bytes. Throughput is
 * reported as two float-sized elements per index per iteration.
 *
 * If any allocation fails the benchmark is reported as skipped with an error
 * instead of writing through a null pointer.
 */
static void BM_saxpy_avx_aligned(benchmark::State &state)
{
    // state.range() yields int64_t; the narrowing to int is made explicit.
    const int len = static_cast<int>(state.range(0));
    const auto bytes = len * sizeof(float);

    float *src = static_cast<float *>(_mm_malloc(bytes, 32));
    float *src2 = static_cast<float *>(_mm_malloc(bytes, 32));
    float *dest = static_cast<float *>(_mm_malloc(bytes, 32));
    if (!src || !src2 || !dest) {
        state.SkipWithError("_mm_malloc failed");
        if (dest)
            _mm_free(dest);
        if (src2)
            _mm_free(src2);
        if (src)
            _mm_free(src);
        return;
    }

    init_sources(src, src2, len);

    for (auto _ : state) {
        saxpy32(src, src2, len * sizeof(float), dest, 10.0);
        // Compiler barrier: dest is never read here, so make sure the
        // stores cannot be treated as dead by the optimizer.
        benchmark::ClobberMemory();
    }

    state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(len) *
                            int64_t(sizeof(float) * 2));

    _mm_free(dest);
    _mm_free(src2);
    _mm_free(src);
}
|
||
/*
 * Benchmark for saxpy32() with one misaligned operand.
 *
 * The first source starts one float past a 32-byte-aligned allocation, while
 * the second source and the destination stay 32-byte aligned. Each buffer
 * holds state.range(0) floats and saxpy32() receives the length in bytes.
 *
 * If any allocation fails the benchmark is reported as skipped with an error
 * instead of writing through a null pointer.
 */
static void BM_saxpy_avx_misaligned1(benchmark::State &state)
{
    // state.range() yields int64_t; the narrowing to int is made explicit.
    const int len = static_cast<int>(state.range(0));

    // One spare float so that src can be offset from the aligned base.
    float *src_mem =
        static_cast<float *>(_mm_malloc((len + 1) * sizeof(float), 32));
    float *src2 = static_cast<float *>(_mm_malloc(len * sizeof(float), 32));
    float *dest = static_cast<float *>(_mm_malloc(len * sizeof(float), 32));
    if (!src_mem || !src2 || !dest) {
        state.SkipWithError("_mm_malloc failed");
        if (dest)
            _mm_free(dest);
        if (src2)
            _mm_free(src2);
        if (src_mem)
            _mm_free(src_mem);
        return;
    }
    float *src = &src_mem[1];

    init_sources(src, src2, len);

    for (auto _ : state) {
        saxpy32(src, src2, len * sizeof(float), dest, 10.0);
        // Compiler barrier: dest is never read here, so make sure the
        // stores cannot be treated as dead by the optimizer.
        benchmark::ClobberMemory();
    }

    state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(len) *
                            int64_t(sizeof(float) * 2));

    // Free the original allocation, not the offset pointer.
    _mm_free(dest);
    _mm_free(src2);
    _mm_free(src_mem);
}
|
||
/*
 * Benchmark for saxpy32() with all three operands misaligned.
 *
 * Both sources and the destination start one float past a 32-byte-aligned
 * allocation. Each buffer holds state.range(0) floats and saxpy32() receives
 * the length in bytes.
 *
 * If any allocation fails the benchmark is reported as skipped with an error
 * instead of writing through a null pointer.
 */
static void BM_saxpy_avx_misaligned3(benchmark::State &state)
{
    // state.range() yields int64_t; the narrowing to int is made explicit.
    const int len = static_cast<int>(state.range(0));
    // One spare float per buffer so each pointer can be offset from its base.
    const auto bytes = (len + 1) * sizeof(float);

    float *src_mem = static_cast<float *>(_mm_malloc(bytes, 32));
    float *src2_mem = static_cast<float *>(_mm_malloc(bytes, 32));
    float *dest_mem = static_cast<float *>(_mm_malloc(bytes, 32));
    if (!src_mem || !src2_mem || !dest_mem) {
        state.SkipWithError("_mm_malloc failed");
        if (dest_mem)
            _mm_free(dest_mem);
        if (src2_mem)
            _mm_free(src2_mem);
        if (src_mem)
            _mm_free(src_mem);
        return;
    }
    float *src = &src_mem[1];
    float *src2 = &src2_mem[1];
    float *dest = &dest_mem[1];

    init_sources(src, src2, len);

    for (auto _ : state) {
        saxpy32(src, src2, len * sizeof(float), dest, 10.0);
        // Compiler barrier: dest is never read here, so make sure the
        // stores cannot be treated as dead by the optimizer.
        benchmark::ClobberMemory();
    }

    state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(len) *
                            int64_t(sizeof(float) * 2));

    // Free the original allocations, not the offset pointers.
    _mm_free(dest_mem);
    _mm_free(src2_mem);
    _mm_free(src_mem);
}
|
||
BENCHMARK(BM_saxpy_avx_aligned) | ||
->Arg(1 << 6) | ||
->Arg(1 << 8) | ||
->Arg(1 << 10) | ||
->Arg(1 << 12) | ||
->Arg(1 << 14) | ||
->Arg(1 << 16) | ||
->Arg(1 << 18); | ||
BENCHMARK(BM_saxpy_avx_misaligned1) | ||
->Arg(1 << 6) | ||
->Arg(1 << 8) | ||
->Arg(1 << 10) | ||
->Arg(1 << 12) | ||
->Arg(1 << 14) | ||
->Arg(1 << 16) | ||
->Arg(1 << 18); | ||
BENCHMARK(BM_saxpy_avx_misaligned3) | ||
->Arg(1 << 6) | ||
->Arg(1 << 8) | ||
->Arg(1 << 10) | ||
->Arg(1 << 12) | ||
->Arg(1 << 14) | ||
->Arg(1 << 16) | ||
->Arg(1 << 18); | ||
BENCHMARK_MAIN(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,14 @@ | ||
set(avx_ex12_srcs ex12_test.cpp saxpy32.c saxpy16.c) | ||
if(CMAKE_CXX_COMPILER_ID MATCHES Clang OR CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES AppleClang) | ||
set(avx_ex12_srcs ${avx_ex12_srcs} saxpy32.s saxpy16.s) | ||
set(avx_ex12_ass saxpy32.s saxpy16.s) | ||
elseif(MSVC) | ||
set(avx_ex12_srcs ${avx_ex12_srcs} saxpy32.asm saxpy16.asm) | ||
set(avx_ex12_ass saxpy32.asm saxpy16.asm) | ||
endif() | ||
add_executable(avx_ex12_tests ${avx_ex12_srcs}) | ||
add_executable(avx_ex12_tests ex12_test.cpp saxpy32.c saxpy16.c ${avx_ex12_ass}) | ||
target_link_libraries(avx_ex12_tests gtest_main) | ||
|
||
# Optional benchmark target: defined only when benchmark_FOUND is true. It builds ex12_bench.cpp together with the assembly sources held in avx_ex12_ass and links against benchmark::benchmark. | ||
IF( benchmark_FOUND ) | ||
add_executable(avx_ex12_bench ex12_bench.cpp ${avx_ex12_ass}) | ||
target_link_libraries(avx_ex12_bench benchmark::benchmark) | ||
ENDIF() | ||
|
||
add_test(NAME avx_ex12_test COMMAND avx_ex12_tests) |
Oops, something went wrong.