From 459864ba41626626254a68d89caaca84a54c15c9 Mon Sep 17 00:00:00 2001
From: Sylvain Joube
Date: Tue, 27 Feb 2024 11:25:29 +0100
Subject: [PATCH] WIP: Update tests (CPU + SYCL)

---
 .gitignore                                    |   8 +
 test/algorithm/algos/bench/chrono.hpp         |  74 ++++
 test/algorithm/algos/context.cpp              | 400 +++++++++---------
 test/algorithm/algos/sycl/reduce_ext.cpp      |  30 ++
 test/sycl_sandbox/bench/reduce.cpp            | 255 +++++
 test/sycl_sandbox/bench/transform.cpp         | 237 ++++++++---
 .../archives/plot_diff_2023-05-01 copy.py     | 305 +++++++
 .../plot/archives/plot_distribution.py        | 200 +++++++++
 ...023-11-04.py => plot_2023-11-04_reduce.py} |  29 +-
 .../plot/plot_2023-11-10_transform.py         | 343 +++++++
 .../plot/plot_2023-11-11_transform.py         | 345 +++++++++
 test/sycl_sandbox/plot/plot_utils.py          |   8 +
 12 files changed, 1970 insertions(+), 264 deletions(-)
 create mode 100644 test/algorithm/algos/bench/chrono.hpp
 create mode 100644 test/algorithm/algos/sycl/reduce_ext.cpp
 create mode 100644 test/sycl_sandbox/bench/reduce.cpp
 create mode 100644 test/sycl_sandbox/plot/archives/plot_diff_2023-05-01 copy.py
 create mode 100644 test/sycl_sandbox/plot/archives/plot_distribution.py
 rename test/sycl_sandbox/plot/{plot_2023-11-04.py => plot_2023-11-04_reduce.py} (89%)
 create mode 100644 test/sycl_sandbox/plot/plot_2023-11-10_transform.py
 create mode 100644 test/sycl_sandbox/plot/plot_2023-11-11_transform.py

diff --git a/.gitignore b/.gitignore
index f122bea7..901caaee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,3 +48,11 @@ doc/conf.py
 .vscode/*
 .cache/*
 .ideas/*
+
+
+test/sycl_sandbox/obsolete
+test/sycl_sandbox/test_templates_include.hpp
+test/sycl_sandbox/test_templates.cpp
+exec.sh
+*.save
+*.txt
\ No newline at end of file
diff --git a/test/algorithm/algos/bench/chrono.hpp b/test/algorithm/algos/bench/chrono.hpp
new file mode 100644
index 00000000..bbf2c92e
--- /dev/null
+++ b/test/algorithm/algos/bench/chrono.hpp
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <chrono>
+#include <iostream>
+#include <string>
+
+namespace bench
+{
+  struct metrics_t
+  {
+    double AccessCount, AccessRate, LoadBytes, LoadBandwidth, ArithmeticIntensity, Time;
+    void print()
+    {
+      std::cout
+        << "Time=" << Time << " "
+        << "AccessCount=" << AccessCount << " "
+        << "AccessRate=" << AccessRate << " "
+        << "ArithmeticIntensity=" << ArithmeticIntensity << " "
+        << "LoadBandwidth=" << LoadBandwidth << " "
+        << "LoadBytes=" << LoadBytes << "\n";
+
+      // 72M AccessRate=20.8765M/s ArithmeticIntensity=1.75 LoadBandwidth=250.519M/s LoadBytes=201.327M
+    }
+  };
+
+  struct chrono_t
+  {
+    void Init()
+    {
+      elapsed_ = 0;
+      ResumeTiming();
+    }
+
+    void PauseTiming() // accumulate the time elapsed since the last resume
+    {
+      auto now = std::chrono::system_clock::now();
+      std::chrono::duration<double> e = now - last_point_;
+      elapsed_ += e.count();
+    }
+
+    void ResumeTiming()
+    {
+      last_point_ = std::chrono::system_clock::now();
+    }
+
+    double ElapsedTime() // elapsed time in seconds
+    {
+      PauseTiming();
+      ResumeTiming();
+      return elapsed_;
+    }
+
+    std::size_t ElapsedTimeMS() // elapsed time in milliseconds
+    {
+      PauseTiming();
+      ResumeTiming();
+      return elapsed_ * 1000;
+    }
+
+    std::string Str()
+    {
+      // long t = static_cast<long>(elapsed_);
+      // std::string res =
+      //     std::to_string(t)
+      //     + "." + std::to_string(elapsed_ - t);
+      std::string res = std::to_string(elapsed_) + "s";
+      return res;
+    }
+
+    private:
+    std::chrono::time_point<std::chrono::system_clock> last_point_;
+    double elapsed_; // accumulated time, in seconds
+  };
+}
diff --git a/test/algorithm/algos/context.cpp b/test/algorithm/algos/context.cpp
index a52c092a..84f8c31c 100644
--- a/test/algorithm/algos/context.cpp
+++ b/test/algorithm/algos/context.cpp
@@ -689,206 +689,206 @@ TTS_CASE("Check for context overload - predicates: all_of, any_of, none_of, coun
 // TODO: uncomment "find" tests when kwk::reverse will work again
 // TODO: uncomment "find" tests when kwk::reverse will work again
-// namespace test
-// {
-// struct c_find : public context_for_each {};
-// struct c_find_if : public context_for_each {};
-// struct c_find_if_not : public context_for_each {};
-// struct c_find_first_of : public context_for_each {};
-// struct c_find_last : public context_for_each {};
-// struct c_find_last_if : public context_for_each {};
-// struct c_find_last_if_not : public context_for_each {};
-// }
-// namespace kwk
-// {
-// template
-// constexpr auto find_if(test::c_find_if& ctx, Container const& c, Check cc)
-// {
-// ctx.set_message("find_if used!");
-// return find_if(cpu, c, cc);
-// }
-
-// template
-// constexpr auto find(test::c_find& ctx, Out const& o, auto v)
-// {
-// ctx.set_message("find used!");
-// return find(cpu, o, v);
-// }
-
-// template
-// constexpr auto find_if_not(test::c_find_if_not& ctx, Out const& o, Func f)
-// {
-// ctx.set_message("find_if_not used!");
-// return find_if_not(cpu, o, f);
-// }
-
-// template
-// constexpr auto find_first_of(test::c_find_first_of& ctx, Out const& o, Values const& v)
-// {
-// ctx.set_message("find_first_of used!");
-// return find_first_of(cpu, o, v);
-// }
-
-// template
-// constexpr auto find_last_if(test::c_find_last_if& ctx, Out const& o, Func f)
-// {
-// ctx.set_message("find_last_if used!");
-// return find_last_if(cpu, o, f);
-// }
-
-// template
-// constexpr auto find_last(test::c_find_last& ctx, Out const& o, auto v)
-// {
-// ctx.set_message("find_last used!");
-// return find_last(cpu, o, v);
-// }
-
-// template
-// constexpr auto find_last_if_not(test::c_find_last_if_not& ctx, Out const& o, Func f)
-// {
-// ctx.set_message("find_last_if_not used!");
-// return find_last_if_not(cpu, o, f);
-// }
-
-// // find_if -> kwk::__::for_until
-// // find_if_not -> find_if
-// // find_first_of -> find_if
-// // find_first_of -> any_of -> reduce -> for_each
-// // find -> find_if
-// // find_last_if -> find_if
-// // find_last -> find_last_if -> find_if
-// // find_last_if_not -> find_last_if -> find_if
-// };
-
-// TTS_CASE("Check for context overload - initially from find.hpp")
-// {
-// auto v = test::make_view_1();
-
-// // find_if -> kwk::__::for_until
-// {
-// test::c_find_if c1;
-// TTS_EQUAL(c1.get_message(), test::messaging_context::base_message);
-// kwk::find_if(c1, v, [&](auto){ return true; });
-// TTS_EQUAL(c1.get_message(), std::string{"find_if used!"});
-
-// // kwk::__::for_until
-// // test::context_for_until c2;
-// // TTS_EQUAL(c2.get_message(), test::messaging_context::base_message);
-// // kwk::find_if(c2, v, [&](auto e){ return true; });
-// // TTS_EQUAL(c2.get_message(), std::string{"for_until used!"});
-// }
-
-// // find -> find_if -> kwk::__::for_until
-// {
-// test::c_find c1;
-// TTS_EQUAL(c1.get_message(), test::messaging_context::base_message);
-// kwk::find(c1, v, 12);
-// TTS_EQUAL(c1.get_message(), std::string{"find used!"});
-
-// test::c_find_if c2;
-//
TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); -// kwk::find(c2, v, 12); -// TTS_EQUAL(c2.get_message(), std::string{"find_if used!"}); -// } - -// // find_if_not -> find_if -> kwk::__::for_until -// { -// test::c_find_if_not c1; -// TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); -// kwk::find_if_not(c1, v, [&](auto){ return true; }); -// TTS_EQUAL(c1.get_message(), std::string{"find_if_not used!"}); - -// test::c_find_if c2; -// TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); -// kwk::find_if_not(c2, v, [&](auto){ return true; }); -// TTS_EQUAL(c2.get_message(), std::string{"find_if used!"}); -// } - -// auto v2 = test::make_view_2(); - -// // find_first_of -> find_if -// // find_first_of -> any_of -> reduce -> for_each -// { -// // vvv find_first_of -> find_if -> kwk::__::for_until vvv -// test::c_find_first_of c1; -// TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); -// kwk::find_first_of(c1, v, v2); -// TTS_EQUAL(c1.get_message(), std::string{"find_first_of used!"}); - -// test::c_find_if c2; -// TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); -// kwk::find_first_of(c2, v, v2); -// TTS_EQUAL(c2.get_message(0), std::string{"find_if used!"}); -// TTS_EQUAL(c2.get_message(1), std::string{"map used!"}); - -// // vvv find_first_of -> any_of -> reduce -> for_each vvv -// test::c_any_of c3; -// TTS_EQUAL(c3.get_message(), test::messaging_context::base_message); -// kwk::find_first_of(c3, v, v2); -// TTS_EQUAL(c3.get_message(), std::string{"any_of used!"}); - -// test::c_reduce3 c4; -// TTS_EQUAL(c4.get_message(), test::messaging_context::base_message); -// kwk::find_first_of(c4, v, v2); -// TTS_EQUAL(c4.get_message(), std::string{"reduce(ctx, in, f, init) used!"}); - -// test::context_for_each c5; -// TTS_EQUAL(c5.get_message(), test::messaging_context::base_message); -// kwk::find_first_of(c5, v, v2); -// TTS_EQUAL(c5.get_message(), std::string{"map used!"}); -// } - -// // TODO: finir cette partie lorsque les predicates seront finis eux aussi - -// // find_last_if -> find_if -> kwk::__::for_until -// { -// test::c_find_last_if c1; -// TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); -// kwk::find_last_if(c1, v, [&](auto){ return true; }); -// TTS_EQUAL(c1.get_message(), std::string{"find_last_if used!"}); - -// test::c_find_if c2; -// TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); -// kwk::find_last_if(c2, v, [&](auto){ return true; }); -// TTS_EQUAL(c2.get_message(), std::string{"find_if used!"}); -// } - -// // find_last -> find_last_if -> find_if -> kwk::__::for_until -// { -// test::c_find_last c1; -// TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); -// kwk::find_last(c1, v, 4); -// TTS_EQUAL(c1.get_message(), std::string{"find_last used!"}); - -// test::c_find_last_if c2; -// TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); -// kwk::find_last(c2, v, 4); -// TTS_EQUAL(c2.get_message(), std::string{"find_last_if used!"}); - -// test::c_find_if c3; -// TTS_EQUAL(c3.get_message(), test::messaging_context::base_message); -// kwk::find_last(c3, v, 4); -// TTS_EQUAL(c3.get_message(), std::string{"find_if used!"}); -// } - -// // find_last_if_not -> find_last_if -> find_if -> kwk::__::for_until -// { -// test::c_find_last_if_not c1; -// TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); -// kwk::find_last_if_not(c1, v, [](auto e) { return (e == 0); }); -// TTS_EQUAL(c1.get_message(), 
std::string{"find_last_if_not used!"}); - -// test::c_find_last_if c2; -// TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); -// kwk::find_last_if_not(c2, v, [](auto e) { return (e == 0); }); -// TTS_EQUAL(c2.get_message(), std::string{"find_last_if used!"}); - -// test::c_find_if c3; -// TTS_EQUAL(c3.get_message(), test::messaging_context::base_message); -// kwk::find_last_if_not(c3, v, [](auto e) { return (e == 0); }); -// TTS_EQUAL(c3.get_message(), std::string{"find_if used!"}); -// } -// }; +namespace test +{ + struct c_find : public context_for_each {}; + struct c_find_if : public context_for_each {}; + struct c_find_if_not : public context_for_each {}; + struct c_find_first_of : public context_for_each {}; + struct c_find_last : public context_for_each {}; + struct c_find_last_if : public context_for_each {}; + struct c_find_last_if_not : public context_for_each {}; +} +namespace kwk +{ + template + constexpr auto find_if(test::c_find_if& ctx, Container const& c, Check cc) + { + ctx.set_message("find_if used!"); + return find_if(cpu, c, cc); + } + + template + constexpr auto find(test::c_find& ctx, Out const& o, auto v) + { + ctx.set_message("find used!"); + return find(cpu, o, v); + } + + template + constexpr auto find_if_not(test::c_find_if_not& ctx, Out const& o, Func f) + { + ctx.set_message("find_if_not used!"); + return find_if_not(cpu, o, f); + } + + template + constexpr auto find_first_of(test::c_find_first_of& ctx, Out const& o, Values const& v) + { + ctx.set_message("find_first_of used!"); + return find_first_of(cpu, o, v); + } + + template + constexpr auto find_last_if(test::c_find_last_if& ctx, Out const& o, Func f) + { + ctx.set_message("find_last_if used!"); + return find_last_if(cpu, o, f); + } + + template + constexpr auto find_last(test::c_find_last& ctx, Out const& o, auto v) + { + ctx.set_message("find_last used!"); + return find_last(cpu, o, v); + } + + template + constexpr auto find_last_if_not(test::c_find_last_if_not& ctx, Out const& o, Func f) + { + ctx.set_message("find_last_if_not used!"); + return find_last_if_not(cpu, o, f); + } + + // find_if -> kwk::__::for_until + // find_if_not -> find_if + // find_first_of -> find_if + // find_first_of -> any_of -> reduce -> for_each + // find -> find_if + // find_last_if -> find_if + // find_last -> find_last_if -> find_if + // find_last_if_not -> find_last_if -> find_if +}; + +TTS_CASE("Check for context overload - initially from find.hpp") +{ + auto v = test::make_view_1(); + + // find_if -> kwk::__::for_until + { + test::c_find_if c1; + TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); + kwk::find_if(c1, v, [&](auto){ return true; }); + TTS_EQUAL(c1.get_message(), std::string{"find_if used!"}); + + // kwk::__::for_until + // test::context_for_until c2; + // TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); + // kwk::find_if(c2, v, [&](auto e){ return true; }); + // TTS_EQUAL(c2.get_message(), std::string{"for_until used!"}); + } + + // find -> find_if -> kwk::__::for_until + { + test::c_find c1; + TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); + kwk::find(c1, v, 12); + TTS_EQUAL(c1.get_message(), std::string{"find used!"}); + + test::c_find_if c2; + TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); + kwk::find(c2, v, 12); + TTS_EQUAL(c2.get_message(), std::string{"find_if used!"}); + } + + // find_if_not -> find_if -> kwk::__::for_until + { + test::c_find_if_not c1; + TTS_EQUAL(c1.get_message(), 
test::messaging_context::base_message); + kwk::find_if_not(c1, v, [&](auto){ return true; }); + TTS_EQUAL(c1.get_message(), std::string{"find_if_not used!"}); + + test::c_find_if c2; + TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); + kwk::find_if_not(c2, v, [&](auto){ return true; }); + TTS_EQUAL(c2.get_message(), std::string{"find_if used!"}); + } + + auto v2 = test::make_view_2(); + + // find_first_of -> find_if + // find_first_of -> any_of -> reduce -> for_each + { + // vvv find_first_of -> find_if -> kwk::__::for_until vvv + test::c_find_first_of c1; + TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); + kwk::find_first_of(c1, v, v2); + TTS_EQUAL(c1.get_message(), std::string{"find_first_of used!"}); + + test::c_find_if c2; + TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); + kwk::find_first_of(c2, v, v2); + TTS_EQUAL(c2.get_message(0), std::string{"find_if used!"}); + TTS_EQUAL(c2.get_message(1), std::string{"map used!"}); + + // vvv find_first_of -> any_of -> reduce -> for_each vvv + test::c_any_of c3; + TTS_EQUAL(c3.get_message(), test::messaging_context::base_message); + kwk::find_first_of(c3, v, v2); + TTS_EQUAL(c3.get_message(), std::string{"any_of used!"}); + + test::c_reduce3 c4; + TTS_EQUAL(c4.get_message(), test::messaging_context::base_message); + kwk::find_first_of(c4, v, v2); + TTS_EQUAL(c4.get_message(), std::string{"reduce(ctx, in, f, init) used!"}); + + test::context_for_each c5; + TTS_EQUAL(c5.get_message(), test::messaging_context::base_message); + kwk::find_first_of(c5, v, v2); + TTS_EQUAL(c5.get_message(), std::string{"map used!"}); + } + + // TODO: finir cette partie lorsque les predicates seront finis eux aussi + + // find_last_if -> find_if -> kwk::__::for_until + { + test::c_find_last_if c1; + TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); + kwk::find_last_if(c1, v, [&](auto){ return true; }); + TTS_EQUAL(c1.get_message(), std::string{"find_last_if used!"}); + + test::c_find_if c2; + TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); + kwk::find_last_if(c2, v, [&](auto){ return true; }); + TTS_EQUAL(c2.get_message(), std::string{"find_if used!"}); + } + + // find_last -> find_last_if -> find_if -> kwk::__::for_until + { + test::c_find_last c1; + TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); + kwk::find_last(c1, v, 4); + TTS_EQUAL(c1.get_message(), std::string{"find_last used!"}); + + test::c_find_last_if c2; + TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); + kwk::find_last(c2, v, 4); + TTS_EQUAL(c2.get_message(), std::string{"find_last_if used!"}); + + test::c_find_if c3; + TTS_EQUAL(c3.get_message(), test::messaging_context::base_message); + kwk::find_last(c3, v, 4); + TTS_EQUAL(c3.get_message(), std::string{"find_if used!"}); + } + + // find_last_if_not -> find_last_if -> find_if -> kwk::__::for_until + { + test::c_find_last_if_not c1; + TTS_EQUAL(c1.get_message(), test::messaging_context::base_message); + kwk::find_last_if_not(c1, v, [](auto e) { return (e == 0); }); + TTS_EQUAL(c1.get_message(), std::string{"find_last_if_not used!"}); + + test::c_find_last_if c2; + TTS_EQUAL(c2.get_message(), test::messaging_context::base_message); + kwk::find_last_if_not(c2, v, [](auto e) { return (e == 0); }); + TTS_EQUAL(c2.get_message(), std::string{"find_last_if used!"}); + + test::c_find_if c3; + TTS_EQUAL(c3.get_message(), test::messaging_context::base_message); + kwk::find_last_if_not(c3, v, [](auto e) { return (e == 0); }); + 
TTS_EQUAL(c3.get_message(), std::string{"find_if used!"}); + } +}; diff --git a/test/algorithm/algos/sycl/reduce_ext.cpp b/test/algorithm/algos/sycl/reduce_ext.cpp new file mode 100644 index 00000000..b63779c2 --- /dev/null +++ b/test/algorithm/algos/sycl/reduce_ext.cpp @@ -0,0 +1,30 @@ +//====================================================================================================================== +/* + KIWAKU - Containers Well Made + Copyright : KIWAKU Contributors & Maintainers + SPDX-License-Identifier: BSL-1.0 +*/ +//====================================================================================================================== +// #include "../generic/replace.hpp" +#include +#include +#include +#include "test.hpp" + +#include +#include + +TTS_CASE("Check for kwk::reduce(in) 1D") +{ + const std::size_t data_size = 10; + + int data[data_size]; + + for (std::size_t i = 0; i < data_size; ++i) { data[i] = 2; } + + auto kv = kwk::view{kwk::source = data, kwk::of_size(data_size)}; + + auto res = kwk::reduce(kwk::sycl::default_context, kv); + + TTS_EQUAL(res, static_cast(data_size) * 2); +}; diff --git a/test/sycl_sandbox/bench/reduce.cpp b/test/sycl_sandbox/bench/reduce.cpp new file mode 100644 index 00000000..ba5b995d --- /dev/null +++ b/test/sycl_sandbox/bench/reduce.cpp @@ -0,0 +1,255 @@ +//====================================================================================================================== +/* + KIWAKU - Containers Well Made + Copyright : KIWAKU Contributors & Maintainers + SPDX-License-Identifier: BSL-1.0 +*/ +//====================================================================================================================== + +// Exécution GPU SYCL : +// icpx -fsycl -fsycl-targets=nvptx64-nvidia-cuda --cuda-path=/usr/local/cuda reduce.cpp -o e -O3 -std=c++20 -I/mnt/chaton/kiwaku/include && ./e + +// Exécution CPU SYCL : +// icpx -fsycl reduce.cpp -o e -O3 -std=c++20 -I/mnt/chaton/kiwaku/include && ./e + + +#include +#include +#include + +#include +#include +#include "../utils/utils.hpp" + +#define HEAVY true +#define INCREASE_SIZE_COUNT 20 +#define REPEAT_ITERATION_COUNT 6 +// #define ARRAY_LENGTH 102400000UL +#define ARRAY_LENGTH 25600000UL + +std::string make_prefix() +{ + return get_computer_name() + "_" + (HEAVY?"heavy":"copy") + + "_isc" + std::to_string(INCREASE_SIZE_COUNT) + + "_rp" + std::to_string(REPEAT_ITERATION_COUNT) + + "_al" + std::to_string(ARRAY_LENGTH) + + "_"; +} + +#if HEAVY + // auto global_fct = [](auto init, auto x) + // { + // return init + 1 / ((1 - std::atan(x)) + // + (1 + // / (2 + // + std::atan( 5 / (3 + std::atan(std::atan(std::atan(std::atan(x))))))))); + // }; + auto global_fct = [](auto init, auto x) + { + return init + x/2; + }; +#else + auto global_fct = [](auto init, auto x) { return init + x; }; // 1.0/(1.0+e) +#endif + + +namespace bench +{ + struct result_item + { + result_item(std::size_t witem_count_) : witem_count(witem_count_) {} + std::size_t witem_count; + std::vector host_alloc, copies_and_kernel, check; + // Nouvelle allocation mémoire hôte à chaque fois pour éviter que le compilo n'optimise les calculs + }; + + struct result_vector + { + // Elapsed time in milliseconds + std::vector items; + + void clear() { items.clear(); } + + void write_to_gfile() + { + // array size + // h1 h2 h3 ... + // c1 c2 c3 ... + // k1 k2 k3 ... + // array size + // ... 
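+      // Concretely, one block is written per result_item (i.e. per work-item
+      // count): a first line holding witem_count, then three lines of
+      // space-separated timings with one value per repetition, in the order
+      // host_alloc, copies_and_kernel, check. This is the block layout that
+      // the plot scripts' load_file() reads back.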
+ for (result_item& item : items) + { + write_f << item.witem_count << "\n"; + for (std::size_t i2 = 0; i2 < item.check.size(); ++i2) { + write_f << item.host_alloc[i2] << " "; + } + write_f << "\n"; + for (std::size_t i2 = 0; i2 < item.check.size(); ++i2) { + write_f << item.copies_and_kernel[i2] << " "; + } + write_f << "\n"; + for (std::size_t i2 = 0; i2 < item.check.size(); ++i2) { + write_f << item.check[i2] << " "; + } + write_f << "\n"; + } + } + }; +} + +void timer(std::vector& v, bench::chrono_t& c) +{ + v.push_back(c.ElapsedTimeMSReset()); +} + +void check_tolerance(auto v1, auto v2) +{ + const double ERROR_TOLERANCE = 0.001; + double diff = std::abs(v1 - v2); + double moy = std::abs(v1 + v2) / 2; + std::cout << " check_tolerance: diff(" << diff << ") moy(" << moy << ")\n"; + assert(diff < moy * ERROR_TOLERANCE); +} + +template +bench::result_item main_bench(Context& ctx, std::size_t witem_count, std::size_t repeat_count) +{ + std::size_t array_length = ARRAY_LENGTH; + bench::result_item ritem{witem_count}; + std::cout << "main_bench - len(" << array_length << ") - witem_count(" << witem_count << ")\n"; + + for (std::size_t r = 0; r < repeat_count; ++r) + { + bench::chrono_t chrono; chrono.Init(); + + double* in_array = new double[array_length]; + for (std::size_t i = 0; i < array_length; ++i) { in_array[i] = i; } + + auto in_view = kwk::view{kwk::source = in_array , kwk::of_size(array_length)}; + timer(ritem.host_alloc, chrono); + double res_device = ::kwk::reduce(ctx, in_view, global_fct, double{0}); + timer(ritem.copies_and_kernel, chrono); + + double res_verif = 0; + for (std::size_t i = 0; i < array_length; ++i) { res_verif = global_fct(res_verif, in_array[i]); } + std::cout << " main_bench len(" << array_length << ") res_device(" << res_device << ") res_verif(" << res_verif << ")\n"; + check_tolerance(res_verif, res_device); + timer(ritem.check, chrono); + delete[] in_array; + } + return ritem; +}; + + +bench::result_item main_bench_cpu_native(std::size_t witem_count, std::size_t repeat_count) +{ + std::size_t array_length = ARRAY_LENGTH; + bench::result_item ritem{witem_count}; + std::cout << "main_bench_cpu_native - len(" << array_length << ") - witem_count(" << witem_count << ")\n"; + + for (std::size_t r = 0; r < repeat_count; ++r) + { + bench::chrono_t chrono; chrono.Init(); + + double* in_array = new double[array_length]; + for (std::size_t i = 0; i < array_length; ++i) { in_array[i] = i; } + + timer(ritem.host_alloc, chrono); + double res_device = 0; + for (std::size_t i = 0; i < array_length; ++i) { res_device = global_fct(res_device, in_array[i]); } + timer(ritem.copies_and_kernel, chrono); + + double res_verif = 0; + for (std::size_t i = 0; i < array_length; ++i) { res_verif = global_fct(res_verif, in_array[i]); } + check_tolerance(res_verif, res_device); + timer(ritem.check, chrono); + delete[] in_array; + // std::cout << " main_bench_cpu_native - res_device(" << res_device << ") res_verif(" << res_verif << ")\n"; + } + return ritem; +}; + + + +int main(int argc, char* argv[]) +{ + std::string prefix = make_prefix(); + std::size_t file_version = 2; + std::vector workitem_size_vect; + std::size_t val = 10; + // for (std::size_t i = 0; i < INCREASE_SIZE_COUNT; ++i) + // { + // workitem_size_vect.push_back(val); + // val *= 2; + // } + workitem_size_vect.push_back(ARRAY_LENGTH/2); + + array_printer_t ap; + bench::result_vector rvect; + ap.add({"device", "alloc_host", "cpy+ker", "check"}); + + // sycl_context_cpu when compiled with "-fsycl" + // sycl_context_gpu when 
compiled with "-fsycl -fsycl-targets=nvptx64-nvidia-cuda --cuda-path=/usr/local/cuda" + write_f.open(prefix + "reduce_sycl_context_cpu" + ".txt"); + write_f << file_version << "\n"; + rvect.clear(); + ap.add({"SYCL", "-", "-", "-"}); + for (std::size_t size : workitem_size_vect) + { + kwk::sycl::default_context.set_workitem_count(size); + ap.add({"size", std::to_string(size), "-", "-"}); + bench::result_item r = main_bench(kwk::sycl::default_context, size, REPEAT_ITERATION_COUNT); + rvect.items.push_back(r); + for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) + { + ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); + } + } + rvect.write_to_gfile(); + write_f.close(); + + + + // write_f.open(prefix + "reduce_cpu_context.txt"); + // write_f << file_version << "\n"; + // rvect.clear(); + // ap.add({"CPU", "-", "-", "-"}); + // for (std::size_t size : workitem_size_vect) + // { + // ap.add({"size", std::to_string(size), "-", "-"}); + // bench::result_item r = main_bench(kwk::cpu, size, REPEAT_ITERATION_COUNT); + // rvect.items.push_back(r); + // for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) + // { + // ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); + // } + // } + // rvect.write_to_gfile(); + // write_f.close(); + + + + // write_f.open(prefix + "reduce_cpu_native.txt"); + // write_f << file_version << "\n"; + // rvect.clear(); + // ap.add({"CPU-NATIVE", "-", "-", "-"}); + // for (std::size_t size : workitem_size_vect) + // { + // ap.add({"size", std::to_string(size), "-", "-"}); + // bench::result_item r = main_bench_cpu_native(size, REPEAT_ITERATION_COUNT); + // rvect.items.push_back(r); + // for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) + // { + // ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); + // } + // } + // rvect.write_to_gfile(); + // write_f.close(); + + ap.print(); +} + +// On fait donc : +// test/algorithm/algos/ +// et des test/algorithm/algos/cpu/ sycl/ etc diff --git a/test/sycl_sandbox/bench/transform.cpp b/test/sycl_sandbox/bench/transform.cpp index 12ac0bbc..294033f8 100644 --- a/test/sycl_sandbox/bench/transform.cpp +++ b/test/sycl_sandbox/bench/transform.cpp @@ -10,9 +10,18 @@ // Exécution GPU SYCL : // icpx -fsycl -fsycl-targets=nvptx64-nvidia-cuda --cuda-path=/usr/local/cuda transform.cpp -o e -O3 -std=c++20 -I/mnt/chaton/kiwaku/include && ./e +// LEGEND: +// icpx -fsycl -fsycl-targets=nvptx64-nvidia-cuda --cuda-path=/opt/cuda transform.cpp -o e -O3 -std=c++20 -I/home/sylvainj/sshmount/kiwaku/include && ./e // Exécution CPU SYCL : // icpx -fsycl transform.cpp -o e -O3 -std=c++20 -I/mnt/chaton/kiwaku/include && ./e +// Sur LEGEND: +// icpx -fsycl transform.cpp -o e -O3 -std=c++20 -I/home/sylvainj/sshmount/kiwaku/include && ./e + + +// LEGEND: Run both x86_64 cpu and Nvidia GPU +// icpx -fsycl -fsycl-targets=nvptx64-nvidia-cuda,x86_64 --cuda-path=/opt/cuda transform.cpp -o e -O3 -std=c++20 -I/home/sylvainj/sshmount/kiwaku/include && ./e + #include @@ -24,13 +33,46 @@ #include "../utils/utils.hpp" #define HEAVY true -#define INCREASE_SIZE_COUNT 7 -#define REPEAT_ITERATION_COUNT 5 +#define INCREASE_SIZE_COUNT 1 +// #define INCREASE_SIZE_COUNT 6 +// #define INCREASE_SIZE_COUNT 1 +#define REPEAT_ITERATION_COUNT 7 + +// bool G_HEAVY = false; + +std::string make_prefix() +{ + return "transform4_" + + get_computer_name() + "_" + (HEAVY?"heavy":"copy") + + 
"_isc" + std::to_string(INCREASE_SIZE_COUNT) + + "_rp" + std::to_string(REPEAT_ITERATION_COUNT) + // + "_ndr" + std::to_string(ND_RANGE_LOCAL) + + "_"; +} + +// auto get_function() +// { +// if (G_HEAVY) +// { +// auto fct = [](auto x) +// { +// return 1 / ((1 - std::atan(x)) +// + (1 +// / (2 +// + std::atan( 5 / (3 + std::atan(std::atan(std::atan(std::atan(x))))))))); +// }; +// return fct; +// } +// else +// { +// return [](auto e) { return e; }; +// } +// } #if HEAVY - std::string make_suffix() { - return std::string{"_heavy_"} + get_computer_name(); - } + // std::string make_suffix() { + // return std::string{"_heavy_"} + get_computer_name(); + // } auto global_fct = [](auto x) { @@ -40,9 +82,9 @@ + std::atan( 5 / (3 + std::atan(std::atan(std::atan(std::atan(x))))))))); }; #else - std::string make_suffix() { - return std::string{"_copy_"} + get_computer_name(); - } + // std::string make_suffix() { + // return std::string{"_copy_"} + get_computer_name(); + // } auto global_fct = [](auto e) { return e; }; // 1.0/(1.0+e) #endif @@ -53,6 +95,7 @@ namespace bench { result_item(std::size_t array_size_) : array_size(array_size_) {} std::size_t array_size; + // std::size_t nd_range_size; std::vector host_alloc, copies_and_kernel, check; // Nouvelle allocation mémoire hôte à chaque fois pour éviter que le compilo n'optimise les calculs }; @@ -75,6 +118,7 @@ namespace bench for (result_item& item : items) { write_f << item.array_size << "\n"; + // write_f << item.nd_range_size << "\n"; for (std::size_t i2 = 0; i2 < item.check.size(); ++i2) { write_f << item.host_alloc[i2] << " "; } @@ -102,8 +146,10 @@ bench::result_item main_bench(Context& ctx, std::size_t array_length, std::size_ // - bench::result_item ritem{array_length}; + // ritem.nd_range_size = ND_RANGE_LOCAL; - std::cout << "main_bench - len(" << array_length << ")\n"; + // std::cout << "main_bench - array_length(" << array_length << ") - nd_range_size(" << ND_RANGE_LOCAL << ")\n"; + std::cout << "main_bench - array_length(" << array_length << ")\n"; const double ERROR_TOLERANCE = 0.001; @@ -182,49 +228,120 @@ bench::result_item main_bench_cpu_naive(std::size_t array_length, std::size_t re int main(int argc, char* argv[]) { - std::string suffix = make_suffix(); + std::string prefix = make_prefix(); std::size_t file_version = 2; std::vector array_size_vect; - std::size_t val = 1600000; + std::size_t val = 12800000 * 128; // 32 -> 128 for (std::size_t i = 0; i < INCREASE_SIZE_COUNT; ++i) { array_size_vect.push_back(val); val *= 2; } + // std::vector nd_range_local_size; + // for (std::size_t val = 8; val <= 1024; val *= 2) + // { + // nd_range_local_size.push_back(val); + // } + + std::size_t array_size = array_size_vect[0]; + array_printer_t ap; bench::result_vector rvect; ap.add({"device", "alloc_host", "cpy+ker", "check"}); - // sycl_context_cpu when compiled with "-fsycl" - // sycl_context_gpu when compiled with "-fsycl -fsycl-targets=nvptx64-nvidia-cuda --cuda-path=/usr/local/cuda" - write_f.open("sycl_context_cpu" + suffix + ".txt"); - write_f << file_version << "\n"; - rvect.clear(); - ap.add({"SYCL", "-", "-", "-"}); - for (std::size_t size : array_size_vect) - { - ap.add({"size", std::to_string(size), "-", "-"}); - bench::result_item r = main_bench(kwk::sycl::default_context, size, REPEAT_ITERATION_COUNT); - rvect.items.push_back(r); - for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) - { - ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); - } - } - 
rvect.write_to_gfile(); - write_f.close(); + kwk::sycl::context sycl_gpu_ctx{::sycl::gpu_selector_v}; + kwk::sycl::context sycl_cpu_ctx{::sycl::cpu_selector_v}; + + // // sycl_context_cpu when compiled with "-fsycl" + // // sycl_context_gpu when compiled with "-fsycl -fsycl-targets=nvptx64-nvidia-cuda --cuda-path=/usr/local/cuda" + // write_f.open(prefix + "sycl_gpu_ndr.txt"); + // write_f << file_version << "\n"; + // rvect.clear(); + // ap.add({"SYCL", "-", "-", "-"}); + // for (std::size_t ndr_size : nd_range_local_size) + // { + // ND_RANGE_LOCAL = ndr_size; + // ap.add({"array_size", std::to_string(array_size), "-", "-"}); + // ap.add({"ndr_size", std::to_string(ndr_size), "-", "-"}); + // bench::result_item r = main_bench(sycl_gpu_ctx, array_size, REPEAT_ITERATION_COUNT); // kwk::sycl::default_context + // rvect.items.push_back(r); + // for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) + // { + // ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); + // } + // } + // rvect.write_to_gfile(); + // write_f.close(); + + // write_f.open(prefix + "sycl_cpu_ndr.txt"); + // write_f << file_version << "\n"; + // rvect.clear(); + // ap.add({"SYCL", "-", "-", "-"}); + // for (std::size_t ndr_size : nd_range_local_size) + // { + // ND_RANGE_LOCAL = ndr_size; + // ap.add({"array_size", std::to_string(array_size), "-", "-"}); + // ap.add({"ndr_size", std::to_string(ndr_size), "-", "-"}); + // bench::result_item r = main_bench(sycl_cpu_ctx, array_size, REPEAT_ITERATION_COUNT); + // rvect.items.push_back(r); + // for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) + // { + // ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); + // } + // } + // rvect.write_to_gfile(); + // write_f.close(); + + // write_f.open(prefix + "cpu_context_ndr.txt"); + // write_f << file_version << "\n"; + // rvect.clear(); + // ap.add({"CPU", "-", "-", "-"}); + // for (std::size_t ndr_size : nd_range_local_size) + // { + // ND_RANGE_LOCAL = ndr_size; + // ap.add({"array_size", std::to_string(array_size), "-", "-"}); + // ap.add({"ndr_size", std::to_string(ndr_size), "-", "-"}); + // bench::result_item r = main_bench(kwk::cpu, array_size, REPEAT_ITERATION_COUNT); + // rvect.items.push_back(r); + // for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) + // { + // ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); + // } + // } + // rvect.write_to_gfile(); + // write_f.close(); - write_f.open("cpu_context" + suffix + ".txt"); + // sycl_context_cpu when compiled with "-fsycl" + // sycl_context_gpu when compiled with "-fsycl -fsycl-targets=nvptx64-nvidia-cuda --cuda-path=/usr/local/cuda" + // write_f.open(prefix + "sycl_gpu.txt"); + // write_f << file_version << "\n"; + // rvect.clear(); + // ap.add({"SYCL", "-", "-", "-"}); + // for (std::size_t size : array_size_vect) + // { + // ap.add({"size", std::to_string(size), "-", "-"}); + // bench::result_item r = main_bench(sycl_gpu_ctx, size, REPEAT_ITERATION_COUNT); // kwk::sycl::default_context + // rvect.items.push_back(r); + // for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) + // { + // ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); + // } + // } + // rvect.write_to_gfile(); + // write_f.close(); + + + write_f.open(prefix + "sycl_cpu4.txt"); write_f << file_version << "\n"; rvect.clear(); - 
ap.add({"CPU", "-", "-", "-"}); + ap.add({"SYCL", "-", "-", "-"}); for (std::size_t size : array_size_vect) { ap.add({"size", std::to_string(size), "-", "-"}); - bench::result_item r = main_bench(kwk::cpu, size, REPEAT_ITERATION_COUNT); + bench::result_item r = main_bench(sycl_cpu_ctx, size, REPEAT_ITERATION_COUNT); rvect.items.push_back(r); for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) { @@ -234,24 +351,44 @@ int main(int argc, char* argv[]) rvect.write_to_gfile(); write_f.close(); - - - write_f.open("cpu_native" + suffix + ".txt"); - write_f << file_version << "\n"; - rvect.clear(); - ap.add({"CPU-NATIVE", "-", "-", "-"}); - for (std::size_t size : array_size_vect) - { - ap.add({"size", std::to_string(size), "-", "-"}); - bench::result_item r = main_bench_cpu_naive(size, REPEAT_ITERATION_COUNT); - rvect.items.push_back(r); - for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) - { - ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); - } - } - rvect.write_to_gfile(); - write_f.close(); + // write_f.open(prefix + "cpu_context.txt"); + // write_f << file_version << "\n"; + // rvect.clear(); + // ap.add({"CPU", "-", "-", "-"}); + // for (std::size_t size : array_size_vect) + // { + // ap.add({"size", std::to_string(size), "-", "-"}); + // bench::result_item r = main_bench(kwk::cpu, size, REPEAT_ITERATION_COUNT); + // rvect.items.push_back(r); + // for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) + // { + // ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); + // } + // } + // rvect.write_to_gfile(); + // write_f.close(); + + + + + + + // write_f.open(prefix + "cpu_native.txt"); + // write_f << file_version << "\n"; + // rvect.clear(); + // ap.add({"CPU-NATIVE", "-", "-", "-"}); + // for (std::size_t size : array_size_vect) + // { + // ap.add({"size", std::to_string(size), "-", "-"}); + // bench::result_item r = main_bench_cpu_naive(size, REPEAT_ITERATION_COUNT); + // rvect.items.push_back(r); + // for (std::size_t i = 0; i < REPEAT_ITERATION_COUNT; ++i) + // { + // ap.add({"", std::to_string(r.host_alloc[i]), std::to_string(r.copies_and_kernel[i]), std::to_string(r.check[i])}); + // } + // } + // rvect.write_to_gfile(); + // write_f.close(); ap.print(); } diff --git a/test/sycl_sandbox/plot/archives/plot_diff_2023-05-01 copy.py b/test/sycl_sandbox/plot/archives/plot_diff_2023-05-01 copy.py new file mode 100644 index 00000000..82115a64 --- /dev/null +++ b/test/sycl_sandbox/plot/archives/plot_diff_2023-05-01 copy.py @@ -0,0 +1,305 @@ +#!/usr/bin/python + +# Hello world python program + +# Lancer le script : python3 ./2022-02-08_sparseccl.py + +import matplotlib.pyplot as plt +import numpy as np +import statistics as stat +import sys +import math +import plot_utils as pu + +# Plots diff between Lorentz standalone and Kiwaku, omitting Covfie + + +VERSION_ATTENDUE = 2 + +use_acts_field = True + +plot_diff = True # Plot values relative to covfie, or plot absolute values + +plot_cycles_instead_of_time = False + +per_particle_values = True # Divide the values by the number of particles and steps + + +computer_name = "blop" # aussi utilisé pour le nom du fichier d'entrée +str_min_iteration_count = "10" # added to plot title only + + +# ============= Gestion de la taille +my_dpi = 96 +# output_image_name = "no_name" +field_type_name = "" # InterpolateNN_LayoutStride + +if use_acts_field: + field_type_name = "acts-field" +else: + field_type_name = 
"constant-field" + +output_image_name = field_type_name # InterpolateNN_LayoutStride +output_image_ver = "v1" + + +image_ratio = 880 / 480 + +image_width = 1280 # 1280 +image_height = 0 +image_scale_factor = image_width / 640 +line_width = image_scale_factor * 1.5 +image_height = image_width / image_ratio #(image_width / 640) * 480 + +plt.figure(figsize=(image_width/my_dpi, image_height/my_dpi) , dpi=my_dpi) +# output_image_name = "lorentz_compare_" + computer_name + "_" + output_image_name + ".png" +output_image_name = "lorentz_compare_" + computer_name + "_" + output_image_name + ".png" + +MY_SIZE = (10 * image_scale_factor) +MY_SIZE_SMALL = (6 * image_scale_factor) +TITLE_SIZE = (12 * image_scale_factor) + +#plt.rc('font', size=MY_SIZE) # controls default text sizes +plt.rc('axes', titlesize=TITLE_SIZE) # fontsize of the axes title +plt.rc('axes', labelsize=MY_SIZE) # fontsize of the x and y labels +plt.rc('xtick', labelsize=MY_SIZE) # fontsize of the tick labels +plt.rc('ytick', labelsize=MY_SIZE) # fontsize of the tick labels +plt.rc('legend', fontsize=MY_SIZE) # legend fontsize +#plt.rc('figure', titlesize=MY_SIZE) # fontsize of the figure title +# plt.rcParams.update({'font.size': MY_SIZE}) + +# fin gestion de la taille ============= + +# Ici, une courbe n'est qu'une suite toute basique de points + +# lorentz_external_v2_fname = "build/lorentz-euler_v2.txt" + +def remove_newline(list): + if (len(list) == 1) and (list[0] == "\n"): + del list[0] + # print("remove_newline newsize = " + str(len(list))) + else: + if (len(list) != 0): + list[len(list)-1] = list[len(list)-1].rstrip("\n") + return list + +def list_str_to_int(list, divide_by = None): + # print("list_str_to_int len = " + str(len(list)) + " 1st value = "+ str(list[0])) + if len(list) == 0: + list.append(0) + else: + if divide_by == None: + list = [int(i) for i in list] + else: + list = [int(i)/divide_by for i in list] + return list + +# Charge le fichier de bench "path" et retourne la liste de ce qui a été lu. +def load_file(path): + global VERSION_ATTENDUE + bench_list = [] + + with open(path) as fp: + version = fp.readline() # version du fichier actuel (doit être 106 et non plus 105) + print("Version du fichier : {}".format(version)) + + if (int(version) != VERSION_ATTENDUE): + sys.exit("ERREUR, VERSION DU FICHIER NON COMPATIBLE : " + str(int(version)) + ". VERSION ATTENDUE = " + str(VERSION_ATTENDUE)) + + line = fp.readline() + + while line: + words = remove_newline(line.split(" ")) + # words[len(words)-1] = words[len(words)-1].rstrip("\n") + # words.remove('\n') + # print(words) + + # Autant de fois qu'il y a d'évènements (nouvelle ligne) : + # v1: + # particles | steps | imom | repeat_count | elapsed_time | check_string + + # v2: + # particles | steps | imom | repeat_count | check_string + # elapsed_time1 | ... 
| elapsed_timeN + + header = {} # dictionnaire vide + header["particles"] = int(words[0]) + header["steps"] = int(words[1]) + header["imom"] = int(words[2]) + header["repeat_count"] = int(words[3]) + header["check_string"] = words[4] + header["ptotal"] = header["particles"] * header["steps"] + + # times_l = [] + # cycles_l = [] + divide_factor = 1 + if per_particle_values: + if plot_cycles_instead_of_time: + divide_factor = header["ptotal"] + else: + divide_factor = header["ptotal"] / 1000 + else: + if plot_cycles_instead_of_time: + divide_factor = 1e9 + else: + divide_factor = 1e6 + + times_int = pu.filter_outliers(list_str_to_int(remove_newline(fp.readline().split(" ")), divide_factor)) + cycles_int = pu.filter_outliers(list_str_to_int(remove_newline(fp.readline().split(" ")), divide_factor)) + + header["times_l"] = times_int + header["cycles_l"] = cycles_int + + header["elapsed_time"] = stat.median(times_int) + header["cycle"] = stat.median(cycles_int) + + + # print(header["times_l"]) + + bench_list.append(header) + + # Lecture de la prochaine ligne + line = fp.readline() + + return bench_list + + + + +# Constant field: blop-debian11 +# lorentz_covfie_list = load_file("data/lorentz-euler_covfie_blop-debian11_2023-04-17_4096-65536_1024-2048_128-131072_constant-field.txt") +# lorentz_standalone_list = load_file("data/lorentz_standalone_blop-debian11_2023-04-16_4096-65536_1024-2048_128-131072_constant-field.txt") +# lorentz_kiwaku_list = load_file("data/lorentz_kiwaku_blop-debian11_2023-04-16_4096-65536_1024-2048_128-131072_constant-field.txt") + +if use_acts_field: + + lorentz_standalone_list = load_file("data/d9_blop/lorentz_opti_sclock_it10_standalone_blop-debian11_512-65536_1024_128_acts-field.txt") + lorentz_kiwaku_list = load_file("data/d9_blop/lorentz_opti_sclock_it10_kiwaku_blop-debian11_512-65536_1024_128_acts-field.txt") + + + +# TEMPORARILY DISABLE ERROR CHECKING +# pu.check_same_results(lorentz_standalone_list, lorentz_kiwaku_list) +# Error with lorentz_kiwaku_list, so it seems. +# pu.check_same_results(lorentz_standalone_list, lorentz_covfie_list) + +# external_1D = make_1D_list(lorentz_external_list, "elapsed_time") + +# ptotal_1D = pu.make_1D_list_every_1line(lorentz_standalone_list, "particles", "steps", 1) +ptotal_1D = pu.make_1D_list_every_1line(lorentz_standalone_list, "particles", 1) + +def make_violin_plot_list(bench_list, keyword, compare_to_list = None): + l2_res = [] + + for header in bench_list: + l2_res.append(header[keyword].copy()) + + if compare_to_list != None: + l2_compare = [] + + for header in compare_to_list: + l2_compare.append(header[keyword]) # <- ! not a copy ! 
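+        # When compare_to_list is given, every sample below is rescaled to a
+        # percentage of the median of the matching reference series, so 100
+        # means "as fast as the reference" and lower values mean faster runs.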
+ + for il in range(0, len(l2_res)): + l1_res = l2_res[il] + l1_compare = l2_compare[il] + compare_value = stat.median(l1_compare) + + for i in range(0, len(l1_res)): + old = l1_res[i] + l1_res[i] = 100 * l1_res[i] / compare_value # l1_compare[i] + l2_res[il] = l1_res + + return l2_res + + +compare_to_list = None +if plot_diff: + compare_to_list = lorentz_standalone_list + +if plot_cycles_instead_of_time: + violin_keyword = "cycles_l" + plot_keyword = "cycle" +else: + violin_keyword = "times_l" + plot_keyword = "elapsed_time" + +# pu.draw_violin_plot("grey", make_violin_plot_list(lorentz_standalone_list, violin_keyword, compare_to_list)) +# pu.draw_violin_plot("blue", make_violin_plot_list(lorentz_kiwaku_list, violin_keyword, compare_to_list)) + + +if plot_diff: + ldiff0 = pu.make_diff_list(compare_to_list, lorentz_standalone_list, plot_keyword) + ldiff2 = pu.make_diff_list(compare_to_list, lorentz_kiwaku_list, plot_keyword) +else: + ldiff0 = pu.make_absolute_list(lorentz_standalone_list, plot_keyword) + ldiff2 = pu.make_absolute_list(lorentz_kiwaku_list, plot_keyword) + +# (opti matrix) +plt.plot (range(1, len(ldiff0)+1), ldiff0, color="grey", label="Standalone", linestyle="dashdot", linewidth=line_width) +plt.plot (range(1, len(ldiff2)+1), ldiff2, color="blue", label="Kiwaku", linestyle="solid", linewidth=line_width) + + +plt.rcParams['grid.linestyle'] = "-" +plt.rcParams['grid.alpha'] = 0.15 +plt.rcParams['grid.color'] = "black" ##cccccc +plt.grid(linewidth=line_width/20) + +plt.xticks(range(1, len(ldiff0)+1), ptotal_1D) + +unit_name = "" +if plot_diff: + unit_name = "relative " +else: + unit_name = "" + +if plot_cycles_instead_of_time: + unit_name += "cpu cycles" +else: + unit_name += "time" + +plt.title("Lorentz-Euler - " + computer_name + " - " + field_type_name + " - " + unit_name + " - iter/pt: " + str_min_iteration_count) + +pdiff_str = "" +if plot_diff: + pdiff_str = "diff" +else: + pdiff_str = "absolute" + +unit_name_file = unit_name.replace(" ", "_") +if per_particle_values: + unit_name_file += "_per-particle" +out_fname = "2023-05-01_" + computer_name + "_" + field_type_name + "_" + unit_name_file + "_" + pdiff_str + ".png" + + +# plt.ylabel('Elapsed time (µs)') +if plot_diff: + plt.ylabel('Relative performance (%), lower is better') +else: + if per_particle_values: + if plot_cycles_instead_of_time: + plt.ylabel('CPU cycles per (particle*steps)') + else: + plt.ylabel('Elapsed time (nanoseconds) per (particle*steps)') + else: + if plot_cycles_instead_of_time: + plt.ylabel('CPU cycles in billions (1e9)') + else: + plt.ylabel('Elapsed time in seconds') + + + + +plt.xlabel('Number of particles') +# Ascending number of particles (and imom), in millions (10^6) +#plt.ylim([-5, 100]) +plt.legend() +# global_drawn_x_variables_number+1 +# plt.xticks(range(1, 6), x_list_curve_drawn) # = x_list_shared et x_list_acc + +plt.ylim([0, 113]) + +plt.savefig(out_fname, format='png') #, dpi=my_dpi) + +plt.show() +print ("Hello World!") diff --git a/test/sycl_sandbox/plot/archives/plot_distribution.py b/test/sycl_sandbox/plot/archives/plot_distribution.py new file mode 100644 index 00000000..05e7bc78 --- /dev/null +++ b/test/sycl_sandbox/plot/archives/plot_distribution.py @@ -0,0 +1,200 @@ +#!/usr/bin/python + +# Hello world python program + +# Lancer le script : python3 ./2022-02-08_sparseccl.py + +import matplotlib.pyplot as plt +import numpy as np +import statistics as stat +import sys +import math +import plot_utils as pu + +# Plots diff between Lorentz standalone and Kiwaku, omitting Covfie + 
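+
+# Illustrative sketch only (not called by this script): the helper name and its
+# arguments are invented here to document the input layout that load_file()
+# below expects -- a version line, then three lines of space-separated integer
+# timings (host alloc, copies + kernel, checksum).
+def _write_example_bench_file(path, host_alloc, copy_and_kernel, checksum, version=2):
+    with open(path, "w") as fp:
+        fp.write(str(version) + "\n")
+        for samples in (host_alloc, copy_and_kernel, checksum):
+            fp.write(" ".join(str(int(s)) for s in samples) + "\n")
+
+# e.g. _write_example_bench_file("kwk_buffers_cpu.txt", [12, 13, 12], [40, 41, 39], [5, 5, 6])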
+ +VERSION_ATTENDUE = 2 + +computer_name = "blop" # aussi utilisé pour le nom du fichier d'entrée +str_min_iteration_count = "10" # added to plot title only + + +# ============= Gestion de la taille +my_dpi = 96 +output_image_name = "no_name" +field_type_name = "" # InterpolateNN_LayoutStride + +image_ratio = 880 / 480 + +image_width = 1280 # 1280 +image_height = 0 +image_scale_factor = image_width / 640 +line_width = image_scale_factor * 1.5 +image_height = image_width / image_ratio #(image_width / 640) * 480 + +plt.figure(figsize=(image_width/my_dpi, image_height/my_dpi) , dpi=my_dpi) +# output_image_name = "lorentz_compare_" + computer_name + "_" + output_image_name + ".png" +output_image_name = "lorentz_compare_" + computer_name + "_" + output_image_name + ".png" + +MY_SIZE = (10 * image_scale_factor) +MY_SIZE_SMALL = (6 * image_scale_factor) +TITLE_SIZE = (12 * image_scale_factor) + +#plt.rc('font', size=MY_SIZE) # controls default text sizes +plt.rc('axes', titlesize=TITLE_SIZE) # fontsize of the axes title +plt.rc('axes', labelsize=MY_SIZE) # fontsize of the x and y labels +plt.rc('xtick', labelsize=MY_SIZE) # fontsize of the tick labels +plt.rc('ytick', labelsize=MY_SIZE) # fontsize of the tick labels +plt.rc('legend', fontsize=MY_SIZE) # legend fontsize +#plt.rc('figure', titlesize=MY_SIZE) # fontsize of the figure title +# plt.rcParams.update({'font.size': MY_SIZE}) + +# fin gestion de la taille ============= + +# Ici, une courbe n'est qu'une suite toute basique de points + +# lorentz_external_v2_fname = "build/lorentz-euler_v2.txt" + +def remove_newline(list): + if (len(list) == 1) and (list[0] == "\n"): + del list[0] + # print("remove_newline newsize = " + str(len(list))) + else: + if (len(list) != 0): + list[len(list)-1] = list[len(list)-1].rstrip("\n") + return list + +def remove_empty_words(list): + if (len(list) == 0): + return list + + ri = 0 + for i in range(0, len(list)): + if (list[ri] == ''): + del list[ri] + else: + ri += 1 + + if (len(list) == 1) and (list[0] == "\n"): + del list[0] + # print("remove_newline newsize = " + str(len(list))) + else: + if (len(list) != 0): + list[len(list)-1] = list[len(list)-1].rstrip("\n") + return list + +def list_str_to_int(list, divide_by = None): + print("list_str_to_int len = " + str(len(list)) + " 1st value = "+ str(list[0]) + "values:") + print(list) + if len(list) == 0: + list.append(0) + else: + if divide_by == None: + list = [int(i) for i in list] + else: + list = [int(i)/divide_by for i in list] + return list + +# Charge le fichier de bench "path" et retourne la liste de ce qui a été lu. +def load_file(path): + global VERSION_ATTENDUE + bench_list = {} + + with open(path) as fp: + version = fp.readline() # version du fichier actuel (doit être 106 et non plus 105) + print("Version du fichier : {}".format(version)) + + if (int(version) != VERSION_ATTENDUE): + sys.exit("ERREUR, VERSION DU FICHIER NON COMPATIBLE : " + str(int(version)) + ". 
VERSION ATTENDUE = " + str(VERSION_ATTENDUE)) + + divide_factor = 1 + + bench_list["raw_host_alloc"] = list_str_to_int(remove_empty_words(remove_newline(fp.readline().split(" "))), divide_factor) + bench_list["raw_copy_and_kernel"] = list_str_to_int(remove_empty_words(remove_newline(fp.readline().split(" "))), divide_factor) + bench_list["raw_checksum"] = list_str_to_int(remove_empty_words(remove_newline(fp.readline().split(" "))), divide_factor) + + bench_list["no-outier_host_alloc"] = pu.filter_outliers(bench_list["raw_host_alloc"] ) + bench_list["no-outier_copy_and_kernel"] = pu.filter_outliers(bench_list["raw_copy_and_kernel"]) + bench_list["no-outier_checksum"] = pu.filter_outliers(bench_list["raw_checksum"] ) + + bench_list["med_host_alloc"] = stat.median(bench_list["raw_host_alloc"] ) + bench_list["med_copy_and_kernel"] = stat.median(bench_list["raw_copy_and_kernel"]) + bench_list["med_checksum"] = stat.median(bench_list["raw_checksum"] ) + + + return bench_list + +# dpcpp kwk_axpy_bench.cpp -o e.exe -O3 -std=c++20 -I/home/data_sync/academique/These/kiwaku_2023/kiwaku/include/ && ./e.exe +# python3 plot_distribution.py + +lcpu = load_file("kwk_buffers_cpu.txt") +lomp = load_file("kwk_buffers_omp.txt") +lsycl = load_file("kwk_buffers_sycl.txt") + +values1 = [lcpu["med_host_alloc"], lomp["med_host_alloc"], lsycl["med_host_alloc"] ] +values2 = [lcpu["med_copy_and_kernel"], lomp["med_copy_and_kernel"], lsycl["med_copy_and_kernel"] ] +values3 = [lcpu["med_checksum"], lomp["med_checksum"], lsycl["med_checksum"] ] +# values2 = [med1_copy_and_kernel, med2_copy_and_kernel, med3_copy_and_kernel] +# values3 = [med1_checksum, med2_checksum, med3_checksum ] + + +# Data +# groups = ['G1', 'G2', 'G3', 'G4', 'G5'] +# values1 = [12, 19, 14, 27, 16] +# values2 = [21, 30, 15, 17, 20] + +# fig, ax = plt.subplots() + +color_host_alloc = "blue" +color_kernel = "red" +color_checksum = "green" + +# Stacked bar chart +# plt.bar(groups, values1, label = "host alloc", color=color_host_alloc) +# plt.bar(groups, values2, bottom = values1, label = "copies + kernel", color=color_kernel) +# plt.bar(groups, values3, bottom = np.add(values1, values2), label = "checksum", color=color_checksum) + + +# Create a figure instance +# fig = plt.figure() + +# Create an axes instance +# ax = fig.add_axes([0,0,1,1]) + +# Create the boxplot +# plt.violinplot(positions=[0, 1, 2], dataset=[values1, values2, values3]) + +# pu.draw_violin_plot("black", [values1, values2, values3]) +ds = [lcpu["raw_host_alloc"], lomp["raw_host_alloc"], lsycl["raw_host_alloc"]] +pu.draw_violin_plot_pos(color_face=color_host_alloc, color_edge="black", dataset_=ds, positions_=[0, 1, 2]) + + +l1 = [x + lcpu ["med_host_alloc"] for x in lcpu ["raw_copy_and_kernel"]] +l2 = [x + lomp ["med_host_alloc"] for x in lomp ["raw_copy_and_kernel"]] +l3 = [x + lsycl["med_host_alloc"] for x in lsycl["raw_copy_and_kernel"]] +ds = [l1, l2, l3] +pu.draw_violin_plot_pos(color_face=color_kernel, color_edge="black", dataset_=ds, positions_=[0, 1, 2]) + + +l1 = [x + lcpu ["med_host_alloc"] + lcpu ["med_copy_and_kernel"] for x in lcpu ["raw_checksum"]] +l2 = [x + lomp ["med_host_alloc"] + lomp ["med_copy_and_kernel"] for x in lomp ["raw_checksum"]] +l3 = [x + lsycl["med_host_alloc"] + lsycl["med_copy_and_kernel"] for x in lsycl["raw_checksum"]] +ds = [l1, l2, l3] +pu.draw_violin_plot_pos(color_face=color_checksum, color_edge="black", dataset_=ds, positions_=[0, 1, 2]) + + + +plt.xlabel('Number of particles') +# Ascending number of particles (and imom), in millions (10^6) 
+#plt.ylim([-5, 100]) +plt.legend() +# global_drawn_x_variables_number+1 +# plt.xticks(range(1, 6), x_list_curve_drawn) # = x_list_shared et x_list_acc + +# plt.ylim([0, 113]) + +# plt.savefig(out_fname, format='png') #, dpi=my_dpi) + +plt.show() +print ("Hello World!") diff --git a/test/sycl_sandbox/plot/plot_2023-11-04.py b/test/sycl_sandbox/plot/plot_2023-11-04_reduce.py similarity index 89% rename from test/sycl_sandbox/plot/plot_2023-11-04.py rename to test/sycl_sandbox/plot/plot_2023-11-04_reduce.py index b8fe951a..78aad821 100644 --- a/test/sycl_sandbox/plot/plot_2023-11-04.py +++ b/test/sycl_sandbox/plot/plot_2023-11-04_reduce.py @@ -145,21 +145,22 @@ def load_file(path): is_heavy = False if is_heavy: - suffix = "_heavy_blop" - suffix_title = "transform: [](auto e) { return very_complicated_stuff; }" + # suffix = "_heavy_blop" + suffix_title = "reduce: [](auto init, auto x) { return very_complicated_stuff; }" - lcpu = load_file("heavy/cpu_context" + suffix + ".txt") - lcpu_native = load_file("heavy/cpu_native" + suffix + ".txt") - lsycl_cpu = load_file("heavy/sycl_context_cpu" + suffix + ".txt") - lsycl_gpu = load_file("heavy/sycl_context_gpu" + suffix + ".txt") + lcpu = load_file("reduce/blop_heavy_isc20_rp6_al102400000_reduce_cpu_context.txt") + lcpu_native = load_file("reduce/blop_heavy_isc20_rp6_al102400000_reduce_cpu_native.txt") + lsycl_cpu = load_file("reduce/blop_heavy_isc20_rp6_al102400000_reduce_sycl_context_cpu.txt") + lsycl_gpu = load_file("reduce/blop_heavy_isc20_rp6_al102400000_reduce_sycl_context_gpu.txt") else: - suffix = "_copy_blop" - suffix_title = "transform: [](auto e) { return e; }" + # suffix = "_copy_blop" + suffix_title = "reduce: [](auto init, auto x) { return init + x; }" + + lcpu = load_file("reduce/blop_copy_isc20_rp6_al102400000_reduce_cpu_context.txt") + lcpu_native = load_file("reduce/blop_copy_isc20_rp6_al102400000_reduce_cpu_native.txt") + lsycl_cpu = load_file("reduce/blop_copy_isc20_rp6_al102400000_reduce_sycl_context_cpu.txt") + lsycl_gpu = load_file("reduce/blop_copy_isc20_rp6_al102400000_reduce_sycl_context_gpu.txt") - lcpu = load_file("copy/cpu_context" + suffix + ".txt") - lcpu_native = load_file("copy/cpu_native" + suffix + ".txt") - lsycl_cpu = load_file("copy/sycl_context_cpu" + suffix + ".txt") - lsycl_gpu = load_file("copy/sycl_context_gpu" + suffix + ".txt") # values1 = [lcpu["med_host_alloc"], lomp["med_host_alloc"], lsycl["med_host_alloc"] ] # values2 = [lcpu["med_copy_and_kernel"], lomp["med_copy_and_kernel"], lsycl["med_copy_and_kernel"] ] @@ -169,7 +170,7 @@ def load_file(path): # for item in lcpu: # array_length_list.append(item["array_length"]) -array_length_list = pu.make_1D_list_every_1line_divided(lcpu, "array_length", 1, 1000000) +array_length_list = pu.make_1D_list_every_1line(lcpu, "array_length", 3) # Data @@ -259,7 +260,7 @@ def load_file(path): plt.xticks(range(1, len(array_length_list)+1), array_length_list) - plt.xlabel('Array length, 10^6') + plt.xlabel('Workitem count') plt.ylabel('Elapsed time (ms) (lower is better)') diff --git a/test/sycl_sandbox/plot/plot_2023-11-10_transform.py b/test/sycl_sandbox/plot/plot_2023-11-10_transform.py new file mode 100644 index 00000000..b8310dca --- /dev/null +++ b/test/sycl_sandbox/plot/plot_2023-11-10_transform.py @@ -0,0 +1,343 @@ +#!/usr/bin/python + +# Hello world python program + +# Lancer le script : python3 ./2022-02-08_sparseccl.py + +import matplotlib.pyplot as plt +import numpy as np +import statistics as stat +import sys +import math +import plot_utils as pu + +# Plots 
transform benchmark timings: kwk::context::cpu vs kwk::context::sycl (CPU and GPU)
+
+
+VERSION_ATTENDUE = 2
+
+computer_name = "blop" # also used for the input file name
+str_min_iteration_count = "10" # added to plot title only
+
+
+# ============= Figure size settings
+my_dpi = 96
+output_image_name = "no_name"
+field_type_name = "" # InterpolateNN_LayoutStride
+
+image_ratio = 880 / 480
+
+image_width = 1280 # 1280
+image_height = 0
+image_scale_factor = image_width / 640
+line_width = image_scale_factor * 1.5
+image_height = image_width / image_ratio #(image_width / 640) * 480
+
+plt.figure(figsize=(image_width/my_dpi, image_height/my_dpi) , dpi=my_dpi)
+# output_image_name = "lorentz_compare_" + computer_name + "_" + output_image_name + ".png"
+output_image_name = "lorentz_compare_" + computer_name + "_" + output_image_name + ".png"
+
+MY_SIZE = (10 * image_scale_factor)
+MY_SIZE_SMALL = (6 * image_scale_factor)
+TITLE_SIZE = (12 * image_scale_factor)
+
+#plt.rc('font', size=MY_SIZE)         # controls default text sizes
+plt.rc('axes', titlesize=TITLE_SIZE)  # fontsize of the axes title
+plt.rc('axes', labelsize=MY_SIZE)     # fontsize of the x and y labels
+plt.rc('xtick', labelsize=MY_SIZE)    # fontsize of the tick labels
+plt.rc('ytick', labelsize=MY_SIZE)    # fontsize of the tick labels
+plt.rc('legend', fontsize=MY_SIZE)    # legend fontsize
+#plt.rc('figure', titlesize=MY_SIZE)  # fontsize of the figure title
+# plt.rcParams.update({'font.size': MY_SIZE})
+
+# end of figure size settings =============
+
+# Here, a curve is just a plain sequence of points
+
+# lorentz_external_v2_fname = "build/lorentz-euler_v2.txt"
+
+def remove_newline(list):
+    if (len(list) == 1) and (list[0] == "\n"):
+        del list[0]
+        # print("remove_newline newsize = " + str(len(list)))
+    else:
+        if (len(list) != 0):
+            list[len(list)-1] = list[len(list)-1].rstrip("\n")
+    return list
+
+def remove_empty_words(list):
+    if (len(list) == 0):
+        return list
+
+    ri = 0
+    for i in range(0, len(list)):
+        if (list[ri] == ''):
+            del list[ri]
+        else:
+            ri += 1
+
+    if (len(list) == 1) and (list[0] == "\n"):
+        del list[0]
+        # print("remove_newline newsize = " + str(len(list)))
+    else:
+        if (len(list) != 0):
+            list[len(list)-1] = list[len(list)-1].rstrip("\n")
+    return list
+
+def list_str_to_int(list, divide_by = None):
+    # NB: the list may be empty, so do not index list[0] before the length check.
+    print("list_str_to_int len = " + str(len(list)) + " values:")
+    print(list)
+    if len(list) == 0:
+        list.append(0)
+    else:
+        if divide_by == None:
+            list = [int(i) for i in list]
+        else:
+            list = [int(i)/divide_by for i in list]
+    return list
+
+# Loads the benchmark file "path" and returns the list of records read.
+def load_file(path):
+    global VERSION_ATTENDUE
+    bench_list = []
+
+    with open(path) as fp:
+        version = fp.readline() # file format version (must match VERSION_ATTENDUE)
+        print("File version: {}".format(version))
+
+        if (int(version) != VERSION_ATTENDUE):
+            sys.exit("ERROR, INCOMPATIBLE FILE VERSION: " + str(int(version)) + ". Expected 
VERSION ATTENDUE = " + str(VERSION_ATTENDUE)) + + divide_factor = 1 + + line = fp.readline() + while line: + + res = {} + + res["array_length"] = int(line) + + res["raw_host_alloc"] = list_str_to_int(remove_empty_words(remove_newline(fp.readline().split(" "))), divide_factor) + res["raw_copy_and_kernel"] = list_str_to_int(remove_empty_words(remove_newline(fp.readline().split(" "))), divide_factor) + res["raw_checksum"] = list_str_to_int(remove_empty_words(remove_newline(fp.readline().split(" "))), divide_factor) + + res["no-outier_host_alloc"] = pu.filter_outliers(res["raw_host_alloc"] ) + res["no-outier_copy_and_kernel"] = pu.filter_outliers(res["raw_copy_and_kernel"]) + res["no-outier_checksum"] = pu.filter_outliers(res["raw_checksum"] ) + + res["med_host_alloc"] = stat.median(res["raw_host_alloc"] ) + res["med_copy_and_kernel"] = stat.median(res["raw_copy_and_kernel"]) + res["med_checksum"] = stat.median(res["raw_checksum"] ) + + bench_list.append(res) + + line = fp.readline() + + return bench_list + +# dpcpp kwk_axpy_bench.cpp -o e.exe -O3 -std=c++20 -I/home/data_sync/academique/These/kiwaku_2023/kiwaku/include/ && ./e.exe +# python3 plot_distribution.py + +# suffix = "_copy" + +# suffix = "_heavy_sh$t_blop_cuda" + +is_heavy = True + +# if is_heavy: +# suffix = "_heavy_blop" +# suffix_title = "transform: [](auto e) { return very_complicated_stuff; }" + +# l_context_cpu = load_file("heavy/cpu_context" + suffix + ".txt") +# l_native_cpu = load_file("heavy/cpu_native" + suffix + ".txt") +# l_context_sycl_cpu = load_file("heavy/sycl_context_cpu" + suffix + ".txt") +# l_context_sycl_gpu = load_file("heavy/sycl_context_gpu" + suffix + ".txt") +# else: +# suffix = "_copy_blop" +# suffix_title = "transform: [](auto e) { return e; }" + +# l_context_cpu = load_file("copy/cpu_context" + suffix + ".txt") +# l_native_cpu = load_file("copy/cpu_native" + suffix + ".txt") +# l_context_sycl_cpu = load_file("copy/sycl_context_cpu" + suffix + ".txt") +# l_context_sycl_gpu = load_file("copy/sycl_context_gpu" + suffix + ".txt") + +if is_heavy: + suffix_title = "LEGEND - transform: [](auto e) { return very_complicated_stuff; }" + + # l_context_cpu = load_file("transform/transform_parsys-legend_heavy_isc9_rp7_cpu_context.txt") + # l_native_cpu = load_file("transform/transform_parsys-legend_heavy_isc9_rp7_cpu_native.txt") + # l_context_sycl_cpu = load_file("transform/transform_parsys-legend_heavy_isc9_rp7_sycl_cpu.txt") + + # l_native_cpu = load_file("transform/transform_parsys-legend_heavy_isc6_rp7_cpu_native.txt") + l_context_cpu = load_file("transform/transform_parsys-legend_heavy_isc6_rp7_cpu_context.txt") + l_context_sycl_cpu = load_file("transform/transform_parsys-legend_heavy_isc6_rp7_sycl_cpu.txt") + l_context_sycl_gpu = load_file("transform/transform4_parsys-legend_heavy_isc6_rp7_sycl_gpu.txt") + # transform4_parsys-legend_heavy_isc6_rp7_sycl_gpu + + # l_native_cpu = load_file("full_without_nd_range/transform3_parsys-legend_heavy_isc6_rp7_cpu_native.txt") + # l_context_cpu = load_file("full_without_nd_range/transform3_parsys-legend_heavy_isc6_rp7_cpu_context.txt") + # l_context_sycl_cpu = load_file("full_without_nd_range/transform3_parsys-legend_heavy_isc6_rp7_sycl_cpu.txt") + # l_context_sycl_gpu = load_file("full_without_nd_range/transform3_parsys-legend_heavy_isc6_rp7_sycl_gpu.txt") + + # l_native_cpu = load_file("full_with_nd_range/transform3_parsys-legend_heavy_isc6_rp7_cpu_native.txt") + # l_context_cpu = 
load_file("full_with_nd_range/transform3_parsys-legend_heavy_isc1_rp3_ndr0_cpu_context_ndr.txt") + # l_context_sycl_cpu = load_file("full_with_nd_range/transform3_parsys-legend_heavy_isc1_rp3_ndr0_sycl_cpu_ndr.txt") + # l_context_sycl_gpu = load_file("full_with_nd_range/transform3_parsys-legend_heavy_isc1_rp3_ndr0_sycl_gpu_ndr.txt") + + + # l_context_cpu = load_file("transform_ndr1024/transform3_parsys-legend_heavy_isc4_rp5_cpu_context.txt") + # l_context_sycl_cpu = load_file("transform_ndr1024/transform3_parsys-legend_heavy_isc4_rp5_sycl_cpu.txt") + # l_context_sycl_gpu = load_file("transform_ndr1024/transform3_parsys-legend_heavy_isc4_rp5_sycl_gpu.txt") +else: + suffix_title = "LEGEND - transform: [](auto e) { return e; }" + + l_native_cpu = load_file("transform/transform_parsys-legend_copy_isc6_rp7_cpu_native.txt") + l_context_cpu = load_file("transform/transform_parsys-legend_copy_isc6_rp7_cpu_context.txt") + l_context_sycl_cpu = load_file("transform/transform_parsys-legend_copy_isc6_rp7_sycl_cpu.txt") + l_context_sycl_gpu = load_file("transform/transform_parsys-legend_copy_isc6_rp7_sycl_gpu.txt") + +# array_length_list = pu.make_1D_list_every_1line(l_context_cpu, "array_length", 1) +array_length_list = pu.make_1D_list_every_1line_divided(l_context_cpu, "array_length", 1, 1000000) + +color_host_alloc = "blue" +color_kernel = "red" +color_checksum = "green" + +display_speedup = False + +min_value = 0 +max_value = 0 + +cpu_name = "AMD Ryzen 9 5950X" +gpu_name = "NVIDIA GeForce RTX 2080 Ti" + +# cpu_name = "i3-4360 CPU @ 3.70GHz" +# gpu_name = "NVIDIA GeForce GTX 1050" + +display_context_sycl_cpu = True +display_context_sycl_gpu = True +display_context_cpu = True +display_native_cpu = False + +def update_min_max(condition, list, min_value, max_value, initialized): + if condition: + if initialized: + min_value = min(min_value, min(list)) + max_value = max(max_value, max(list)) + else: + min_value = min(list) + max_value = max(list) + initialized = True + return (min_value, max_value, initialized) + + +if display_speedup: + start_index = 0 + + array_length_list_update = [] + for i in range(start_index, len(array_length_list)): + array_length_list_update.append(array_length_list[i]) + + # relative_list = l_native_cpu + # relative_name = "native cpu - " + cpu_name + + # relative_list = l_context_cpu + # relative_name = "cpu context - " + cpu_name + + relative_list = l_context_sycl_gpu + relative_name = "context::sycl - " + gpu_name + + # relative_to = "context_sycl_cpu" # context_sycl_cpu context_sycl_gpu context_cpu native_cpu + # relative_list = l_context_sycl_gpu + + # cpu_diff = pu.make_absolute_list(l_context_cpu , "med_copy_and_kernel") + # sycl_diff = pu.make_absolute_list(lsycl, "med_copy_and_kernel") + sycl_cpu = pu.make_div_list(l_context_sycl_cpu , relative_list, start_index, 100, "med_copy_and_kernel") + sycl_gpu = pu.make_div_list(l_context_sycl_gpu , relative_list, start_index, 100, "med_copy_and_kernel") + ctx_cpu = pu.make_div_list(l_context_cpu , relative_list, start_index, 100, "med_copy_and_kernel") + + ref_list = [] + for i in relative_list: + ref_list.append(100) + + # min_value = min(100, min(sycl_cpu))#, min(sycl_gpu), min(ctx_cpu)) + # max_value = max(100, max(sycl_cpu))#, max(sycl_gpu), max(ctx_cpu)) + + (min_value, max_value, initialized) = (False, 0, 0) + (min_value, max_value, initialized) = update_min_max(display_context_sycl_cpu, sycl_cpu, min_value, max_value, initialized) + (min_value, max_value, initialized) = update_min_max(display_context_sycl_gpu, sycl_gpu, 
min_value, max_value, initialized) + (min_value, max_value, initialized) = update_min_max(display_context_cpu, ctx_cpu, min_value, max_value, initialized) + min_value = min(100, min_value) + max_value = max(100, max_value) + + + # plt.plot (range(1, len(ref_list)+1), ref_list, color="green", label=relative_name, linestyle="solid", linewidth=line_width) + plt.plot (range(1, len(ref_list)+1), ref_list, color="grey", label=relative_name, linestyle="solid", linewidth=line_width) + # plt.plot (range(1, len(ctx_cpu)+1), ctx_cpu , color="black", label="context::cpu - " + cpu_name, linestyle="dotted", linewidth=line_width) + plt.plot (range(1, len(sycl_cpu)+1), sycl_cpu, color="blue", label="context::sycl - " + cpu_name, linestyle="dashdot", linewidth=line_width) + plt.plot (range(1, len(sycl_gpu)+1), sycl_gpu, color="green", label="context::sycl - " + gpu_name, linestyle="dashed", linewidth=line_width) + + plt.title("Algo " + suffix_title) + + plt.xticks(range(1, len(array_length_list_update)+1), array_length_list_update) + + plt.xlabel('Array length, 10^6') + plt.ylabel('Relative duration (%)') +else: + + # plot_diff = False + # if plot_diff: + # ldiff0 = pu.make_diff_list(compare_to_list, lorentz_standalone_list, plot_keyword) + # else: + cpu_list = pu.make_absolute_list(l_context_cpu , "med_copy_and_kernel") + sycl_cpu = pu.make_absolute_list(l_context_sycl_cpu , "med_copy_and_kernel") + sycl_gpu = pu.make_absolute_list(l_context_sycl_gpu , "med_copy_and_kernel") + # native_cpu = pu.make_absolute_list(l_native_cpu , "med_copy_and_kernel") + + (min_value, max_value, initialized) = (False, 0, 0) + (min_value, max_value, initialized) = update_min_max(display_context_sycl_cpu , sycl_cpu , min_value, max_value, initialized) + print(min_value) + print(max_value) + print(initialized) + (min_value, max_value, initialized) = update_min_max(display_context_sycl_gpu , sycl_gpu , min_value, max_value, initialized) + (min_value, max_value, initialized) = update_min_max(display_context_cpu , cpu_list , min_value, max_value, initialized) + # (min_value, max_value, initialized) = update_min_max(display_native_cpu , native_cpu, min_value, max_value, initialized) + + # min_value = min(min(cpu_list), min(sycl_cpu), min(sycl_gpu), min(native_cpu)) + # max_value = max(max(cpu_list), max(sycl_cpu), max(sycl_gpu), max(native_cpu)) + + # if display_native_cpu: + # plt.plot (range(1, len(native_cpu)+1), native_cpu, color="grey", label="native cpu - " + cpu_name , linestyle="solid" , linewidth=line_width) + if display_context_cpu: + plt.plot (range(1, len(cpu_list)+1), cpu_list, color="grey", label="context::cpu - " + cpu_name , linestyle="solid" , linewidth=line_width) + if display_context_sycl_cpu: + plt.plot (range(1, len(sycl_cpu)+1), sycl_cpu, color="blue", label="context::sycl - " + cpu_name, linestyle="dashdot", linewidth=line_width) + if display_context_sycl_gpu: + plt.plot (range(1, len(sycl_gpu)+1), sycl_gpu, color="green", label="context::sycl - " + gpu_name, linestyle="dashed", linewidth=line_width) + + plt.title(suffix_title) + + plt.xticks(range(1, len(array_length_list)+1), array_length_list) + + plt.xlabel('Array length, 10^6') + plt.ylabel('Elapsed time (ms)') + + +# Ascending number of particles (and imom), in millions (10^6) +#plt.ylim([-5, 100]) +plt.legend() + +plt.rcParams['grid.linestyle'] = "-" +plt.rcParams['grid.alpha'] = 0.15 +plt.rcParams['grid.color'] = "black" ##cccccc +plt.grid(linewidth=line_width/10) + +# global_drawn_x_variables_number+1 +# plt.xticks(range(1, 6), x_list_curve_drawn) # 
= x_list_shared and x_list_acc
+
+plt.ylim([- (max_value - min_value) * 0.1, max_value * 1.1])
+
+# plt.savefig(out_fname, format='png') #, dpi=my_dpi)
+
+plt.show()
+print ("Hello World!")
diff --git a/test/sycl_sandbox/plot/plot_2023-11-11_transform.py b/test/sycl_sandbox/plot/plot_2023-11-11_transform.py
new file mode 100644
index 00000000..d74c2dc6
--- /dev/null
+++ b/test/sycl_sandbox/plot/plot_2023-11-11_transform.py
@@ -0,0 +1,345 @@
+#!/usr/bin/python
+
+# Transform benchmark plotting script (SYCL sandbox)
+
+# Run the script: python3 ./plot_2023-11-11_transform.py
+
+import matplotlib.pyplot as plt
+import numpy as np
+import statistics as stat
+import sys
+import math
+import plot_utils as pu
+
+# Plots transform benchmark timings: kwk::context::cpu vs kwk::context::sycl (CPU and GPU)
+
+
+VERSION_ATTENDUE = 2
+
+computer_name = "blop" # also used for the input file name
+str_min_iteration_count = "10" # added to plot title only
+
+
+# ============= Figure size settings
+my_dpi = 96
+output_image_name = "no_name"
+field_type_name = "" # InterpolateNN_LayoutStride
+
+# image_ratio = 640 / 480
+image_ratio = 880 / 480
+
+image_width = 1280 # 1280
+image_height = 0
+image_scale_factor = image_width / 640
+line_width = image_scale_factor * 1.5
+image_height = image_width / image_ratio #(image_width / 640) * 480
+
+plt.figure(figsize=(image_width/my_dpi, image_height/my_dpi) , dpi=my_dpi)
+# output_image_name = "lorentz_compare_" + computer_name + "_" + output_image_name + ".png"
+output_image_name = "lorentz_compare_" + computer_name + "_" + output_image_name + ".png"
+
+MY_SIZE = (10 * image_scale_factor)
+MY_SIZE_SMALL = (6 * image_scale_factor)
+TITLE_SIZE = (12 * image_scale_factor)
+
+#plt.rc('font', size=MY_SIZE)         # controls default text sizes
+plt.rc('axes', titlesize=TITLE_SIZE)  # fontsize of the axes title
+plt.rc('axes', labelsize=MY_SIZE)     # fontsize of the x and y labels
+plt.rc('xtick', labelsize=MY_SIZE)    # fontsize of the tick labels
+plt.rc('ytick', labelsize=MY_SIZE)    # fontsize of the tick labels
+plt.rc('legend', fontsize=MY_SIZE)    # legend fontsize
+#plt.rc('figure', titlesize=MY_SIZE)  # fontsize of the figure title
+# plt.rcParams.update({'font.size': MY_SIZE})
+
+# end of figure size settings =============
+
+# Here, a curve is just a plain sequence of points
+
+# lorentz_external_v2_fname = "build/lorentz-euler_v2.txt"
+
+def remove_newline(list):
+    if (len(list) == 1) and (list[0] == "\n"):
+        del list[0]
+        # print("remove_newline newsize = " + str(len(list)))
+    else:
+        if (len(list) != 0):
+            list[len(list)-1] = list[len(list)-1].rstrip("\n")
+    return list
+
+def remove_empty_words(list):
+    if (len(list) == 0):
+        return list
+
+    ri = 0
+    for i in range(0, len(list)):
+        if (list[ri] == ''):
+            del list[ri]
+        else:
+            ri += 1
+
+    if (len(list) == 1) and (list[0] == "\n"):
+        del list[0]
+        # print("remove_newline newsize = " + str(len(list)))
+    else:
+        if (len(list) != 0):
+            list[len(list)-1] = list[len(list)-1].rstrip("\n")
+    return list
+
+def list_str_to_int(list, divide_by = None):
+    # NB: the list may be empty, so do not index list[0] before the length check.
+    print("list_str_to_int len = " + str(len(list)) + " values:")
+    print(list)
+    if len(list) == 0:
+        list.append(0)
+    else:
+        if divide_by == None:
+            list = [int(i) for i in list]
+        else:
+            list = [int(i)/divide_by for i in list]
+    return list
+
+# Loads the benchmark file "path" and returns the list of records read.
+def load_file(path): + global VERSION_ATTENDUE + bench_list = [] + + with open(path) as fp: + version = fp.readline() # version du fichier actuel (doit être 106 et non plus 105) + print("Version du fichier : {}".format(version)) + + if (int(version) != VERSION_ATTENDUE): + sys.exit("ERREUR, VERSION DU FICHIER NON COMPATIBLE : " + str(int(version)) + ". VERSION ATTENDUE = " + str(VERSION_ATTENDUE)) + + divide_factor = 1 + + line = fp.readline() + while line: + + res = {} + + res["array_length"] = int(line) + + res["raw_host_alloc"] = list_str_to_int(remove_empty_words(remove_newline(fp.readline().split(" "))), divide_factor) + res["raw_copy_and_kernel"] = list_str_to_int(remove_empty_words(remove_newline(fp.readline().split(" "))), divide_factor) + res["raw_checksum"] = list_str_to_int(remove_empty_words(remove_newline(fp.readline().split(" "))), divide_factor) + + res["no-outier_host_alloc"] = pu.filter_outliers(res["raw_host_alloc"] ) + res["no-outier_copy_and_kernel"] = pu.filter_outliers(res["raw_copy_and_kernel"]) + res["no-outier_checksum"] = pu.filter_outliers(res["raw_checksum"] ) + + res["med_host_alloc"] = stat.median(res["raw_host_alloc"] ) + res["med_copy_and_kernel"] = stat.median(res["raw_copy_and_kernel"]) + res["med_checksum"] = stat.median(res["raw_checksum"] ) + + bench_list.append(res) + + line = fp.readline() + + return bench_list + +# dpcpp kwk_axpy_bench.cpp -o e.exe -O3 -std=c++20 -I/home/data_sync/academique/These/kiwaku_2023/kiwaku/include/ && ./e.exe +# python3 plot_distribution.py + +# suffix = "_copy" + +# suffix = "_heavy_sh$t_blop_cuda" + +is_heavy = True + +# if is_heavy: +# suffix = "_heavy_blop" +# suffix_title = "transform: [](auto e) { return very_complicated_stuff; }" + +# l_context_cpu = load_file("heavy/cpu_context" + suffix + ".txt") +# l_native_cpu = load_file("heavy/cpu_native" + suffix + ".txt") +# l_context_sycl_cpu = load_file("heavy/sycl_context_cpu" + suffix + ".txt") +# l_context_sycl_gpu = load_file("heavy/sycl_context_gpu" + suffix + ".txt") +# else: +# suffix = "_copy_blop" +# suffix_title = "transform: [](auto e) { return e; }" + +# l_context_cpu = load_file("copy/cpu_context" + suffix + ".txt") +# l_native_cpu = load_file("copy/cpu_native" + suffix + ".txt") +# l_context_sycl_cpu = load_file("copy/sycl_context_cpu" + suffix + ".txt") +# l_context_sycl_gpu = load_file("copy/sycl_context_gpu" + suffix + ".txt") + +if is_heavy: + # suffix_title = "LEGEND - transform: [](auto e) { return very_complicated_stuff; }" + + # l_context_cpu = load_file("transform/transform_parsys-legend_heavy_isc9_rp7_cpu_context.txt") + # l_native_cpu = load_file("transform/transform_parsys-legend_heavy_isc9_rp7_cpu_native.txt") + # l_context_sycl_cpu = load_file("transform/transform_parsys-legend_heavy_isc9_rp7_sycl_cpu.txt") + + # l_native_cpu = load_file("transform/transform_parsys-legend_heavy_isc6_rp7_cpu_native.txt") + l_context_cpu = load_file("transform/transform_parsys-legend_heavy_isc6_rp7_cpu_context.txt") + l_context_sycl_cpu = load_file("transform/transform_parsys-legend_heavy_isc6_rp7_sycl_cpu.txt") + l_context_sycl_gpu = load_file("transform/transform4_parsys-legend_heavy_isc6_rp7_sycl_gpu.txt") + # transform4_parsys-legend_heavy_isc6_rp7_sycl_gpu + + # l_native_cpu = load_file("full_without_nd_range/transform3_parsys-legend_heavy_isc6_rp7_cpu_native.txt") + # l_context_cpu = load_file("full_without_nd_range/transform3_parsys-legend_heavy_isc6_rp7_cpu_context.txt") + # l_context_sycl_cpu = 
load_file("full_without_nd_range/transform3_parsys-legend_heavy_isc6_rp7_sycl_cpu.txt") + # l_context_sycl_gpu = load_file("full_without_nd_range/transform3_parsys-legend_heavy_isc6_rp7_sycl_gpu.txt") + + # l_native_cpu = load_file("full_with_nd_range/transform3_parsys-legend_heavy_isc6_rp7_cpu_native.txt") + # l_context_cpu = load_file("full_with_nd_range/transform3_parsys-legend_heavy_isc1_rp3_ndr0_cpu_context_ndr.txt") + # l_context_sycl_cpu = load_file("full_with_nd_range/transform3_parsys-legend_heavy_isc1_rp3_ndr0_sycl_cpu_ndr.txt") + # l_context_sycl_gpu = load_file("full_with_nd_range/transform3_parsys-legend_heavy_isc1_rp3_ndr0_sycl_gpu_ndr.txt") + + + # l_context_cpu = load_file("transform_ndr1024/transform3_parsys-legend_heavy_isc4_rp5_cpu_context.txt") + # l_context_sycl_cpu = load_file("transform_ndr1024/transform3_parsys-legend_heavy_isc4_rp5_sycl_cpu.txt") + # l_context_sycl_gpu = load_file("transform_ndr1024/transform3_parsys-legend_heavy_isc4_rp5_sycl_gpu.txt") +else: + suffix_title = "LEGEND - transform: [](auto e) { return e; }" + + l_native_cpu = load_file("transform/transform_parsys-legend_copy_isc6_rp7_cpu_native.txt") + l_context_cpu = load_file("transform/transform_parsys-legend_copy_isc6_rp7_cpu_context.txt") + l_context_sycl_cpu = load_file("transform/transform_parsys-legend_copy_isc6_rp7_sycl_cpu.txt") + l_context_sycl_gpu = load_file("transform/transform_parsys-legend_copy_isc6_rp7_sycl_gpu.txt") + +# array_length_list = pu.make_1D_list_every_1line(l_context_cpu, "array_length", 1) +array_length_list = pu.make_1D_list_every_1line_divided(l_context_cpu, "array_length", 1, 1000000) + +color_host_alloc = "blue" +color_kernel = "red" +color_checksum = "green" + +display_speedup = False + +min_value = 0 +max_value = 0 + +cpu_name = "AMD Ryzen 9 5950X" +gpu_name = "NVIDIA GeForce RTX 2080 Ti" +suffix_title = cpu_name + +# cpu_name = "i3-4360 CPU @ 3.70GHz" +# gpu_name = "NVIDIA GeForce GTX 1050" + +display_context_sycl_cpu = True +display_context_sycl_gpu = False +display_context_cpu = True +display_native_cpu = False + +def update_min_max(condition, list, min_value, max_value, initialized): + if condition: + if initialized: + min_value = min(min_value, min(list)) + max_value = max(max_value, max(list)) + else: + min_value = min(list) + max_value = max(list) + initialized = True + return (min_value, max_value, initialized) + + +if display_speedup: + start_index = 0 + + array_length_list_update = [] + for i in range(start_index, len(array_length_list)): + array_length_list_update.append(array_length_list[i]) + + # relative_list = l_native_cpu + # relative_name = "native cpu - " + cpu_name + + # relative_list = l_context_cpu + # relative_name = "cpu context - " + cpu_name + + relative_list = l_context_sycl_gpu + relative_name = "context::sycl - " + gpu_name + + # relative_to = "context_sycl_cpu" # context_sycl_cpu context_sycl_gpu context_cpu native_cpu + # relative_list = l_context_sycl_gpu + + # cpu_diff = pu.make_absolute_list(l_context_cpu , "med_copy_and_kernel") + # sycl_diff = pu.make_absolute_list(lsycl, "med_copy_and_kernel") + sycl_cpu = pu.make_div_list(l_context_sycl_cpu , relative_list, start_index, 100, "med_copy_and_kernel") + sycl_gpu = pu.make_div_list(l_context_sycl_gpu , relative_list, start_index, 100, "med_copy_and_kernel") + ctx_cpu = pu.make_div_list(l_context_cpu , relative_list, start_index, 100, "med_copy_and_kernel") + + ref_list = [] + for i in relative_list: + ref_list.append(100) + + # min_value = min(100, min(sycl_cpu))#, min(sycl_gpu), 
min(ctx_cpu)) + # max_value = max(100, max(sycl_cpu))#, max(sycl_gpu), max(ctx_cpu)) + + (min_value, max_value, initialized) = (False, 0, 0) + (min_value, max_value, initialized) = update_min_max(display_context_sycl_cpu, sycl_cpu, min_value, max_value, initialized) + (min_value, max_value, initialized) = update_min_max(display_context_sycl_gpu, sycl_gpu, min_value, max_value, initialized) + (min_value, max_value, initialized) = update_min_max(display_context_cpu, ctx_cpu, min_value, max_value, initialized) + min_value = min(100, min_value) + max_value = max(100, max_value) + + + # plt.plot (range(1, len(ref_list)+1), ref_list, color="green", label=relative_name, linestyle="solid", linewidth=line_width) + plt.plot (range(1, len(ref_list)+1), ref_list, color="grey", label=relative_name, linestyle="solid", linewidth=line_width) + # plt.plot (range(1, len(ctx_cpu)+1), ctx_cpu , color="black", label="context::cpu - " + cpu_name, linestyle="dotted", linewidth=line_width) + plt.plot (range(1, len(sycl_cpu)+1), sycl_cpu, color="blue", label="context::sycl - " + cpu_name, linestyle="dashdot", linewidth=line_width) + plt.plot (range(1, len(sycl_gpu)+1), sycl_gpu, color="green", label="context::sycl - " + gpu_name, linestyle="dashed", linewidth=line_width) + + plt.title("Algo " + suffix_title) + + plt.xticks(range(1, len(array_length_list_update)+1), array_length_list_update) + + plt.xlabel('Array length, 10^6') + plt.ylabel('Relative duration (%)') +else: + + # plot_diff = False + # if plot_diff: + # ldiff0 = pu.make_diff_list(compare_to_list, lorentz_standalone_list, plot_keyword) + # else: + cpu_list = pu.make_absolute_list_divided(l_context_cpu , "med_copy_and_kernel", 1000) + sycl_cpu = pu.make_absolute_list_divided(l_context_sycl_cpu , "med_copy_and_kernel", 1000) + sycl_gpu = pu.make_absolute_list_divided(l_context_sycl_gpu , "med_copy_and_kernel", 1000) + # native_cpu = pu.make_absolute_list(l_native_cpu , "med_copy_and_kernel") + + (min_value, max_value, initialized) = (False, 0, 0) + (min_value, max_value, initialized) = update_min_max(display_context_sycl_cpu , sycl_cpu , min_value, max_value, initialized) + print(min_value) + print(max_value) + print(initialized) + (min_value, max_value, initialized) = update_min_max(display_context_sycl_gpu , sycl_gpu , min_value, max_value, initialized) + (min_value, max_value, initialized) = update_min_max(display_context_cpu , cpu_list , min_value, max_value, initialized) + # (min_value, max_value, initialized) = update_min_max(display_native_cpu , native_cpu, min_value, max_value, initialized) + + # min_value = min(min(cpu_list), min(sycl_cpu), min(sycl_gpu), min(native_cpu)) + # max_value = max(max(cpu_list), max(sycl_cpu), max(sycl_gpu), max(native_cpu)) + + # if display_native_cpu: + # plt.plot (range(1, len(native_cpu)+1), native_cpu, color="grey", label="native cpu - " + cpu_name , linestyle="solid" , linewidth=line_width) + if display_context_cpu: + plt.plot (range(1, len(cpu_list)+1), cpu_list, color="grey", label="kwk::context::cpu" , linestyle="solid" , linewidth=line_width) + if display_context_sycl_cpu: + plt.plot (range(1, len(sycl_cpu)+1), sycl_cpu, color="blue", label="kwk::context::sycl", linestyle="dashdot", linewidth=line_width) + if display_context_sycl_gpu: + plt.plot (range(1, len(sycl_gpu)+1), sycl_gpu, color="green", label="kwk::context::sycl", linestyle="dashed", linewidth=line_width) + + plt.title(suffix_title) + + plt.xticks(range(1, len(array_length_list)+1), array_length_list) + + plt.xlabel('Array length, 10^6') + 
plt.ylabel('Elapsed time (s)') + + +# Ascending number of particles (and imom), in millions (10^6) +#plt.ylim([-5, 100]) +plt.legend() + +plt.rcParams['grid.linestyle'] = "-" +plt.rcParams['grid.alpha'] = 0.15 +plt.rcParams['grid.color'] = "black" ##cccccc +plt.grid(linewidth=line_width/10) + +# global_drawn_x_variables_number+1 +# plt.xticks(range(1, 6), x_list_curve_drawn) # = x_list_shared et x_list_acc + +plt.ylim([- (max_value - min_value) * 0.1, max_value * 1.1]) + +# plt.savefig(out_fname, format='png') #, dpi=my_dpi) + +plt.show() +print ("Hello World!") diff --git a/test/sycl_sandbox/plot/plot_utils.py b/test/sycl_sandbox/plot/plot_utils.py index 617ad724..9eb737ce 100644 --- a/test/sycl_sandbox/plot/plot_utils.py +++ b/test/sycl_sandbox/plot/plot_utils.py @@ -114,6 +114,14 @@ def make_absolute_list(l1, keyword = None): res.append(l1[i][keyword]) return res +def make_absolute_list_divided(l1, keyword, divide_by): + if keyword == None: + keyword = "elapsed_time" + res = [] + for i in range(0, len(l1)): + res.append(float(l1[i][keyword]) / divide_by) + return res + # def make_absolute_list(l1): # return make_absolute_list(l1, "elapsed_time")
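Note on the benchmark-file format consumed by the new plot scripts: load_file() expects a first line holding the format version (VERSION_ATTENDUE, currently 2), then, for every measured array length, one line with the length followed by three lines of raw samples (host_alloc, copy_and_kernel, checksum). The sketch below is a minimal, hypothetical illustration of that layout and of the new plot_utils.make_absolute_list_divided() helper; the file name and timing values are invented, the parser is a simplified stand-in for load_file() (not the real one), and it assumes it is run from test/sycl_sandbox/plot/ so that plot_utils is importable. The unit of the raw samples is whatever the C++ benchmarks write; the 2023-11-11 script simply divides the medians by 1000 before plotting.

# sketch_bench_format.py -- hypothetical example, for illustration only
import statistics as stat
import plot_utils as pu

def write_fake_bench(path):
    # Version line first (must equal VERSION_ATTENDUE == 2), then per array length:
    # the length, then one line each of host_alloc / copy_and_kernel / checksum samples.
    with open(path, "w") as fp:
        fp.write("2\n")
        fp.write("1000000\n")
        fp.write("101 99 105 98 102\n")         # host_alloc samples (made up)
        fp.write("2050 1998 2104 2010 2001\n")  # copy_and_kernel samples (made up)
        fp.write("12 11 13 12 12\n")            # checksum samples (made up)

write_fake_bench("fake_bench.txt")

# Simplified stand-in for load_file(): same layout, same "raw_*" / "med_*" keys.
bench_list = []
with open("fake_bench.txt") as fp:
    assert int(fp.readline()) == 2
    line = fp.readline()
    while line:
        res = {"array_length": int(line)}
        for key in ("host_alloc", "copy_and_kernel", "checksum"):
            res["raw_" + key] = [int(v) for v in fp.readline().split()]
            res["med_" + key] = stat.median(res["raw_" + key])
        bench_list.append(res)
        line = fp.readline()

# Same call as in plot_2023-11-11_transform.py: scale the medians by 1/1000 before plotting.
print(pu.make_absolute_list_divided(bench_list, "med_copy_and_kernel", 1000))

The version check at the top of each file is what lets the scripts reject benchmark outputs produced by an older writer, so the writer side in the C++ benchmarks and VERSION_ATTENDUE here have to move together.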