forked from travisdowns/uarch-bench
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathresource-stalls.cpp
115 lines (82 loc) · 3.6 KB
/
resource-stalls.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
/*
* resource-stalls.cpp
*
* Benchmarks for studying RESOURCE_STALLS events.
*/
#include "benchmark.hpp"
#include "boost/preprocessor/repetition/repeat_from_to.hpp"
#include "hedley.h"
#include "util.hpp"
// Declarations of the benchmark entry points registered by register_rstalls below.
// They are declared with C linkage and defined outside this file
// (presumably in an assembly source -- TODO confirm).
extern "C" {
// Stand-alone benchmarks, each registered individually.
bench2_f rs_dep_add;
bench2_f rs_dep_add4;
bench2_f rs_dep_imul;
bench2_f rs_split_stores;
bench2_f rs_dep_fsqrt;
// Number of variants per (prefix, op) family; the repeat index runs over 0 .. MAX_RATIO-1.
#define MAX_RATIO 10
// Declares one family of MAX_RATIO functions: DECL_BENCH2 (defined elsewhere) is repeated
// with the pasted token fname##op as its data argument. The registration code refers to
// the resulting functions as <fname><op><n>.
#define DECL_MANY(fname, op) BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, DECL_BENCH2, fname ## op)
// fsqrt mixed with a second operation.
DECL_MANY(rs_fsqrt_,nop)
DECL_MANY(rs_fsqrt_,add)
DECL_MANY(rs_fsqrt_,xorzero)
DECL_MANY(rs_fsqrt_,load)
DECL_MANY(rs_fsqrt_,store)
DECL_MANY(rs_fsqrt_,paddb)
DECL_MANY(rs_fsqrt_,vpaddb)
DECL_MANY(rs_fsqrt_,add_padd)
DECL_MANY(rs_fsqrt_,load_dep)
// load mixed with a second operation.
DECL_MANY(rs_load_,nop)
DECL_MANY(rs_load_,add)
// rs_loadchain0 .. rs_loadchain119 and rs_storebuf0 .. rs_storebuf79.
// NOTE: the bounds 120 and 80 must stay in sync with the matching
// BOOST_PP_REPEAT_FROM_TO calls in register_rstalls.
BOOST_PP_REPEAT_FROM_TO(0, 120, DECL_BENCH2, rs_loadchain)
BOOST_PP_REPEAT_FROM_TO(0, 80, DECL_BENCH2, rs_storebuf)
}
// Argument block passed (as a void*) to indirect_thunk; it carries the benchmark
// function that the thunk should forward to.
struct thunk_args {
// The wrapped benchmark function; invoked by indirect_thunk with a null argument.
bench2_f* underlying;
};
long indirect_thunk(uint64_t iters, void *arg) {
auto thunk_arg = *static_cast<thunk_args *>(arg);
return thunk_arg.underlying(iters, nullptr);
}
/**
 * Registers `fn` with `maker` as a benchmark that is called indirectly via indirect_thunk.
 *
 * @param fn           benchmark function to wrap
 * @param maker        object whose make<indirect_thunk>(...) performs the registration
 * @param name         benchmark name, passed through to maker
 * @param desc         benchmark description, passed through to maker
 * @param ops_per_loop passed through to maker
 *
 * Marked never-inline, so the many macro-generated call sites each emit a call
 * rather than a copy of this body.
 */
template <typename M>
HEDLEY_NEVER_INLINE
void makei(bench2_f* fn, M& maker, const char* name, const char* desc, uint32_t ops_per_loop) {
    thunk_args wrapped{fn};
    // The closure captures `wrapped` by value, so the pointer it returns refers to the
    // copy stored inside the closure, not to this function's (soon dead) local.
    // The pointer is therefore only valid while that closure copy is alive
    // (presumably arg_provider_t keeps it for the lifetime of the benchmark -- TODO confirm).
    arg_provider_t provider = [wrapped]() -> void* {
        return const_cast<thunk_args*>(&wrapped);
    };
    maker.template make<indirect_thunk>(name, desc, ops_per_loop, provider);
}
/**
 * Creates the "studies/resource-stalls" benchmark group, appends it to `list` and
 * registers every resource-stall benchmark in it, using TIMER for measurement.
 *
 * All benchmarks are tagged "slow" and are registered through makei, i.e. they run
 * via indirect_thunk. When UARCH_BENCH_PORTABLE is non-zero the whole body is
 * compiled out and nothing is registered.
 *
 * @param list group list that receives the newly created group
 */
template <typename TIMER>
void register_rstalls(GroupList& list) {
#if !UARCH_BENCH_PORTABLE
    std::shared_ptr<BenchmarkGroup> group = std::make_shared<BenchmarkGroup>("studies/resource-stalls", "Test RESOURCE_STALLS events");
    list.push_back(group);

    auto maker = DeltaMaker<TIMER>(group.get(), 1000).setTags({"slow"});

    // Stand-alone benchmarks; the last argument is makei's ops_per_loop.
    makei(rs_dep_add     , maker, "dep-add"    , "Dependent adds (RS limit)" , 128);
    makei(rs_dep_add4    , maker, "dep-add4"   , "Independent adds (? limit)", 128);
    makei(rs_dep_imul    , maker, "dep-imul"   , "Dependent imuls (RS limit)", 128);
    makei(rs_split_stores, maker, "split-store", "Split stores (SB limit)"   , 128);
    makei(rs_dep_fsqrt   , maker, "fsqrt"      , "Dependent fsqrt (RS limit)", 128);

    // A macro to call makei on tests that mix fsqrt and another op in a variety of ratios.
    // BOOST_PP_REPEAT_FROM_TO invokes it as MAKE_FSQRT_OP(z, n, op) for n = 0 .. MAX_RATIO-1,
    // registering rs_fsqrt_<op><n> under the name "fsqrt-<op>-<n>".
#define MAKE_FSQRT_OP(z, n, op) makei(rs_fsqrt_ ## op ## n, maker, "fsqrt-" #op "-" #n, \
        "fsqrt:" #op " in 1:" #n " ratio", 32);

    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_FSQRT_OP, nop)
    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_FSQRT_OP, add)
    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_FSQRT_OP, xorzero)
    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_FSQRT_OP, load)
    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_FSQRT_OP, store)
    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_FSQRT_OP, paddb)
    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_FSQRT_OP, vpaddb)
    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_FSQRT_OP, add_padd)
    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_FSQRT_OP, load_dep)

    // Same scheme for loads mixed with another op: rs_load_<op><n> as "load-<op>-<n>".
#define MAKE_LOAD_OP(z, n, op) makei(rs_load_ ## op ## n, maker, "load-" #op "-" #n, \
        "load:" #op " in 1:" #n " ratio", 32);

    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_LOAD_OP, nop)
    BOOST_PP_REPEAT_FROM_TO(0, MAX_RATIO, MAKE_LOAD_OP, add)

    // rs_loadchain<n> for n = 0 .. 119; the bound must match the extern "C" declarations.
#define MAKE_LOADCHAIN(z, n, _) makei(rs_loadchain ## n, maker, "loadchain-" #n, \
        "loadchain: " #n " loads", 32);

    BOOST_PP_REPEAT_FROM_TO(0, 120, MAKE_LOADCHAIN, _)

    // rs_storebuf<n> for n = 0 .. 79; the bound must match the extern "C" declarations.
#define MAKE_STOREBUF(z, n, _) makei(rs_storebuf ## n, maker, "storebuf-" #n, \
        "storebuf: " #n " stores", 32);

    BOOST_PP_REPEAT_FROM_TO(0, 80, MAKE_STOREBUF, _)
#endif // #if !UARCH_BENCH_PORTABLE
}
// Explicit instantiation of register_rstalls for one timer type. ALL_TIMERS_X is defined
// elsewhere; presumably it applies REG_THIS to every available timer class so the
// template's definition can stay in this translation unit -- TODO confirm.
#define REG_THIS(CLOCK) template void register_rstalls<CLOCK>(GroupList& list);
ALL_TIMERS_X(REG_THIS)