-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmhz.c
271 lines (237 loc) · 6.71 KB
/
mhz.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
#include <sys/time.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
/* Output selection flags. They are set from the command line options parsed
 * in main() and read by run_once().
 */
static int cpu_only; /* -c: print only the CPU frequency */
static int tsc_only; /* -t: print only the TSC frequency (option only parsed when HAVE_RDTSC is defined) */
static int use_ints; /* -i: print frequencies as integers instead of 3 decimals */
/* Returns the current time of day, as reported by gettimeofday(), expressed
 * as a single count of microseconds.
 */
static inline unsigned long long microseconds(void)
{
	struct timeval now;
	unsigned long long usec;

	gettimeofday(&now, NULL);

	/* whole seconds first, then the sub-second part */
	usec = now.tv_sec * 1000000ULL;
	usec += now.tv_usec;
	return usec;
}
/* The time stamp counter is only read on x86 (32 and 64 bit). */
#if defined(__i386__) || defined(__x86_64__)
#define HAVE_RDTSC 1
#endif

#ifdef HAVE_RDTSC
/* Returns the 64-bit value of the CPU's time stamp counter. The RDTSC
 * instruction delivers it as two 32-bit halves in EAX (low) and EDX (high).
 * The __asm__/__volatile__ spellings are used instead of asm/volatile so that
 * the code also builds in strict ISO modes (-std=c99, -std=c11) where the
 * plain "asm" keyword is not available.
 */
static inline unsigned long long rdtsc(void)
{
	unsigned int a, d;

	__asm__ __volatile__("rdtsc" : "=a" (a), "=d" (d));
	return a + ((unsigned long long)d << 32);
}
#else
/* no time stamp counter support: always report zero */
#define rdtsc() 0
#endif
/* The macros below perform read-after-write operations that the CPU is not
 * supposed to be able to parallelize: each XOR consumes the variable written
 * by the previous one. They operate on five variables named a, b, c, d and e
 * which must exist in the caller's scope.
 *
 * dont_move() is an empty asm statement taking <var> both as input and as
 * output in the same register. It emits no instruction but makes the compiler
 * consider <var> as modified at this point, which prevents it from folding or
 * reordering the operations. __asm__/__volatile__ are used instead of
 * asm/volatile so that this also builds in strict ISO modes (-std=c99,
 * -std=c11) where the plain "asm" keyword is not available.
 */
#define dont_move(var) do { __asm__ __volatile__("" : "=r"(var) : "0" (var)); } while (0)

/* one dependent operation each: run1cycle_xy() does x ^= y */
#define run1cycle_ae() do { a ^= e; dont_move(a); } while (0)
#define run1cycle_ba() do { b ^= a; dont_move(b); } while (0)
#define run1cycle_cb() do { c ^= b; dont_move(c); } while (0)
#define run1cycle_dc() do { d ^= c; dont_move(d); } while (0)
#define run1cycle_ed() do { e ^= d; dont_move(e); } while (0)
#define run1cycle_eb() do { e ^= b; dont_move(e); } while (0)

/* 5 chained operations: a <- e, b <- a, c <- b, d <- c, e <- d. The last one
 * feeds the first one of the next run5cycles(), so the chain never breaks.
 */
#define run5cycles()                                    \
	do {                                            \
		run1cycle_ae();                         \
		run1cycle_ba();                         \
		run1cycle_cb();                         \
		run1cycle_dc();                         \
		run1cycle_ed();                         \
	} while (0)

/* 10 chained operations */
#define run10cycles()                                   \
	do {                                            \
		run5cycles();                           \
		run5cycles();                           \
	} while (0)

/* 100 chained operations */
#define run100cycles()                                  \
	do {                                            \
		run10cycles();                          \
		run10cycles();                          \
		run10cycles();                          \
		run10cycles();                          \
		run10cycles();                          \
		run10cycles();                          \
		run10cycles();                          \
		run10cycles();                          \
		run10cycles();                          \
		run10cycles();                          \
	} while (0)
/* Runs <n> iterations of a loop whose body performs 50 operations (five
 * run10cycles()), all dependent on each other, so that the CPU cannot
 * parallelize them, hoping to take 50 cycles per iteration, plus the loop
 * counter overhead. The function returns nothing: only its execution time is
 * of interest to the callers.
 *
 * <n> must not be zero: the counter is decremented before being tested, so a
 * zero count wraps around and the loop only stops UINT_MAX iterations later.
 *
 * The function is never inlined and is aligned on 64 bytes (see attributes).
 */
static __attribute__((noinline,aligned(64))) void loop50(unsigned int n)
{
/* the five variables the run*cycles() macros operate on */
unsigned int a = 0, b = 0, c = 0, d = 0, e = 0;
do {
run10cycles();
run10cycles();
run10cycles();
run10cycles();
run10cycles();
/* __builtin_expect() is only a branch hint given to the compiler */
} while (__builtin_expect(--n, 1));
}
/* Runs <n> iterations of a loop whose body performs 250 operations (five
 * run10cycles() followed by two run100cycles()), all dependent on each other,
 * so that the CPU cannot parallelize them, hoping to take 250 cycles per
 * iteration, plus the loop counter overhead. Do not make this loop any larger,
 * so that it still fits in a small 1 kB L1 cache on 32-bit instruction sets.
 *
 * <n> must not be zero: the counter is decremented before being tested, so a
 * zero count wraps around and the loop only stops UINT_MAX iterations later.
 *
 * The function is never inlined and is aligned on 64 bytes (see attributes).
 */
static __attribute__((noinline,aligned(64))) void loop250(unsigned int n)
{
/* the five variables the run*cycles() macros operate on */
unsigned int a = 0, b = 0, c = 0, d = 0, e = 0;
do {
run10cycles();
run10cycles();
run10cycles();
run10cycles();
run10cycles();
run100cycles();
run100cycles();
/* __builtin_expect() is only a branch hint given to the compiler */
} while (__builtin_expect(--n, 1));
}
/* Performs one measurement with <count> loop iterations and prints it on
 * stdout.
 *
 * loop50() then loop250() are each run 5 times and, for each of them, the run
 * with the shortest duration in microseconds is retained together with the
 * number of TSC ticks that elapsed during that same run. One iteration of
 * loop250() performs 200 more operations than one of loop50(), so the
 * difference between both durations corresponds to count * 200 operations and
 * the CPU frequency in MHz is count * 200 / (us250 - us50). The TSC frequency
 * is the difference in TSC ticks divided by the same time difference.
 *
 * The output depends on the file-level flags:
 *   - cpu_only: only the CPU frequency is printed;
 *   - tsc_only: only the TSC frequency is printed;
 *   - neither:  one line with all intermediate values and both frequencies
 *               (the TSC part only when HAVE_RDTSC is defined);
 *   - use_ints: frequencies are printed as integers instead of 3 decimals.
 */
void run_once(long count)
{
	long long tsc_begin, tsc_duration;
	long long tsc_duration50 __attribute__((unused)) = 0;
	long long tsc_duration250 __attribute__((unused)) = 0;
	long long us_begin, us_duration;
	long long us_duration50, us_duration250;
	unsigned int i;
	char mhz[20];

	/* now run the 50 cycles loop. We'll pick the lowest value
	 * among 5 runs of the short loop, and keep the TSC duration
	 * of that very same run so that both values stay consistent.
	 */
	us_duration50 = LLONG_MAX;
	for (i = 0; i < 5; i++) {
		us_begin = microseconds();
		tsc_begin = rdtsc();
		loop50(count);
		tsc_duration = rdtsc() - tsc_begin;
		us_duration = microseconds() - us_begin;
		if (us_duration < us_duration50) {
			us_duration50 = us_duration;
			tsc_duration50 = tsc_duration;
		}
	}

	/* now run the 250 cycles loop. We'll pick the lowest value
	 * among 5 runs of the long loop, and keep the TSC duration
	 * of that very same run.
	 */
	us_duration250 = LLONG_MAX;
	for (i = 0; i < 5; i++) {
		us_begin = microseconds();
		tsc_begin = rdtsc();
		loop250(count);
		tsc_duration = rdtsc() - tsc_begin;
		us_duration = microseconds() - us_begin;
		if (us_duration < us_duration250) {
			us_duration250 = us_duration;
			tsc_duration250 = tsc_duration;
		}
	}

	/* "%.0f" already rounds to the nearest integer, so nothing must be
	 * added to the value before formatting it.
	 */
	if (use_ints)
		snprintf(mhz, sizeof(mhz), "%.0f", count * 200.0 / (us_duration250 - us_duration50));
	else
		snprintf(mhz, sizeof(mhz), "%.3f", count * 200.0 / (us_duration250 - us_duration50));

	if (!cpu_only && !tsc_only) {
		printf("count=%ld us50=%lld us250=%lld diff=%lld cpu_MHz=%s",
		       count, us_duration50, us_duration250, us_duration250 - us_duration50,
		       mhz);
	}
	else if (cpu_only) {
		printf("%s\n", mhz);
		return;
	}

#ifdef HAVE_RDTSC
	/* computed in double like the CPU frequency: a float's 24-bit mantissa
	 * is too short for the tick counts involved.
	 */
	if (use_ints)
		snprintf(mhz, sizeof(mhz), "%.0f", (double)(tsc_duration250 - tsc_duration50) / (double)(us_duration250 - us_duration50));
	else
		snprintf(mhz, sizeof(mhz), "%.3f", (double)(tsc_duration250 - tsc_duration50) / (double)(us_duration250 - us_duration50));

	if (!tsc_only) {
		/* "diff" is reported per loop iteration here */
		printf(" tsc50=%lld tsc250=%lld diff=%lld rdtsc_MHz=%s",
		       tsc_duration50, tsc_duration250, (tsc_duration250 - tsc_duration50) / count,
		       mhz);
	} else {
		printf("%s\n", mhz);
		return;
	}
#endif
	/* terminates the full line started above */
	putchar('\n');
}
/* Spends <delay> microseconds busy-waiting for the CPU's frequency to rise.
 * A zero or negative <delay> returns immediately. The wait also stops on
 * backwards time jumps, if any: the elapsed time is computed on unsigned
 * values, so it wraps to a huge number when the clock goes back.
 */
void pre_heat(long delay)
{
	unsigned long long start;

	/* a negative delay would be converted to a huge unsigned value below
	 * and would keep the loop spinning practically forever.
	 */
	if (delay <= 0)
		return;

	start = microseconds();
	while (microseconds() - start < (unsigned long long)delay)
		;
}
/* Determines the number of loop50() iterations needed to run for about 20
 * milliseconds. The iteration count starts from 1000 and grows by a quarter
 * at each attempt until one attempt lasts at least 10 milliseconds; the count
 * is then scaled to 20 milliseconds based on that last attempt. 20 ms is an
 * integral number of clock ticks on 100, 250 and 1000 Hz systems.
 */
unsigned int calibrate(void)
{
	unsigned int iterations = 1000;
	unsigned long long begin;
	unsigned long long elapsed;

	do {
		iterations = iterations * 5 / 4;
		begin = microseconds();
		loop50(iterations);
		elapsed = microseconds() - begin;
	} while (elapsed < 10000);

	/* rule of three towards 20000 us, rounded to the nearest integer */
	return (iterations * 20000ULL + elapsed / 2) / elapsed;
}
/* Prints the help message on stdout, using <name> as the program name, then
 * exits with status 0. This function never returns. The "-t" option is only
 * mentioned when HAVE_RDTSC is defined.
 */
void usage(const char *name)
{
	/* the synopsis lists every option described below, including -i */
	printf("Usage: %s [-h|-c|-i%s]* [lines [heat [count]]]\n"
	       " -h show this help\n"
	       " -c show CPU freq only (in MHz)\n"
	       " -i report integral frequencies only\n"
#ifdef HAVE_RDTSC
	       " -t show TSC freq only (in MHz)\n"
#endif
	       " lines number of measurements (one line per measurement). Def: 1\n"
	       " heat pre-heat time in microseconds. Def: 0\n"
	       " count calibration value, higher is slower but more accurate. Def: auto\n"
	       "\n", name,
#ifdef HAVE_RDTSC
	       "|-t"
#else
	       ""
#endif
	       );
	exit(0);
}
/* Parses <arg> as a base-10 integer that must not be lower than <min>.
 * Returns 0 and stores the value into <*out> on success, or -1 when <arg> is
 * empty, contains anything but a number, or is lower than <min>. Values too
 * large for a long are clamped to LONG_MAX by strtol().
 */
static int parse_arg(const char *arg, long min, long *out)
{
	char *end;
	long val = strtol(arg, &end, 10);

	if (end == arg || *end != '\0' || val < min)
		return -1;

	*out = val;
	return 0;
}

/* Entry point: mhz [-h|-c|-i|-t]* [lines [heat [count]]]
 *
 * Leading arguments starting with '-' are options: -c, -i and (when
 * HAVE_RDTSC is defined) -t set the matching flag, anything else shows the
 * help and exits. The optional positional arguments are the number of
 * measurements (default 1), the pre-heat time in microseconds, and the loop
 * count replacing the calibrated one.
 *
 * Returns 0 on success, or 1 when a positional argument is not a valid
 * number: the count must be at least 1 since loop50()/loop250() wrap around
 * on a zero count and run_once() divides by it, and negative values make no
 * sense for the two other arguments.
 */
int main(int argc, char **argv)
{
	const char *name = argv[0];
	unsigned int count;
	long user_count = 0;
	long heat = 0;
	long runs = 1;

	while (argc > 1 && *argv[1] == '-') {
		if (argv[1][1] == 'c')
			cpu_only = 1;
		else if (argv[1][1] == 'i')
			use_ints = 1;
#ifdef HAVE_RDTSC
		else if (argv[1][1] == 't')
			tsc_only = 1;
#endif
		else
			usage(name);
		argc--; argv++;
	}

	/* validate everything first so that errors are reported before any
	 * time is spent heating or calibrating.
	 */
	if (argc > 1 && parse_arg(argv[1], 0, &runs) < 0) {
		fprintf(stderr, "%s: invalid number of lines '%s'\n", name, argv[1]);
		return 1;
	}

	if (argc > 2 && parse_arg(argv[2], 0, &heat) < 0) {
		fprintf(stderr, "%s: invalid pre-heat time '%s'\n", name, argv[2]);
		return 1;
	}

	if (argc > 3 &&
	    (parse_arg(argv[3], 1, &user_count) < 0 ||
	     (unsigned long)user_count > UINT_MAX)) {
		fprintf(stderr, "%s: invalid count '%s'\n", name, argv[3]);
		return 1;
	}

	if (argc > 2)
		pre_heat(heat);

	/* the calibration is always run, as before; its result is simply
	 * replaced when a count was given on the command line.
	 */
	count = calibrate();
	if (argc > 3)
		count = user_count;

	while (runs--)
		run_once(count);
	return 0;
}