-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbioslow
executable file
·136 lines (114 loc) · 4.39 KB
/
bioslow
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/python3
# @lint-avoid-python-3-compatibility-imports
#
# bioslow Log slow I/O operations over a given threshold.
# For Linux, uses BCC, eBPF.
#
# USAGE: bioslow [-h] [-t THRESHOLD] [-l LOGFILE] [-u {ns,us,ms,s}] [-j] [-p]
#
# Copyright (c) 2024 Benedikt Braunger.
# Licensed under the Apache License, Version 2.0 (the "License")
from __future__ import print_function
from datetime import datetime, timedelta
from bcc import BPF
import argparse
import json
# Command-line arguments.
#
# The threshold is always interpreted in the unit selected with -u/--unit
# (default: ms), so the help text and the examples state the unit explicitly
# instead of claiming a fixed microsecond default.
examples = """examples:
    ./bioslow                  # log I/O operations longer than 1ms (defaults)
    ./bioslow -t 10            # log I/O operations longer than 10ms
    ./bioslow -t 10 -u us      # log I/O operations longer than 10us
    ./bioslow -t 100 -u ms     # log I/O operations longer than 100ms
    ./bioslow -l foo.log       # log to a file instead of stdout
"""
parser = argparse.ArgumentParser(
    description="Log slow I/O operations over a given threshold",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-t", "--threshold", default=1, type=int,
    help="log requests which took longer than this, expressed in the "
         "unit given by --unit (default: 1)")
parser.add_argument("-l", "--logfile",
    help="log to a given file instead of stdout")
parser.add_argument('-u', "--unit", choices=['ns', 'us', 'ms', 's'], default='ms',
    help="unit of threshold and reported times. "
         "Can be 'ns', 'us', 'ms' or 's' (default: ms)")
# Not implemented yet (kept as a reminder): a positional "count" argument
# limiting the number of outputs, and -d/--disk to trace a single disk only.
parser.add_argument("-j", "--json", action="store_true",
    help="generate json output")
parser.add_argument("-p", "--prometheus", action="store_true",
    help="generate output for prometheus textfile exporter")
args = parser.parse_args()
REQ_WRITE = 1  # from include/linux/blk_types.h

# eBPF C program handed to BCC. It records a start timestamp per block
# request and, on completion, prints "<bytes> <cmd_flags hex> <latency>" to
# the kernel trace pipe. The single upper-case placeholder token inside
# trace_stop() is replaced further down in this script by the statement that
# scales the nanosecond latency to the unit chosen on the command line.
bpf_text = '''
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>

BPF_HASH(start, struct request *);

void trace_start(struct pt_regs *ctx, struct request *req) {
    // stash start timestamp by request ptr
    u64 ts = bpf_ktime_get_ns();
    start.update(&req, &ts);
}

void trace_stop(struct pt_regs *ctx, struct request *req) {
    u64 *tsp, delta;

    tsp = start.lookup(&req);
    if (tsp != 0) {
        delta = bpf_ktime_get_ns() - *tsp;
        FACTOR
        // delta is a u64: print it as 64-bit unsigned so that long
        // latencies (over ~2.1 s when reporting nanoseconds) are not
        // truncated to 32 bits. The request length is unsigned as well.
        bpf_trace_printk("%u %x %llu\\n",
                         req->__data_len, req->cmd_flags, delta);
        start.delete(&req);
    }
}
'''
# Resolve the requested time unit. Each entry maps the --unit value to the C
# statement substituted for the FACTOR placeholder in the BPF program (the
# latency is measured in nanoseconds, so "ns" needs no scaling) and to the
# unit label used in the output.
_unit_table = {
    "s": ('delta /= 1000000000;', "secs"),
    "ms": ('delta /= 1000000;', "msecs"),
    "us": ('delta /= 1000;', "usecs"),
    "ns": ('', "nsecs"),
}
if args.unit in _unit_table:
    _scale_stmt, unit = _unit_table[args.unit]
    bpf_text = bpf_text.replace('FACTOR', _scale_stmt)
# Print the human-readable banner only when neither machine-readable output
# format (prometheus / json) was requested.
if not (args.prometheus or args.json):
    print(f"\nLogging I/O operations longer than {args.threshold} {unit}")
    print("Tracing ... Ctrl-C to end.")

# Compile the BPF program.
b = BPF(text=bpf_text)

# Request start: blk_start_request is probed only when the running kernel
# exposes it; blk_mq_start_request is always probed.
if BPF.get_kprobe_functions(b'blk_start_request'):
    b.attach_kprobe(event="blk_start_request", fn_name="trace_start")
b.attach_kprobe(event="blk_mq_start_request", fn_name="trace_start")

# Request completion: use __blk_account_io_done when it is available as a
# kprobe target, otherwise fall back to blk_account_io_done.
stop_event = (
    "__blk_account_io_done"
    if BPF.get_kprobe_functions(b'__blk_account_io_done')
    else "blk_account_io_done"
)
b.attach_kprobe(event=stop_event, fn_name="trace_stop")
# Other completion hooks, currently disabled: blk_mq_end_request,
# blk_mq_end_request_batch, blk_mq_complete_request.
def _format_event(log_time, req_time, unit, as_json=False, as_prometheus=False):
    """Render one slow-I/O event as a single output line.

    log_time is the wall-clock datetime of the event, req_time the latency
    as the decimal string emitted by the BPF program, and unit the unit
    label. JSON takes precedence over the Prometheus format when both
    flags are set; with neither, a plain "<time>: <latency> <unit>" line
    is returned.
    """
    if as_json:
        return json.dumps({"timestamp": str(log_time), "request_time": req_time, "unit": unit})
    if as_prometheus:
        # The Prometheus text format requires quoted label values and an
        # integer timestamp in milliseconds since the epoch.
        # NOTE(review): the node_exporter textfile collector may reject
        # samples that carry a timestamp at all - confirm before relying
        # on this mode with that collector.
        epoch_ms = int(log_time.timestamp() * 1000)
        return f'block_io_slow{{unit="{unit}"}} {req_time} {epoch_ms}'
    return f"{log_time}: {req_time} {unit}"


# Events go to the log file (append mode) when one was given, else to stdout.
logfile = open(args.logfile, "at") if args.logfile else None
try:
    while True:
        try:
            (_task, _pid, _cpu, _flags, _ts, msg) = b.trace_fields()
            (_req_size, _req_flags, req_time) = msg.decode('ASCII').split()
            latency = int(req_time)
        except ValueError:
            # The trace pipe is shared system-wide: skip lines that are
            # malformed or were not produced by our trace_stop() probe.
            continue
        if latency < args.threshold:
            continue
        # Stamp the event with the wall-clock time at which it was read.
        # The trace-pipe timestamp is not an offset from "now"; adding it
        # to datetime.now() yielded times far in the future.
        log_string = _format_event(datetime.now(), req_time, unit,
                                   args.json, args.prometheus)
        if logfile:
            # Flush per event so the log can be followed live.
            logfile.write(log_string + "\n")
            logfile.flush()
        else:
            print(log_string)
except KeyboardInterrupt:
    # Ctrl-C is the documented way to stop tracing; exit without a traceback.
    pass
finally:
    if logfile:
        logfile.close()