Skip to content

Commit

Permalink
Clean up sockops
Browse files Browse the repository at this point in the history
  • Loading branch information
ccanel committed Apr 30, 2024
1 parent c639ceb commit 0a148b1
Showing 1 changed file with 58 additions and 32 deletions.
90 changes: 58 additions & 32 deletions ratemon/runtime/ratemon.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ char LICENSE[] SEC("license") = "Dual BSD/GPL";
#define TCPOPT_WINDOW 3 /* Window scaling */
#define AF_INET 2 /* Internet IP Protocol */

#define EINVAL 22

#define min(x, y) ((x) < (y) ? (x) : (y))

// Read RWND limit for flow, as set by userspace. Even though the advertised
Expand Down Expand Up @@ -179,44 +181,68 @@ int do_rwnd_at_egress(struct __sk_buff *skb) {
// The next several functions are helpers for the sockops program that records
// the receiver's TCP window scale value.

inline int set_hdr_cb_flags(struct bpf_sock_ops *skops) {
// Passes 'flags' to bpf_sock_ops_cb_flags_set() for 'skops' and translates the
// helper's result into this file's SOCKOPS_* return codes.
//
// Returns SOCKOPS_OK when the helper returns 0. Returns SOCKOPS_ERR (after
// logging with bpf_printk) when the helper returns any nonzero value. A return
// of -EINVAL is logged with its own message because it is treated here as
// meaning that the socket is not a full socket.
__always_inline int set_hdr_cb_flags(struct bpf_sock_ops *skops, int flags) {
long ret = bpf_sock_ops_cb_flags_set(skops, flags);
if (ret == -EINVAL) {
// This is not a fullsock.
// Note: bpf_sk_fullsock() is not available in sockops, so if this is not a
// fullsock there is nothing we can do.
bpf_printk(
"ERROR: failed to set sockops flags because socket is not full socket");
return SOCKOPS_ERR;
} else if (ret) {
// Any other nonzero result: log the raw value returned by the helper.
bpf_printk("ERROR: failed to set specific sockops flag: %ld", ret);
return SOCKOPS_ERR;
}
return SOCKOPS_OK;
}

__always_inline int enable_hdr_cbs(struct bpf_sock_ops *skops) {
// Set the flag enabling the BPF_SOCK_OPS_HDR_OPT_LEN_CB and
// BPF_SOCK_OPS_WRITE_HDR_OPT_CB callbacks.
if (bpf_sock_ops_cb_flags_set(
skops,
skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)) {
bpf_printk("ERROR when setting sockops flag for writing a header option");
if (set_hdr_cb_flags(skops, skops->bpf_sock_ops_cb_flags |
BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG) ==
SOCKOPS_ERR) {
bpf_printk("ERROR: could not enable sockops header callbacks");
return SOCKOPS_ERR;
}
return SOCKOPS_OK;
}

inline int clear_hdr_cb_flags(struct bpf_sock_ops *skops) {
__always_inline int disable_hdr_cbs(struct bpf_sock_ops *skops) {
// Clear the flag enabling the BPF_SOCK_OPS_HDR_OPT_LEN_CB and
// BPF_SOCK_OPS_WRITE_HDR_OPT_CB callbacks.
if (bpf_sock_ops_cb_flags_set(
skops,
skops->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)) {
bpf_printk("ERROR when clearing sockops flag for writing a header option");
if (set_hdr_cb_flags(skops, skops->bpf_sock_ops_cb_flags &
~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG) ==
SOCKOPS_ERR) {
bpf_printk("ERROR: could not disable sockops header callbacks");
return SOCKOPS_ERR;
}
return SOCKOPS_OK;
}

inline int handle_hdr_opt_len(struct bpf_sock_ops *skops) {
__always_inline int handle_hdr_opt_len(struct bpf_sock_ops *skops) {
// Keep in mind that the window scale is set by the local host on
// _outgoing_ SYN and SYNACK packets. The handle_write_hdr_opt() sockops
// callback is only triggered for outgoing packets, so all we need to do
// is filter for SYN and SYNACK.
if ((skops->skb_tcp_flags & TCPHDR_SYN) != TCPHDR_SYN) {
// This is not a SYN or SYNACK packet.
return SOCKOPS_OK;
}

// If this is a SYN or SYNACK, then trigger the BPF_SOCK_OPS_WRITE_HDR_OPT_CB
// callback by reserving three bytes (the minimum) for a TCP header option.
// These three bytes will never actually be used, but reserving space is the
// only way for that callback to be triggered.
if (((skops->skb_tcp_flags & TCPHDR_SYN) == TCPHDR_SYN) &&
bpf_reserve_hdr_opt(skops, 3, 0)) {
if (bpf_reserve_hdr_opt(skops, 3, 0)) {
bpf_printk("ERROR: failed to reserve space for a header option");
return SOCKOPS_ERR;
}
return SOCKOPS_OK;
}

int handle_write_hdr_opt(struct bpf_sock_ops *skops) {
__always_inline int handle_write_hdr_opt(struct bpf_sock_ops *skops) {
// Keep in mind that the window scale is set by the local host on
// _outgoing_ SYN and SYNACK packets. The handle_write_hdr_opt() sockops
// callback is only triggered for outgoing packets, so all we need to do
Expand All @@ -226,17 +252,24 @@ int handle_write_hdr_opt(struct bpf_sock_ops *skops) {
return SOCKOPS_OK;
}

// This is an outgoing SYN or SYNACK packet. It should contain the window
// scale. Let's try to look it up.

// Look up the TCP window scale.
struct tcp_opt win_scale_opt = {.kind = TCPOPT_WINDOW, .len = 0, .data = 0};
int ret = bpf_load_hdr_opt(skops, &win_scale_opt, sizeof(win_scale_opt), 0);
if (ret != 3 || win_scale_opt.len != 3 ||
win_scale_opt.kind != TCPOPT_WINDOW) {
if (bpf_load_hdr_opt(skops, &win_scale_opt, sizeof(win_scale_opt), 0) != 3 ||
win_scale_opt.len != 3 || win_scale_opt.kind != TCPOPT_WINDOW) {
bpf_printk("ERROR: failed to retrieve window scale option");
return SOCKOPS_ERR;
}

if (skops->family != AF_INET) {
// This is not an IPv4 packet. We only support IPv4 packets because the
// struct we use as a map key stores IP addresses as 32 bits. This is purely
// an implementation detail.
bpf_printk("WARNING: not using IPv4 for flow on local port %u: family=%u",
skops->local_port, skops->family);
disable_hdr_cbs(skops);
return SOCKOPS_OK;
}

struct rm_flow flow = {.local_addr = skops->local_ip4,
.remote_addr = skops->remote_ip4,
.local_port = (u16)skops->local_port,
Expand All @@ -252,24 +285,17 @@ int handle_write_hdr_opt(struct bpf_sock_ops *skops) {
bpf_map_update_elem(&flow_to_win_scale, &flow, &win_scale_opt.data, BPF_ANY);

// Clear the flag that enables the header option write callback.
return clear_hdr_cb_flags(skops);
disable_hdr_cbs(skops);
return SOCKOPS_OK;
}

// This sockops program records a flow's TCP window scale, which is set in the
// receiver's outgoing SYNACK packet.
SEC("sockops")
int read_win_scale(struct bpf_sock_ops *skops) {
if (skops->family != AF_INET) {
// This is not an IPv4 packet. We only support IPv4 packets because the
// struct we use as a map key stores IP addresses as 32 bits. This is purely
// an implementation detail.
bpf_printk("WARNING: not using IPv4 for flow on local port %u: family=%u",
skops->local_port, skops->family);
return SOCKOPS_OK;
}
switch (skops->op) {
case BPF_SOCK_OPS_TCP_LISTEN_CB:
return set_hdr_cb_flags(skops);
return enable_hdr_cbs(skops);
case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
return handle_hdr_opt_len(skops);
case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
Expand All @@ -278,8 +304,8 @@ int read_win_scale(struct bpf_sock_ops *skops) {
return SOCKOPS_OK;
}

// The remainder of this file is all of the struct_ops programs for bpf_cubic.
// They all simply delegate to the regular tcp_cubic functions, except for
// The next several functions are the struct_ops programs for bpf_cubic. They
// all simply delegate to the regular tcp_cubic functions, except for
// 'bpf_cubic_get_into', as described below.

// These are the regular tcp_cubic functions that will be called below.
Expand Down

0 comments on commit 0a148b1

Please sign in to comment.