From a394ebb79add03c401b879c2c363cfb99ebf8a87 Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Thu, 11 Apr 2024 01:32:51 +0800 Subject: [PATCH 01/11] bpf: parse_transport() accept *l3proto --- control/kern/tproxy.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index 5b74c299e..0905f83b9 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -553,7 +553,7 @@ static __always_inline int parse_transport(const struct __sk_buff *skb, __u32 link_h_len, struct ethhdr *ethh, struct iphdr *iph, struct ipv6hdr *ipv6h, struct icmp6hdr *icmp6h, struct tcphdr *tcph, - struct udphdr *udph, __u8 *ihl, __u8 *l4proto) + struct udphdr *udph, __u8 *ihl, __u16 *l3proto, __u8 *l4proto) { __u32 offset = 0; int ret; @@ -571,6 +571,7 @@ parse_transport(const struct __sk_buff *skb, __u32 link_h_len, __builtin_memset(ethh, 0, sizeof(struct ethhdr)); ethh->h_proto = skb->protocol; } + *l3proto = ethh->h_proto; *ihl = 0; *l4proto = 0; @@ -994,13 +995,14 @@ int tproxy_lan_egress(struct __sk_buff *skb) struct tcphdr tcph; struct udphdr udph; __u8 ihl; + __u16 l3proto; __u8 l4proto; __u32 link_h_len; if (get_link_h_len(skb->ifindex, &link_h_len)) return TC_ACT_OK; int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l4proto); + &tcph, &udph, &ihl, &l3proto, &l4proto); if (ret) { bpf_printk("parse_transport: %d", ret); return TC_ACT_OK; @@ -1022,13 +1024,14 @@ int tproxy_lan_ingress(struct __sk_buff *skb) struct tcphdr tcph; struct udphdr udph; __u8 ihl; + __u16 l3proto; __u8 l4proto; __u32 link_h_len; if (get_link_h_len(skb->ifindex, &link_h_len)) return TC_ACT_OK; int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l4proto); + &tcph, &udph, &ihl, &l3proto, &l4proto); if (ret) { bpf_printk("parse_transport: %d", ret); return TC_ACT_OK; @@ -1311,13 +1314,14 @@ int tproxy_wan_ingress(struct __sk_buff *skb) struct tcphdr 
tcph; struct udphdr udph; __u8 ihl; + __u16 l3proto; __u8 l4proto; __u32 link_h_len; if (get_link_h_len(skb->ifindex, &link_h_len)) return TC_ACT_OK; int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l4proto); + &tcph, &udph, &ihl, &l3proto, &l4proto); if (ret) return TC_ACT_OK; if (l4proto != IPPROTO_UDP) @@ -1354,6 +1358,7 @@ int tproxy_wan_egress(struct __sk_buff *skb) struct tcphdr tcph; struct udphdr udph; __u8 ihl; + __u16 l3proto; __u8 l4proto; __u32 link_h_len; @@ -1361,7 +1366,7 @@ int tproxy_wan_egress(struct __sk_buff *skb) return TC_ACT_OK; bool tcp_state_syn; int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l4proto); + &tcph, &udph, &ihl, &l3proto, &l4proto); if (ret) return TC_ACT_OK; if (l4proto == IPPROTO_ICMPV6) @@ -1635,11 +1640,12 @@ int tproxy_dae0_ingress(struct __sk_buff *skb) struct tcphdr tcph; struct udphdr udph; __u8 ihl; + __u16 l3proto; __u8 l4proto; __u32 link_h_len = 14; if (parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l4proto)) + &tcph, &udph, &ihl, &l3proto, &l4proto)) return TC_ACT_OK; struct tuples tuples; From 301c9fbd1d953745593104df0960cb141115a1df Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Thu, 11 Apr 2024 01:35:51 +0800 Subject: [PATCH 02/11] bpf: get_tuples() doesn't need skb parameter Because we pass l3proto instead, which is parsed from parse_transport() --- control/kern/tproxy.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index 0905f83b9..1afd8c976 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -413,14 +413,15 @@ static __always_inline __u8 ipv6_get_dscp(const struct ipv6hdr *ipv6h) } static __always_inline void -get_tuples(const struct __sk_buff *skb, struct tuples *tuples, +get_tuples(struct tuples *tuples, const struct iphdr *iph, const struct ipv6hdr *ipv6h, - const struct tcphdr *tcph, 
const struct udphdr *udph, __u8 l4proto) + const struct tcphdr *tcph, const struct udphdr *udph, + __u16 l3proto, __u8 l4proto) { __builtin_memset(tuples, 0, sizeof(*tuples)); tuples->five.l4proto = l4proto; - if (skb->protocol == bpf_htons(ETH_P_IP)) { + if (l3proto == bpf_htons(ETH_P_IP)) { tuples->five.sip.u6_addr32[2] = bpf_htonl(0x0000ffff); tuples->five.sip.u6_addr32[3] = iph->saddr; @@ -1042,7 +1043,7 @@ int tproxy_lan_ingress(struct __sk_buff *skb) // Prepare five tuples. struct tuples tuples; - get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto); + get_tuples(&tuples, &iph, &ipv6h, &tcph, &udph, l3proto, l4proto); /* * ip rule add fwmark 0x8000000/0x8000000 table 2023 @@ -1330,7 +1331,7 @@ int tproxy_wan_ingress(struct __sk_buff *skb) struct tuples tuples; struct tuples_key reversed_tuples_key; - get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto); + get_tuples(&tuples, &iph, &ipv6h, &tcph, &udph, l3proto, l4proto); copy_reversed_tuples(&tuples.five, &reversed_tuples_key); if (refresh_udp_conn_state_timer(&reversed_tuples_key)) @@ -1375,7 +1376,7 @@ int tproxy_wan_egress(struct __sk_buff *skb) // Backup for further use. struct tuples tuples; - get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto); + get_tuples(&tuples, &iph, &ipv6h, &tcph, &udph, l3proto, l4proto); // Normal packets. if (l4proto == IPPROTO_TCP) { @@ -1649,7 +1650,7 @@ int tproxy_dae0_ingress(struct __sk_buff *skb) return TC_ACT_OK; struct tuples tuples; - get_tuples(skb, &tuples, &iph, &ipv6h, &tcph, &udph, l4proto); + get_tuples(&tuples, &iph, &ipv6h, &tcph, &udph, l3proto, l4proto); // reverse the tuple! struct redirect_tuple redirect_tuple = {}; From 40e8a0844bd6ba566cba5a49028d5ef2aaac9fd7 Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Thu, 11 Apr 2024 01:46:36 +0800 Subject: [PATCH 03/11] bpf: redirect_to_control_plane() accepts l3proto And it also returns bpf_redirect(). 
--- control/kern/tproxy.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index 1afd8c976..fef3c63e7 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -937,14 +937,14 @@ static __always_inline int assign_listener(struct __sk_buff *skb, __u8 l4proto) return ret; } -static __always_inline void prep_redirect_to_control_plane( - struct __sk_buff *skb, __u32 link_h_len, struct tuples *tuples, - __u8 l4proto, struct ethhdr *ethh, __u8 from_wan, struct tcphdr *tcph) +static __always_inline int +redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, + struct tuples *tuples, + struct ethhdr *ethh, struct tcphdr *tcph, + __u8 from_wan, __u16 l3proto, __u8 l4proto) { /* Redirect from L3 dev to L2 dev, e.g. wg0 -> veth */ if (!link_h_len) { - __u16 l3proto = skb->protocol; - bpf_skb_change_head(skb, sizeof(struct ethhdr), 0); bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_proto), &l3proto, sizeof(l3proto), 0); @@ -956,7 +956,7 @@ static __always_inline void prep_redirect_to_control_plane( struct redirect_tuple redirect_tuple = {}; - if (skb->protocol == bpf_htons(ETH_P_IP)) { + if (l3proto == bpf_htons(ETH_P_IP)) { redirect_tuple.sip.u6_addr32[3] = tuples->five.sip.u6_addr32[3]; redirect_tuple.dip.u6_addr32[3] = tuples->five.dip.u6_addr32[3]; } else { @@ -981,6 +981,8 @@ static __always_inline void prep_redirect_to_control_plane( skb->cb[1] = 0; if ((l4proto == IPPROTO_TCP && tcph->syn) || l4proto == IPPROTO_UDP) skb->cb[1] = l4proto; + + return bpf_redirect(PARAM.dae0_ifindex, 0); } SEC("tc/egress") @@ -1192,9 +1194,8 @@ int tproxy_lan_ingress(struct __sk_buff *skb) // Assign to control plane. 
control_plane: - prep_redirect_to_control_plane(skb, link_h_len, &tuples, l4proto, &ethh, - 0, &tcph); - return bpf_redirect(PARAM.dae0_ifindex, 0); + return redirect_to_control_plane(skb, link_h_len, &tuples, &ethh, &tcph, + 0, l3proto, l4proto); direct: return TC_ACT_OK; @@ -1601,9 +1602,8 @@ int tproxy_wan_egress(struct __sk_buff *skb) } } - prep_redirect_to_control_plane(skb, link_h_len, &tuples, l4proto, &ethh, - 1, &tcph); - return bpf_redirect(PARAM.dae0_ifindex, 0); + return redirect_to_control_plane(skb, link_h_len, &tuples, &ethh, &tcph, + 1, l3proto, l4proto); } SEC("tc/dae0peer_ingress") From 468dc2e1ef5763f8eb5e505112ba16a869be8c3e Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Fri, 12 Apr 2024 18:53:43 +0800 Subject: [PATCH 04/11] bpf: set meta in ethhdr instead of skb->cb This is because: 1. XDP can't operate skb->cb 2. Further handling doesn't need L2 info at all --- control/kern/tproxy.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index fef3c63e7..0ab326a94 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -171,6 +171,12 @@ struct tuples { __u8 dscp; }; +struct redirect_meta { + __u32 mark; + __u8 l4proto; + __u8 pad[3]; +}; + struct dae_param { __u32 tproxy_port; __u32 control_plane_pid; @@ -950,10 +956,6 @@ redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, &l3proto, sizeof(l3proto), 0); } - bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), - (void *)&PARAM.dae0peer_mac, sizeof(ethh->h_dest), - 0); - struct redirect_tuple redirect_tuple = {}; if (l3proto == bpf_htons(ETH_P_IP)) { @@ -977,10 +979,14 @@ redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, bpf_map_update_elem(&redirect_track, &redirect_tuple, &redirect_entry, BPF_ANY); - skb->cb[0] = TPROXY_MARK; - skb->cb[1] = 0; + struct redirect_meta *meta = (void *)(long)skb->data; + if ((void *)(meta + 1) > (void *)(long)skb->data_end) + return 
TC_ACT_SHOT; + + __builtin_memset(meta, 0, sizeof(*meta)); + meta->mark = TPROXY_MARK; if ((l4proto == IPPROTO_TCP && tcph->syn) || l4proto == IPPROTO_UDP) - skb->cb[1] = l4proto; + meta->l4proto = l4proto; return bpf_redirect(PARAM.dae0_ifindex, 0); } @@ -1609,9 +1615,14 @@ int tproxy_wan_egress(struct __sk_buff *skb) SEC("tc/dae0peer_ingress") int tproxy_dae0peer_ingress(struct __sk_buff *skb) { - /* Only packets redirected from wan_egress or lan_ingress have this cb mark. + /* Only packets redirected from wan_egress or lan_ingress have this mark. */ - if (skb->cb[0] != TPROXY_MARK) + struct redirect_meta *meta = (void *)(long)skb->data; + + if ((void *)(meta + 1) > (void *)(long)skb->data_end) + return TC_ACT_SHOT; + + if (meta->mark != TPROXY_MARK) return TC_ACT_SHOT; /* ip rule add fwmark 0x8000000/0x8000000 table 2023 @@ -1620,11 +1631,11 @@ int tproxy_dae0peer_ingress(struct __sk_buff *skb) skb->mark = TPROXY_MARK; bpf_skb_change_type(skb, PACKET_HOST); - /* l4proto is stored in skb->cb[1] only for UDP and new TCP. As for + /* l4proto is stored in meta only for UDP and new TCP. As for * established TCP, kernel can take care of socket lookup, so just * return them to stack without calling bpf_sk_assign. */ - __u8 l4proto = skb->cb[1]; + __u8 l4proto = meta->l4proto; if (l4proto != 0) assign_listener(skb, l4proto); From 0d425f905b2f2270768b6baa844b95dd1cf62b40 Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Fri, 12 Apr 2024 19:01:30 +0800 Subject: [PATCH 05/11] bpf: Move three functions to lib/skb.h we will implement the corresponding functions for XDP soon. 
--- control/kern/lib/skb.h | 226 ++++++++++++++++++++++++++++++++++++++++ control/kern/tproxy.c | 228 +---------------------------------------- 2 files changed, 227 insertions(+), 227 deletions(-) create mode 100644 control/kern/lib/skb.h diff --git a/control/kern/lib/skb.h b/control/kern/lib/skb.h new file mode 100644 index 000000000..2e9a497be --- /dev/null +++ b/control/kern/lib/skb.h @@ -0,0 +1,226 @@ +static __always_inline int +handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, + struct icmp6hdr *icmp6h, struct tcphdr *tcph, + struct udphdr *udph, __u8 *ihl, __u8 *l4proto) +{ + __u8 hdr_length = 0; + __u8 nexthdr = 0; + *ihl = sizeof(struct ipv6hdr) / 4; + int ret; + // We only process TCP and UDP traffic. + + // Unroll can give less instructions but more memory consumption when loading. + // We disable it here to support more poor memory devices. + // #pragma unroll + for (int i = 0; i < IPV6_MAX_EXTENSIONS; + i++, offset += hdr_length, hdr = nexthdr, *ihl += hdr_length / 4) { + if (hdr_length % 4) { + bpf_printk( + "IPv6 extension length is not multiples of 4"); + return 1; + } + // See control/control_plane.go. + + switch (hdr) { + case IPPROTO_ICMPV6: + *l4proto = hdr; + hdr_length = sizeof(struct icmp6hdr); + // Assume ICMPV6 as a level 4 protocol. 
+ ret = bpf_skb_load_bytes(skb, offset, icmp6h, + hdr_length); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + return 0; + + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + ret = bpf_skb_load_bytes(skb, offset + 1, &hdr_length, + sizeof(hdr_length)); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + +special_n1: + ret = bpf_skb_load_bytes(skb, offset, &nexthdr, + sizeof(nexthdr)); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + break; + case IPPROTO_FRAGMENT: + hdr_length = 4; + goto special_n1; + case IPPROTO_TCP: + case IPPROTO_UDP: + *l4proto = hdr; + if (hdr == IPPROTO_TCP) { + // Upper layer; + ret = bpf_skb_load_bytes(skb, offset, tcph, + sizeof(struct tcphdr)); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + } else if (hdr == IPPROTO_UDP) { + // Upper layer; + ret = bpf_skb_load_bytes(skb, offset, udph, + sizeof(struct udphdr)); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + } else { + // Unknown hdr. + bpf_printk("Unexpected hdr."); + return 1; + } + return 0; + default: + /// EXPECTED: Maybe ICMP, etc. + // bpf_printk("IPv6 but unrecognized extension protocol: %u", hdr); + return 1; + } + } + bpf_printk("exceeds IPV6_MAX_EXTENSIONS limit"); + return 1; +} + +static __always_inline int +parse_transport(const struct __sk_buff *skb, __u32 link_h_len, + struct ethhdr *ethh, struct iphdr *iph, struct ipv6hdr *ipv6h, + struct icmp6hdr *icmp6h, struct tcphdr *tcph, + struct udphdr *udph, __u8 *ihl, __u16 *l3proto, __u8 *l4proto) +{ + __u32 offset = 0; + int ret; + + if (link_h_len == ETH_HLEN) { + ret = bpf_skb_load_bytes(skb, offset, ethh, + sizeof(struct ethhdr)); + if (ret) { + bpf_printk("not ethernet packet"); + return 1; + } + // Skip ethhdr for next hdr. 
+ offset += sizeof(struct ethhdr); + } else { + __builtin_memset(ethh, 0, sizeof(struct ethhdr)); + ethh->h_proto = skb->protocol; + } + *l3proto = ethh->h_proto; + + *ihl = 0; + *l4proto = 0; + __builtin_memset(iph, 0, sizeof(struct iphdr)); + __builtin_memset(ipv6h, 0, sizeof(struct ipv6hdr)); + __builtin_memset(icmp6h, 0, sizeof(struct icmp6hdr)); + __builtin_memset(tcph, 0, sizeof(struct tcphdr)); + __builtin_memset(udph, 0, sizeof(struct udphdr)); + + // bpf_printk("parse_transport: h_proto: %u ? %u %u", ethh->h_proto, + // bpf_htons(ETH_P_IP), + // bpf_htons(ETH_P_IPV6)); + if (ethh->h_proto == bpf_htons(ETH_P_IP)) { + ret = bpf_skb_load_bytes(skb, offset, iph, + sizeof(struct iphdr)); + if (ret) + return -EFAULT; + // Skip ipv4hdr and options for next hdr. + offset += iph->ihl * 4; + + // We only process TCP and UDP traffic. + *l4proto = iph->protocol; + switch (iph->protocol) { + case IPPROTO_TCP: { + ret = bpf_skb_load_bytes(skb, offset, tcph, + sizeof(struct tcphdr)); + if (ret) { + // Not a complete tcphdr. + return -EFAULT; + } + } break; + case IPPROTO_UDP: { + ret = bpf_skb_load_bytes(skb, offset, udph, + sizeof(struct udphdr)); + if (ret) { + // Not a complete udphdr. + return -EFAULT; + } + } break; + default: + return 1; + } + *ihl = iph->ihl; + return 0; + } else if (ethh->h_proto == bpf_htons(ETH_P_IPV6)) { + ret = bpf_skb_load_bytes(skb, offset, ipv6h, + sizeof(struct ipv6hdr)); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + + offset += sizeof(struct ipv6hdr); + + return handle_ipv6_extensions(skb, offset, ipv6h->nexthdr, + icmp6h, tcph, udph, ihl, l4proto); + } else { + /// EXPECTED: Maybe ICMP, MPLS, etc. 
+ // bpf_printk("IP but not supported packet: protocol is %u", + // iph->protocol); + // bpf_printk("unknown link proto: %u", bpf_ntohl(skb->protocol)); + return 1; + } +} + +static __always_inline int +redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, + struct tuples *tuples, + struct ethhdr *ethh, struct tcphdr *tcph, + __u8 from_wan, __u16 l3proto, __u8 l4proto) +{ + /* Redirect from L3 dev to L2 dev, e.g. wg0 -> veth */ + if (!link_h_len) { + bpf_skb_change_head(skb, sizeof(struct ethhdr), 0); + bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_proto), + &l3proto, sizeof(l3proto), 0); + } + + struct redirect_tuple redirect_tuple = {}; + + if (l3proto == bpf_htons(ETH_P_IP)) { + redirect_tuple.sip.u6_addr32[3] = tuples->five.sip.u6_addr32[3]; + redirect_tuple.dip.u6_addr32[3] = tuples->five.dip.u6_addr32[3]; + } else { + __builtin_memcpy(&redirect_tuple.sip, &tuples->five.sip, + IPV6_BYTE_LENGTH); + __builtin_memcpy(&redirect_tuple.dip, &tuples->five.dip, + IPV6_BYTE_LENGTH); + } + redirect_tuple.l4proto = l4proto; + struct redirect_entry redirect_entry = {}; + + redirect_entry.ifindex = skb->ifindex; + redirect_entry.from_wan = from_wan; + __builtin_memcpy(redirect_entry.smac, ethh->h_source, + sizeof(ethh->h_source)); + __builtin_memcpy(redirect_entry.dmac, ethh->h_dest, + sizeof(ethh->h_dest)); + bpf_map_update_elem(&redirect_track, &redirect_tuple, &redirect_entry, + BPF_ANY); + + struct redirect_meta *meta = (void *)(long)skb->data; + if ((void *)(meta + 1) > (void *)(long)skb->data_end) + return TC_ACT_SHOT; + + __builtin_memset(meta, 0, sizeof(*meta)); + meta->mark = TPROXY_MARK; + if ((l4proto == IPPROTO_TCP && tcph->syn) || l4proto == IPPROTO_UDP) + meta->l4proto = l4proto; + + return bpf_redirect(PARAM.dae0_ifindex, 0); +} diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index 0ab326a94..bbfdeb4ef 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -407,6 +407,7 @@ struct { } udp_conn_state_map SEC(".maps"); // 
Functions: +#include "lib/skb.h" static __always_inline __u8 ipv4_get_dscp(const struct iphdr *iph) { @@ -465,185 +466,6 @@ static __always_inline bool equal16(const __be32 x[4], const __be32 y[4]) #endif } -static __always_inline int -handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, - struct icmp6hdr *icmp6h, struct tcphdr *tcph, - struct udphdr *udph, __u8 *ihl, __u8 *l4proto) -{ - __u8 hdr_length = 0; - __u8 nexthdr = 0; - *ihl = sizeof(struct ipv6hdr) / 4; - int ret; - // We only process TCP and UDP traffic. - - // Unroll can give less instructions but more memory consumption when loading. - // We disable it here to support more poor memory devices. - // #pragma unroll - for (int i = 0; i < IPV6_MAX_EXTENSIONS; - i++, offset += hdr_length, hdr = nexthdr, *ihl += hdr_length / 4) { - if (hdr_length % 4) { - bpf_printk( - "IPv6 extension length is not multiples of 4"); - return 1; - } - // See control/control_plane.go. - - switch (hdr) { - case IPPROTO_ICMPV6: - *l4proto = hdr; - hdr_length = sizeof(struct icmp6hdr); - // Assume ICMPV6 as a level 4 protocol. 
- ret = bpf_skb_load_bytes(skb, offset, icmp6h, - hdr_length); - if (ret) { - bpf_printk("not a valid IPv6 packet"); - return -EFAULT; - } - return 0; - - case IPPROTO_HOPOPTS: - case IPPROTO_ROUTING: - ret = bpf_skb_load_bytes(skb, offset + 1, &hdr_length, - sizeof(hdr_length)); - if (ret) { - bpf_printk("not a valid IPv6 packet"); - return -EFAULT; - } - -special_n1: - ret = bpf_skb_load_bytes(skb, offset, &nexthdr, - sizeof(nexthdr)); - if (ret) { - bpf_printk("not a valid IPv6 packet"); - return -EFAULT; - } - break; - case IPPROTO_FRAGMENT: - hdr_length = 4; - goto special_n1; - case IPPROTO_TCP: - case IPPROTO_UDP: - *l4proto = hdr; - if (hdr == IPPROTO_TCP) { - // Upper layer; - ret = bpf_skb_load_bytes(skb, offset, tcph, - sizeof(struct tcphdr)); - if (ret) { - bpf_printk("not a valid IPv6 packet"); - return -EFAULT; - } - } else if (hdr == IPPROTO_UDP) { - // Upper layer; - ret = bpf_skb_load_bytes(skb, offset, udph, - sizeof(struct udphdr)); - if (ret) { - bpf_printk("not a valid IPv6 packet"); - return -EFAULT; - } - } else { - // Unknown hdr. - bpf_printk("Unexpected hdr."); - return 1; - } - return 0; - default: - /// EXPECTED: Maybe ICMP, etc. - // bpf_printk("IPv6 but unrecognized extension protocol: %u", hdr); - return 1; - } - } - bpf_printk("exceeds IPV6_MAX_EXTENSIONS limit"); - return 1; -} - -static __always_inline int -parse_transport(const struct __sk_buff *skb, __u32 link_h_len, - struct ethhdr *ethh, struct iphdr *iph, struct ipv6hdr *ipv6h, - struct icmp6hdr *icmp6h, struct tcphdr *tcph, - struct udphdr *udph, __u8 *ihl, __u16 *l3proto, __u8 *l4proto) -{ - __u32 offset = 0; - int ret; - - if (link_h_len == ETH_HLEN) { - ret = bpf_skb_load_bytes(skb, offset, ethh, - sizeof(struct ethhdr)); - if (ret) { - bpf_printk("not ethernet packet"); - return 1; - } - // Skip ethhdr for next hdr. 
- offset += sizeof(struct ethhdr); - } else { - __builtin_memset(ethh, 0, sizeof(struct ethhdr)); - ethh->h_proto = skb->protocol; - } - *l3proto = ethh->h_proto; - - *ihl = 0; - *l4proto = 0; - __builtin_memset(iph, 0, sizeof(struct iphdr)); - __builtin_memset(ipv6h, 0, sizeof(struct ipv6hdr)); - __builtin_memset(icmp6h, 0, sizeof(struct icmp6hdr)); - __builtin_memset(tcph, 0, sizeof(struct tcphdr)); - __builtin_memset(udph, 0, sizeof(struct udphdr)); - - // bpf_printk("parse_transport: h_proto: %u ? %u %u", ethh->h_proto, - // bpf_htons(ETH_P_IP), - // bpf_htons(ETH_P_IPV6)); - if (ethh->h_proto == bpf_htons(ETH_P_IP)) { - ret = bpf_skb_load_bytes(skb, offset, iph, - sizeof(struct iphdr)); - if (ret) - return -EFAULT; - // Skip ipv4hdr and options for next hdr. - offset += iph->ihl * 4; - - // We only process TCP and UDP traffic. - *l4proto = iph->protocol; - switch (iph->protocol) { - case IPPROTO_TCP: { - ret = bpf_skb_load_bytes(skb, offset, tcph, - sizeof(struct tcphdr)); - if (ret) { - // Not a complete tcphdr. - return -EFAULT; - } - } break; - case IPPROTO_UDP: { - ret = bpf_skb_load_bytes(skb, offset, udph, - sizeof(struct udphdr)); - if (ret) { - // Not a complete udphdr. - return -EFAULT; - } - } break; - default: - return 1; - } - *ihl = iph->ihl; - return 0; - } else if (ethh->h_proto == bpf_htons(ETH_P_IPV6)) { - ret = bpf_skb_load_bytes(skb, offset, ipv6h, - sizeof(struct ipv6hdr)); - if (ret) { - bpf_printk("not a valid IPv6 packet"); - return -EFAULT; - } - - offset += sizeof(struct ipv6hdr); - - return handle_ipv6_extensions(skb, offset, ipv6h->nexthdr, - icmp6h, tcph, udph, ihl, l4proto); - } else { - /// EXPECTED: Maybe ICMP, MPLS, etc. - // bpf_printk("IP but not supported packet: protocol is %u", - // iph->protocol); - // bpf_printk("unknown link proto: %u", bpf_ntohl(skb->protocol)); - return 1; - } -} - // Do not use __always_inline here because this function is too heavy. 
// low -> high: outbound(8b) mark(32b) unused(23b) sign(1b) static __s64 __attribute__((noinline)) @@ -943,54 +765,6 @@ static __always_inline int assign_listener(struct __sk_buff *skb, __u8 l4proto) return ret; } -static __always_inline int -redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, - struct tuples *tuples, - struct ethhdr *ethh, struct tcphdr *tcph, - __u8 from_wan, __u16 l3proto, __u8 l4proto) -{ - /* Redirect from L3 dev to L2 dev, e.g. wg0 -> veth */ - if (!link_h_len) { - bpf_skb_change_head(skb, sizeof(struct ethhdr), 0); - bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_proto), - &l3proto, sizeof(l3proto), 0); - } - - struct redirect_tuple redirect_tuple = {}; - - if (l3proto == bpf_htons(ETH_P_IP)) { - redirect_tuple.sip.u6_addr32[3] = tuples->five.sip.u6_addr32[3]; - redirect_tuple.dip.u6_addr32[3] = tuples->five.dip.u6_addr32[3]; - } else { - __builtin_memcpy(&redirect_tuple.sip, &tuples->five.sip, - IPV6_BYTE_LENGTH); - __builtin_memcpy(&redirect_tuple.dip, &tuples->five.dip, - IPV6_BYTE_LENGTH); - } - redirect_tuple.l4proto = l4proto; - struct redirect_entry redirect_entry = {}; - - redirect_entry.ifindex = skb->ifindex; - redirect_entry.from_wan = from_wan; - __builtin_memcpy(redirect_entry.smac, ethh->h_source, - sizeof(ethh->h_source)); - __builtin_memcpy(redirect_entry.dmac, ethh->h_dest, - sizeof(ethh->h_dest)); - bpf_map_update_elem(&redirect_track, &redirect_tuple, &redirect_entry, - BPF_ANY); - - struct redirect_meta *meta = (void *)(long)skb->data; - if ((void *)(meta + 1) > (void *)(long)skb->data_end) - return TC_ACT_SHOT; - - __builtin_memset(meta, 0, sizeof(*meta)); - meta->mark = TPROXY_MARK; - if ((l4proto == IPPROTO_TCP && tcph->syn) || l4proto == IPPROTO_UDP) - meta->l4proto = l4proto; - - return bpf_redirect(PARAM.dae0_ifindex, 0); -} - SEC("tc/egress") int tproxy_lan_egress(struct __sk_buff *skb) { From 30558947b39a6c893770d6bfbccdfc4262d0bd17 Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Thu, 11 Apr 
2024 01:55:20 +0800 Subject: [PATCH 06/11] bpf: Rename lib function with prefix "skb_" To distinguish from XDP lib functions. --- control/kern/lib/skb.h | 26 +++++++++++++------------- control/kern/tproxy.c | 32 ++++++++++++++++---------------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/control/kern/lib/skb.h b/control/kern/lib/skb.h index 2e9a497be..78529735a 100644 --- a/control/kern/lib/skb.h +++ b/control/kern/lib/skb.h @@ -1,7 +1,7 @@ static __always_inline int -handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, - struct icmp6hdr *icmp6h, struct tcphdr *tcph, - struct udphdr *udph, __u8 *ihl, __u8 *l4proto) +skb_handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, + struct icmp6hdr *icmp6h, struct tcphdr *tcph, + struct udphdr *udph, __u8 *ihl, __u8 *l4proto) { __u8 hdr_length = 0; __u8 nexthdr = 0; @@ -90,10 +90,10 @@ handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, } static __always_inline int -parse_transport(const struct __sk_buff *skb, __u32 link_h_len, - struct ethhdr *ethh, struct iphdr *iph, struct ipv6hdr *ipv6h, - struct icmp6hdr *icmp6h, struct tcphdr *tcph, - struct udphdr *udph, __u8 *ihl, __u16 *l3proto, __u8 *l4proto) +skb_parse_transport(const struct __sk_buff *skb, __u32 link_h_len, + struct ethhdr *ethh, struct iphdr *iph, struct ipv6hdr *ipv6h, + struct icmp6hdr *icmp6h, struct tcphdr *tcph, + struct udphdr *udph, __u8 *ihl, __u16 *l3proto, __u8 *l4proto) { __u32 offset = 0; int ret; @@ -166,8 +166,8 @@ parse_transport(const struct __sk_buff *skb, __u32 link_h_len, offset += sizeof(struct ipv6hdr); - return handle_ipv6_extensions(skb, offset, ipv6h->nexthdr, - icmp6h, tcph, udph, ihl, l4proto); + return skb_handle_ipv6_extensions(skb, offset, ipv6h->nexthdr, + icmp6h, tcph, udph, ihl, l4proto); } else { /// EXPECTED: Maybe ICMP, MPLS, etc. 
// bpf_printk("IP but not supported packet: protocol is %u", @@ -178,10 +178,10 @@ parse_transport(const struct __sk_buff *skb, __u32 link_h_len, } static __always_inline int -redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, - struct tuples *tuples, - struct ethhdr *ethh, struct tcphdr *tcph, - __u8 from_wan, __u16 l3proto, __u8 l4proto) +skb_redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, + struct tuples *tuples, + struct ethhdr *ethh, struct tcphdr *tcph, + __u8 from_wan, __u16 l3proto, __u8 l4proto) { /* Redirect from L3 dev to L2 dev, e.g. wg0 -> veth */ if (!link_h_len) { diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index bbfdeb4ef..bdb2486ef 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -784,10 +784,10 @@ int tproxy_lan_egress(struct __sk_buff *skb) if (get_link_h_len(skb->ifindex, &link_h_len)) return TC_ACT_OK; - int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l3proto, &l4proto); + int ret = skb_parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, + &tcph, &udph, &ihl, &l3proto, &l4proto); if (ret) { - bpf_printk("parse_transport: %d", ret); + bpf_printk("skb_parse_transport: %d", ret); return TC_ACT_OK; } if (l4proto == IPPROTO_ICMPV6 && icmp6h.icmp6_type == NDP_REDIRECT) { @@ -813,10 +813,10 @@ int tproxy_lan_ingress(struct __sk_buff *skb) if (get_link_h_len(skb->ifindex, &link_h_len)) return TC_ACT_OK; - int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l3proto, &l4proto); + int ret = skb_parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, + &tcph, &udph, &ihl, &l3proto, &l4proto); if (ret) { - bpf_printk("parse_transport: %d", ret); + bpf_printk("skb_parse_transport: %d", ret); return TC_ACT_OK; } if (l4proto == IPPROTO_ICMPV6) @@ -974,8 +974,8 @@ int tproxy_lan_ingress(struct __sk_buff *skb) // Assign to control plane. 
control_plane: - return redirect_to_control_plane(skb, link_h_len, &tuples, &ethh, &tcph, - 0, l3proto, l4proto); + return skb_redirect_to_control_plane(skb, link_h_len, &tuples, &ethh, &tcph, + 0, l3proto, l4proto); direct: return TC_ACT_OK; @@ -1102,8 +1102,8 @@ int tproxy_wan_ingress(struct __sk_buff *skb) if (get_link_h_len(skb->ifindex, &link_h_len)) return TC_ACT_OK; - int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l3proto, &l4proto); + int ret = skb_parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, + &tcph, &udph, &ihl, &l3proto, &l4proto); if (ret) return TC_ACT_OK; if (l4proto != IPPROTO_UDP) @@ -1147,8 +1147,8 @@ int tproxy_wan_egress(struct __sk_buff *skb) if (get_link_h_len(skb->ifindex, &link_h_len)) return TC_ACT_OK; bool tcp_state_syn; - int ret = parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l3proto, &l4proto); + int ret = skb_parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, + &tcph, &udph, &ihl, &l3proto, &l4proto); if (ret) return TC_ACT_OK; if (l4proto == IPPROTO_ICMPV6) @@ -1382,8 +1382,8 @@ int tproxy_wan_egress(struct __sk_buff *skb) } } - return redirect_to_control_plane(skb, link_h_len, &tuples, &ethh, &tcph, - 1, l3proto, l4proto); + return skb_redirect_to_control_plane(skb, link_h_len, &tuples, &ethh, &tcph, + 1, l3proto, l4proto); } SEC("tc/dae0peer_ingress") @@ -1430,8 +1430,8 @@ int tproxy_dae0_ingress(struct __sk_buff *skb) __u8 l4proto; __u32 link_h_len = 14; - if (parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, - &tcph, &udph, &ihl, &l3proto, &l4proto)) + if (skb_parse_transport(skb, link_h_len, &ethh, &iph, &ipv6h, &icmp6h, + &tcph, &udph, &ihl, &l3proto, &l4proto)) return TC_ACT_OK; struct tuples tuples; From d499e6de3d5e7458c9af25ede83f57178fa65a1c Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Fri, 12 Apr 2024 19:02:09 +0800 Subject: [PATCH 07/11] bpf: Copy lib/skb.h to lib/xdp.h Just copy without a single character change. 
--- control/kern/lib/xdp.h | 226 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 control/kern/lib/xdp.h diff --git a/control/kern/lib/xdp.h b/control/kern/lib/xdp.h new file mode 100644 index 000000000..78529735a --- /dev/null +++ b/control/kern/lib/xdp.h @@ -0,0 +1,226 @@ +static __always_inline int +skb_handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, + struct icmp6hdr *icmp6h, struct tcphdr *tcph, + struct udphdr *udph, __u8 *ihl, __u8 *l4proto) +{ + __u8 hdr_length = 0; + __u8 nexthdr = 0; + *ihl = sizeof(struct ipv6hdr) / 4; + int ret; + // We only process TCP and UDP traffic. + + // Unroll can give less instructions but more memory consumption when loading. + // We disable it here to support more poor memory devices. + // #pragma unroll + for (int i = 0; i < IPV6_MAX_EXTENSIONS; + i++, offset += hdr_length, hdr = nexthdr, *ihl += hdr_length / 4) { + if (hdr_length % 4) { + bpf_printk( + "IPv6 extension length is not multiples of 4"); + return 1; + } + // See control/control_plane.go. + + switch (hdr) { + case IPPROTO_ICMPV6: + *l4proto = hdr; + hdr_length = sizeof(struct icmp6hdr); + // Assume ICMPV6 as a level 4 protocol. 
+ ret = bpf_skb_load_bytes(skb, offset, icmp6h, + hdr_length); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + return 0; + + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + ret = bpf_skb_load_bytes(skb, offset + 1, &hdr_length, + sizeof(hdr_length)); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + +special_n1: + ret = bpf_skb_load_bytes(skb, offset, &nexthdr, + sizeof(nexthdr)); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + break; + case IPPROTO_FRAGMENT: + hdr_length = 4; + goto special_n1; + case IPPROTO_TCP: + case IPPROTO_UDP: + *l4proto = hdr; + if (hdr == IPPROTO_TCP) { + // Upper layer; + ret = bpf_skb_load_bytes(skb, offset, tcph, + sizeof(struct tcphdr)); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + } else if (hdr == IPPROTO_UDP) { + // Upper layer; + ret = bpf_skb_load_bytes(skb, offset, udph, + sizeof(struct udphdr)); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + } else { + // Unknown hdr. + bpf_printk("Unexpected hdr."); + return 1; + } + return 0; + default: + /// EXPECTED: Maybe ICMP, etc. + // bpf_printk("IPv6 but unrecognized extension protocol: %u", hdr); + return 1; + } + } + bpf_printk("exceeds IPV6_MAX_EXTENSIONS limit"); + return 1; +} + +static __always_inline int +skb_parse_transport(const struct __sk_buff *skb, __u32 link_h_len, + struct ethhdr *ethh, struct iphdr *iph, struct ipv6hdr *ipv6h, + struct icmp6hdr *icmp6h, struct tcphdr *tcph, + struct udphdr *udph, __u8 *ihl, __u16 *l3proto, __u8 *l4proto) +{ + __u32 offset = 0; + int ret; + + if (link_h_len == ETH_HLEN) { + ret = bpf_skb_load_bytes(skb, offset, ethh, + sizeof(struct ethhdr)); + if (ret) { + bpf_printk("not ethernet packet"); + return 1; + } + // Skip ethhdr for next hdr. 
+ offset += sizeof(struct ethhdr); + } else { + __builtin_memset(ethh, 0, sizeof(struct ethhdr)); + ethh->h_proto = skb->protocol; + } + *l3proto = ethh->h_proto; + + *ihl = 0; + *l4proto = 0; + __builtin_memset(iph, 0, sizeof(struct iphdr)); + __builtin_memset(ipv6h, 0, sizeof(struct ipv6hdr)); + __builtin_memset(icmp6h, 0, sizeof(struct icmp6hdr)); + __builtin_memset(tcph, 0, sizeof(struct tcphdr)); + __builtin_memset(udph, 0, sizeof(struct udphdr)); + + // bpf_printk("parse_transport: h_proto: %u ? %u %u", ethh->h_proto, + // bpf_htons(ETH_P_IP), + // bpf_htons(ETH_P_IPV6)); + if (ethh->h_proto == bpf_htons(ETH_P_IP)) { + ret = bpf_skb_load_bytes(skb, offset, iph, + sizeof(struct iphdr)); + if (ret) + return -EFAULT; + // Skip ipv4hdr and options for next hdr. + offset += iph->ihl * 4; + + // We only process TCP and UDP traffic. + *l4proto = iph->protocol; + switch (iph->protocol) { + case IPPROTO_TCP: { + ret = bpf_skb_load_bytes(skb, offset, tcph, + sizeof(struct tcphdr)); + if (ret) { + // Not a complete tcphdr. + return -EFAULT; + } + } break; + case IPPROTO_UDP: { + ret = bpf_skb_load_bytes(skb, offset, udph, + sizeof(struct udphdr)); + if (ret) { + // Not a complete udphdr. + return -EFAULT; + } + } break; + default: + return 1; + } + *ihl = iph->ihl; + return 0; + } else if (ethh->h_proto == bpf_htons(ETH_P_IPV6)) { + ret = bpf_skb_load_bytes(skb, offset, ipv6h, + sizeof(struct ipv6hdr)); + if (ret) { + bpf_printk("not a valid IPv6 packet"); + return -EFAULT; + } + + offset += sizeof(struct ipv6hdr); + + return skb_handle_ipv6_extensions(skb, offset, ipv6h->nexthdr, + icmp6h, tcph, udph, ihl, l4proto); + } else { + /// EXPECTED: Maybe ICMP, MPLS, etc. 
+ // bpf_printk("IP but not supported packet: protocol is %u", + // iph->protocol); + // bpf_printk("unknown link proto: %u", bpf_ntohl(skb->protocol)); + return 1; + } +} + +static __always_inline int +skb_redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, + struct tuples *tuples, + struct ethhdr *ethh, struct tcphdr *tcph, + __u8 from_wan, __u16 l3proto, __u8 l4proto) +{ + /* Redirect from L3 dev to L2 dev, e.g. wg0 -> veth */ + if (!link_h_len) { + bpf_skb_change_head(skb, sizeof(struct ethhdr), 0); + bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_proto), + &l3proto, sizeof(l3proto), 0); + } + + struct redirect_tuple redirect_tuple = {}; + + if (l3proto == bpf_htons(ETH_P_IP)) { + redirect_tuple.sip.u6_addr32[3] = tuples->five.sip.u6_addr32[3]; + redirect_tuple.dip.u6_addr32[3] = tuples->five.dip.u6_addr32[3]; + } else { + __builtin_memcpy(&redirect_tuple.sip, &tuples->five.sip, + IPV6_BYTE_LENGTH); + __builtin_memcpy(&redirect_tuple.dip, &tuples->five.dip, + IPV6_BYTE_LENGTH); + } + redirect_tuple.l4proto = l4proto; + struct redirect_entry redirect_entry = {}; + + redirect_entry.ifindex = skb->ifindex; + redirect_entry.from_wan = from_wan; + __builtin_memcpy(redirect_entry.smac, ethh->h_source, + sizeof(ethh->h_source)); + __builtin_memcpy(redirect_entry.dmac, ethh->h_dest, + sizeof(ethh->h_dest)); + bpf_map_update_elem(&redirect_track, &redirect_tuple, &redirect_entry, + BPF_ANY); + + struct redirect_meta *meta = (void *)(long)skb->data; + if ((void *)(meta + 1) > (void *)(long)skb->data_end) + return TC_ACT_SHOT; + + __builtin_memset(meta, 0, sizeof(*meta)); + meta->mark = TPROXY_MARK; + if ((l4proto == IPPROTO_TCP && tcph->syn) || l4proto == IPPROTO_UDP) + meta->l4proto = l4proto; + + return bpf_redirect(PARAM.dae0_ifindex, 0); +} From 3d7e6a28defda3c86fa4b61beade362d540e6c49 Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Fri, 12 Apr 2024 19:09:34 +0800 Subject: [PATCH 08/11] bpf: Finalize xdp.h 1. 
xdp won't be attached to L3 netdev, so no need to handle 0 hlen 2. use xdp helpers instead of skb helpers --- control/kern/lib/xdp.h | 62 +++++++++++++++++------------------------- control/kern/tproxy.c | 1 + 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/control/kern/lib/xdp.h b/control/kern/lib/xdp.h index 78529735a..85b35b40b 100644 --- a/control/kern/lib/xdp.h +++ b/control/kern/lib/xdp.h @@ -1,5 +1,5 @@ static __always_inline int -skb_handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, +xdp_handle_ipv6_extensions(struct xdp_md *ctx, __u32 offset, __u32 hdr, struct icmp6hdr *icmp6h, struct tcphdr *tcph, struct udphdr *udph, __u8 *ihl, __u8 *l4proto) { @@ -26,7 +26,7 @@ skb_handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, *l4proto = hdr; hdr_length = sizeof(struct icmp6hdr); // Assume ICMPV6 as a level 4 protocol. - ret = bpf_skb_load_bytes(skb, offset, icmp6h, + ret = bpf_xdp_load_bytes(ctx, offset, icmp6h, hdr_length); if (ret) { bpf_printk("not a valid IPv6 packet"); @@ -36,7 +36,7 @@ skb_handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, case IPPROTO_HOPOPTS: case IPPROTO_ROUTING: - ret = bpf_skb_load_bytes(skb, offset + 1, &hdr_length, + ret = bpf_xdp_load_bytes(ctx, offset + 1, &hdr_length, sizeof(hdr_length)); if (ret) { bpf_printk("not a valid IPv6 packet"); @@ -44,7 +44,7 @@ skb_handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, } special_n1: - ret = bpf_skb_load_bytes(skb, offset, &nexthdr, + ret = bpf_xdp_load_bytes(ctx, offset, &nexthdr, sizeof(nexthdr)); if (ret) { bpf_printk("not a valid IPv6 packet"); @@ -59,7 +59,7 @@ skb_handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, *l4proto = hdr; if (hdr == IPPROTO_TCP) { // Upper layer; - ret = bpf_skb_load_bytes(skb, offset, tcph, + ret = bpf_xdp_load_bytes(ctx, offset, tcph, sizeof(struct tcphdr)); if (ret) { bpf_printk("not a valid IPv6 packet"); @@ -67,7 
+67,7 @@ skb_handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, } } else if (hdr == IPPROTO_UDP) { // Upper layer; - ret = bpf_skb_load_bytes(skb, offset, udph, + ret = bpf_xdp_load_bytes(ctx, offset, udph, sizeof(struct udphdr)); if (ret) { bpf_printk("not a valid IPv6 packet"); @@ -90,7 +90,7 @@ skb_handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr, } static __always_inline int -skb_parse_transport(const struct __sk_buff *skb, __u32 link_h_len, +xdp_parse_transport(struct xdp_md *ctx, struct ethhdr *ethh, struct iphdr *iph, struct ipv6hdr *ipv6h, struct icmp6hdr *icmp6h, struct tcphdr *tcph, struct udphdr *udph, __u8 *ihl, __u16 *l3proto, __u8 *l4proto) @@ -98,19 +98,14 @@ skb_parse_transport(const struct __sk_buff *skb, __u32 link_h_len, __u32 offset = 0; int ret; - if (link_h_len == ETH_HLEN) { - ret = bpf_skb_load_bytes(skb, offset, ethh, - sizeof(struct ethhdr)); - if (ret) { - bpf_printk("not ethernet packet"); - return 1; - } - // Skip ethhdr for next hdr. - offset += sizeof(struct ethhdr); - } else { - __builtin_memset(ethh, 0, sizeof(struct ethhdr)); - ethh->h_proto = skb->protocol; + ret = bpf_xdp_load_bytes(ctx, offset, ethh, + sizeof(struct ethhdr)); + if (ret) { + bpf_printk("not ethernet packet"); + return 1; } + // Skip ethhdr for next hdr. 
+ offset += sizeof(struct ethhdr); *l3proto = ethh->h_proto; *ihl = 0; @@ -125,7 +120,7 @@ skb_parse_transport(const struct __sk_buff *skb, __u32 link_h_len, // bpf_htons(ETH_P_IP), // bpf_htons(ETH_P_IPV6)); if (ethh->h_proto == bpf_htons(ETH_P_IP)) { - ret = bpf_skb_load_bytes(skb, offset, iph, + ret = bpf_xdp_load_bytes(ctx, offset, iph, sizeof(struct iphdr)); if (ret) return -EFAULT; @@ -136,7 +131,7 @@ skb_parse_transport(const struct __sk_buff *skb, __u32 link_h_len, *l4proto = iph->protocol; switch (iph->protocol) { case IPPROTO_TCP: { - ret = bpf_skb_load_bytes(skb, offset, tcph, + ret = bpf_xdp_load_bytes(ctx, offset, tcph, sizeof(struct tcphdr)); if (ret) { // Not a complete tcphdr. @@ -144,7 +139,7 @@ skb_parse_transport(const struct __sk_buff *skb, __u32 link_h_len, } } break; case IPPROTO_UDP: { - ret = bpf_skb_load_bytes(skb, offset, udph, + ret = bpf_xdp_load_bytes(ctx, offset, udph, sizeof(struct udphdr)); if (ret) { // Not a complete udphdr. @@ -157,7 +152,7 @@ skb_parse_transport(const struct __sk_buff *skb, __u32 link_h_len, *ihl = iph->ihl; return 0; } else if (ethh->h_proto == bpf_htons(ETH_P_IPV6)) { - ret = bpf_skb_load_bytes(skb, offset, ipv6h, + ret = bpf_xdp_load_bytes(ctx, offset, ipv6h, sizeof(struct ipv6hdr)); if (ret) { bpf_printk("not a valid IPv6 packet"); @@ -166,30 +161,23 @@ skb_parse_transport(const struct __sk_buff *skb, __u32 link_h_len, offset += sizeof(struct ipv6hdr); - return skb_handle_ipv6_extensions(skb, offset, ipv6h->nexthdr, + return xdp_handle_ipv6_extensions(ctx, offset, ipv6h->nexthdr, icmp6h, tcph, udph, ihl, l4proto); } else { /// EXPECTED: Maybe ICMP, MPLS, etc. 
// bpf_printk("IP but not supported packet: protocol is %u", // iph->protocol); - // bpf_printk("unknown link proto: %u", bpf_ntohl(skb->protocol)); + // bpf_printk("unknown link proto: %u", bpf_ntohl(ctx->protocol)); return 1; } } static __always_inline int -skb_redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, +xdp_redirect_to_control_plane(struct xdp_md *ctx, struct tuples *tuples, struct ethhdr *ethh, struct tcphdr *tcph, __u8 from_wan, __u16 l3proto, __u8 l4proto) { - /* Redirect from L3 dev to L2 dev, e.g. wg0 -> veth */ - if (!link_h_len) { - bpf_skb_change_head(skb, sizeof(struct ethhdr), 0); - bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_proto), - &l3proto, sizeof(l3proto), 0); - } - struct redirect_tuple redirect_tuple = {}; if (l3proto == bpf_htons(ETH_P_IP)) { @@ -204,7 +192,7 @@ skb_redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, redirect_tuple.l4proto = l4proto; struct redirect_entry redirect_entry = {}; - redirect_entry.ifindex = skb->ifindex; + redirect_entry.ifindex = ctx->ingress_ifindex; redirect_entry.from_wan = from_wan; __builtin_memcpy(redirect_entry.smac, ethh->h_source, sizeof(ethh->h_source)); @@ -213,9 +201,9 @@ skb_redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len, bpf_map_update_elem(&redirect_track, &redirect_tuple, &redirect_entry, BPF_ANY); - struct redirect_meta *meta = (void *)(long)skb->data; - if ((void *)(meta + 1) > (void *)(long)skb->data_end) - return TC_ACT_SHOT; + struct redirect_meta *meta = (void *)(long)ctx->data; + if ((void *)(meta + 1) > (void *)(long)ctx->data_end) + return XDP_DROP; __builtin_memset(meta, 0, sizeof(*meta)); meta->mark = TPROXY_MARK; diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index bdb2486ef..5ae759d75 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -408,6 +408,7 @@ struct { // Functions: #include "lib/skb.h" +#include "lib/xdp.h" static __always_inline __u8 ipv4_get_dscp(const struct iphdr *iph) { From 
939b6fae5fb2f3999a42333b6ee05591ffa4829d Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Thu, 11 Apr 2024 02:16:23 +0800 Subject: [PATCH 09/11] bpf: Copy tc/ingress to xdp/ingress No adjustment. --- control/control_plane_core.go | 2 +- control/kern/tproxy.c | 189 +++++++++++++++++++++++++++++++++- 2 files changed, 189 insertions(+), 2 deletions(-) diff --git a/control/control_plane_core.go b/control/control_plane_core.go index ab84170c4..b59fbff5d 100644 --- a/control/control_plane_core.go +++ b/control/control_plane_core.go @@ -318,7 +318,7 @@ func (c *controlPlaneCore) _bindLan(ifname string) error { // Priority should be behind of WAN's Priority: 2, }, - Fd: c.bpf.bpfPrograms.TproxyLanIngress.FD(), + Fd: c.bpf.bpfPrograms.TcTproxyLanIngress.FD(), Name: consts.AppName + "_lan_ingress", DirectAction: true, } diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index 5ae759d75..053447214 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -799,7 +799,194 @@ int tproxy_lan_egress(struct __sk_buff *skb) } SEC("tc/ingress") -int tproxy_lan_ingress(struct __sk_buff *skb) +int tc_tproxy_lan_ingress(struct __sk_buff *skb) +{ + struct ethhdr ethh; + struct iphdr iph; + struct ipv6hdr ipv6h; + struct icmp6hdr icmp6h; + struct tcphdr tcph; + struct udphdr udph; + __u8 ihl; + __u16 l3proto; + __u8 l4proto; + __u32 link_h_len; + + if (get_link_h_len(skb->ifindex, &link_h_len)) + return TC_ACT_OK; + int ret = skb_parse_transport(skb, link_h_len, ðh, &iph, &ipv6h, &icmp6h, + &tcph, &udph, &ihl, &l3proto, &l4proto); + if (ret) { + bpf_printk("skb_parse_transport: %d", ret); + return TC_ACT_OK; + } + if (l4proto == IPPROTO_ICMPV6) + return TC_ACT_OK; + + // Prepare five tuples. 
+ struct tuples tuples; + + get_tuples(&tuples, &iph, &ipv6h, &tcph, &udph, l3proto, l4proto); + + /* + * ip rule add fwmark 0x8000000/0x8000000 table 2023 + * ip route add local default dev lo table 2023 + * ip -6 rule add fwmark 0x8000000/0x8000000 table 2023 + * ip -6 route add local default dev lo table 2023 + + * ip rule del fwmark 0x8000000/0x8000000 table 2023 + * ip route del local default dev lo table 2023 + * ip -6 rule del fwmark 0x8000000/0x8000000 table 2023 + * ip -6 route del local default dev lo table 2023 + */ + // Socket lookup and assign skb to existing socket connection. + struct bpf_sock_tuple tuple = { 0 }; + __u32 tuple_size; + struct bpf_sock *sk; + __u32 flag[8]; + void *l4hdr; + + if (skb->protocol == bpf_htons(ETH_P_IP)) { + tuple.ipv4.daddr = tuples.five.dip.u6_addr32[3]; + tuple.ipv4.saddr = tuples.five.sip.u6_addr32[3]; + tuple.ipv4.dport = tuples.five.dport; + tuple.ipv4.sport = tuples.five.sport; + tuple_size = sizeof(tuple.ipv4); + } else { + __builtin_memcpy(tuple.ipv6.daddr, &tuples.five.dip, + IPV6_BYTE_LENGTH); + __builtin_memcpy(tuple.ipv6.saddr, &tuples.five.sip, + IPV6_BYTE_LENGTH); + tuple.ipv6.dport = tuples.five.dport; + tuple.ipv6.sport = tuples.five.sport; + tuple_size = sizeof(tuple.ipv6); + } + + if (l4proto == IPPROTO_TCP) { + // TCP. + if (tcph.syn && !tcph.ack) + goto new_connection; + + sk = bpf_skc_lookup_tcp(skb, &tuple, tuple_size, + PARAM.dae_netns_id, 0); + if (sk) { + if (sk->state != BPF_TCP_LISTEN) { + bpf_sk_release(sk); + goto control_plane; + } + bpf_sk_release(sk); + } + } + +// Routing for new connection. +new_connection: + __builtin_memset(flag, 0, sizeof(flag)); + if (l4proto == IPPROTO_TCP) { + if (!(tcph.syn && !tcph.ack)) { + // Not a new TCP connection. + // Perhaps single-arm. 
+ return TC_ACT_OK; + } + l4hdr = &tcph; + flag[0] = L4ProtoType_TCP; + } else { + l4hdr = &udph; + flag[0] = L4ProtoType_UDP; + } + if (skb->protocol == bpf_htons(ETH_P_IP)) + flag[1] = IpVersionType_4; + else + flag[1] = IpVersionType_6; + flag[6] = tuples.dscp; + __be32 mac[4] = { + 0, + 0, + bpf_htonl((ethh.h_source[0] << 8) + (ethh.h_source[1])), + bpf_htonl((ethh.h_source[2] << 24) + (ethh.h_source[3] << 16) + + (ethh.h_source[4] << 8) + (ethh.h_source[5])), + }; + __s64 s64_ret; + + s64_ret = route(flag, l4hdr, tuples.five.sip.u6_addr32, + tuples.five.dip.u6_addr32, mac); + if (s64_ret < 0) { + bpf_printk("shot routing: %d", s64_ret); + return TC_ACT_SHOT; + } + struct routing_result routing_result = { 0 }; + + routing_result.outbound = s64_ret; + routing_result.mark = s64_ret >> 8; + routing_result.must = (s64_ret >> 40) & 1; + routing_result.dscp = tuples.dscp; + __builtin_memcpy(routing_result.mac, ethh.h_source, + sizeof(routing_result.mac)); + /// NOTICE: No pid pname info for LAN packet. + // // Maybe this packet is also in the host (such as docker) ? + // // I tried and it is false. + //__u64 cookie = bpf_get_socket_cookie(skb); + //struct pid_pname *pid_pname = + // bpf_map_lookup_elem(&cookie_pid_map, &cookie); + //if (pid_pname) { + // __builtin_memcpy(routing_result.pname, pid_pname->pname, + // TASK_COMM_LEN); + // routing_result.pid = pid_pname->pid; + //} + + // Save routing result. 
+ ret = bpf_map_update_elem(&routing_tuples_map, &tuples.five, + &routing_result, BPF_ANY); + if (ret) { + bpf_printk("shot save routing result: %d", ret); + return TC_ACT_SHOT; + } +#if defined(__DEBUG_ROUTING) || defined(__PRINT_ROUTING_RESULT) + if (l4proto == IPPROTO_TCP) { + bpf_printk("tcp(lan): outbound: %u, target: %pI6:%u", ret, + tuples.five.dip.u6_addr32, + bpf_ntohs(tuples.five.dport)); + } else { + bpf_printk("udp(lan): outbound: %u, target: %pI6:%u", + routing_result.outbound, tuples.five.dip.u6_addr32, + bpf_ntohs(tuples.five.dport)); + } +#endif + if (routing_result.outbound == OUTBOUND_DIRECT) { + skb->mark = routing_result.mark; + goto direct; + } else if (unlikely(routing_result.outbound == OUTBOUND_BLOCK)) { + goto block; + } + + // Check outbound connectivity in specific ipversion and l4proto. + struct outbound_connectivity_query q = { 0 }; + + q.outbound = routing_result.outbound; + q.ipversion = skb->protocol == bpf_htons(ETH_P_IP) ? 4 : 6; + q.l4proto = l4proto; + __u32 *alive; + + alive = bpf_map_lookup_elem(&outbound_connectivity_map, &q); + if (alive && *alive == 0 && + !(l4proto == IPPROTO_UDP && tuples.five.dport == bpf_htons(53))) { + // Outbound is not alive. Dns is an exception. + goto block; + } + + // Assign to control plane. 
+control_plane: + return skb_redirect_to_control_plane(skb, link_h_len, &tuples, ðh, &tcph, + 0, l3proto, l4proto); + +direct: + return TC_ACT_OK; + +block: + return TC_ACT_SHOT; +} + +SEC("xdp/ingress") +int xdp_tproxy_lan_ingress(struct __sk_buff *skb) { struct ethhdr ethh; struct iphdr iph; From dadec6601327b7909fa4b381c5008794c89bc5ed Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Thu, 11 Apr 2024 02:21:25 +0800 Subject: [PATCH 10/11] bpf: Finalize xdp/ingress --- control/kern/tproxy.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index 053447214..142eee006 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -986,7 +986,7 @@ int tc_tproxy_lan_ingress(struct __sk_buff *skb) } SEC("xdp/ingress") -int xdp_tproxy_lan_ingress(struct __sk_buff *skb) +int xdp_tproxy_lan_ingress(struct xdp_md *ctx) { struct ethhdr ethh; struct iphdr iph; @@ -997,18 +997,14 @@ int xdp_tproxy_lan_ingress(struct __sk_buff *skb) __u8 ihl; __u16 l3proto; __u8 l4proto; - __u32 link_h_len; - if (get_link_h_len(skb->ifindex, &link_h_len)) - return TC_ACT_OK; - int ret = skb_parse_transport(skb, link_h_len, ðh, &iph, &ipv6h, &icmp6h, + int ret = xdp_parse_transport(ctx, ðh, &iph, &ipv6h, &icmp6h, &tcph, &udph, &ihl, &l3proto, &l4proto); - if (ret) { - bpf_printk("skb_parse_transport: %d", ret); - return TC_ACT_OK; - } + if (ret) + return XDP_PASS; + if (l4proto == IPPROTO_ICMPV6) - return TC_ACT_OK; + return XDP_PASS; // Prepare five tuples. 
struct tuples tuples; @@ -1033,7 +1029,7 @@ int xdp_tproxy_lan_ingress(struct __sk_buff *skb) __u32 flag[8]; void *l4hdr; - if (skb->protocol == bpf_htons(ETH_P_IP)) { + if (l3proto == bpf_htons(ETH_P_IP)) { tuple.ipv4.daddr = tuples.five.dip.u6_addr32[3]; tuple.ipv4.saddr = tuples.five.sip.u6_addr32[3]; tuple.ipv4.dport = tuples.five.dport; @@ -1054,7 +1050,7 @@ int xdp_tproxy_lan_ingress(struct __sk_buff *skb) if (tcph.syn && !tcph.ack) goto new_connection; - sk = bpf_skc_lookup_tcp(skb, &tuple, tuple_size, + sk = bpf_skc_lookup_tcp(ctx, &tuple, tuple_size, PARAM.dae_netns_id, 0); if (sk) { if (sk->state != BPF_TCP_LISTEN) { @@ -1072,7 +1068,7 @@ int xdp_tproxy_lan_ingress(struct __sk_buff *skb) if (!(tcph.syn && !tcph.ack)) { // Not a new TCP connection. // Perhaps single-arm. - return TC_ACT_OK; + return XDP_PASS; } l4hdr = &tcph; flag[0] = L4ProtoType_TCP; @@ -1080,7 +1076,7 @@ int xdp_tproxy_lan_ingress(struct __sk_buff *skb) l4hdr = &udph; flag[0] = L4ProtoType_UDP; } - if (skb->protocol == bpf_htons(ETH_P_IP)) + if (l3proto == bpf_htons(ETH_P_IP)) flag[1] = IpVersionType_4; else flag[1] = IpVersionType_6; @@ -1098,7 +1094,7 @@ int xdp_tproxy_lan_ingress(struct __sk_buff *skb) tuples.five.dip.u6_addr32, mac); if (s64_ret < 0) { bpf_printk("shot routing: %d", s64_ret); - return TC_ACT_SHOT; + return XDP_DROP; } struct routing_result routing_result = { 0 }; @@ -1125,7 +1121,7 @@ int xdp_tproxy_lan_ingress(struct __sk_buff *skb) &routing_result, BPF_ANY); if (ret) { bpf_printk("shot save routing result: %d", ret); - return TC_ACT_SHOT; + return XDP_DROP; } #if defined(__DEBUG_ROUTING) || defined(__PRINT_ROUTING_RESULT) if (l4proto == IPPROTO_TCP) { @@ -1139,7 +1135,7 @@ int xdp_tproxy_lan_ingress(struct __sk_buff *skb) } #endif if (routing_result.outbound == OUTBOUND_DIRECT) { - skb->mark = routing_result.mark; + // xdp doesn't support mark settings. 
goto direct; } else if (unlikely(routing_result.outbound == OUTBOUND_BLOCK)) { goto block; @@ -1149,7 +1145,7 @@ int xdp_tproxy_lan_ingress(struct __sk_buff *skb) struct outbound_connectivity_query q = { 0 }; q.outbound = routing_result.outbound; - q.ipversion = skb->protocol == bpf_htons(ETH_P_IP) ? 4 : 6; + q.ipversion = l3proto == bpf_htons(ETH_P_IP) ? 4 : 6; q.l4proto = l4proto; __u32 *alive; @@ -1162,14 +1158,14 @@ int xdp_tproxy_lan_ingress(struct __sk_buff *skb) // Assign to control plane. control_plane: - return skb_redirect_to_control_plane(skb, link_h_len, &tuples, ðh, &tcph, - 0, l3proto, l4proto); + return xdp_redirect_to_control_plane(ctx, &tuples, ðh, + &tcph, 0, l3proto, l4proto); direct: - return TC_ACT_OK; + return XDP_PASS; block: - return TC_ACT_SHOT; + return XDP_DROP; } // Cookie will change after the first packet, so we just use it for From 71a6d3462640a63bf079909705fdce774ba875be Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Fri, 12 Apr 2024 19:19:33 +0800 Subject: [PATCH 11/11] control: Attach XDP to lan --- control/control_plane_core.go | 77 ++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/control/control_plane_core.go b/control/control_plane_core.go index b59fbff5d..bfd2a4313 100644 --- a/control/control_plane_core.go +++ b/control/control_plane_core.go @@ -284,7 +284,7 @@ func (c *controlPlaneCore) _bindLan(ifname string) error { } c.log.Infof("Bind to LAN: %v", ifname) - link, err := netlink.LinkByName(ifname) + iface, err := netlink.LinkByName(ifname) if err != nil { return err } @@ -297,52 +297,65 @@ func (c *controlPlaneCore) _bindLan(ifname string) error { _ = c.addQdisc(ifname) _ = c.mapLinkType(ifname) /// Insert an elem into IfindexParamsMap. 
- ifParams, err := getIfParamsFromLink(link) + ifParams, err := getIfParamsFromLink(iface) if err != nil { return err } if err = ifParams.CheckVersionRequirement(c.kernelVersion); err != nil { return err } - if err := c.bpf.IfindexParamsMap.Update(uint32(link.Attrs().Index), ifParams, ebpf.UpdateAny); err != nil { + if err := c.bpf.IfindexParamsMap.Update(uint32(iface.Attrs().Index), ifParams, ebpf.UpdateAny); err != nil { return fmt.Errorf("update IfindexIpsMap: %w", err) } - // Insert filters. - filterIngress := &netlink.BpfFilter{ - FilterAttrs: netlink.FilterAttrs{ - LinkIndex: link.Attrs().Index, - Parent: netlink.HANDLE_MIN_INGRESS, - Handle: netlink.MakeHandle(0x2023, 0b100+uint16(c.flip)), - Protocol: unix.ETH_P_ALL, - // Priority should be behind of WAN's - Priority: 2, - }, - Fd: c.bpf.bpfPrograms.TcTproxyLanIngress.FD(), - Name: consts.AppName + "_lan_ingress", - DirectAction: true, - } - // Remove and add. - _ = netlink.FilterDel(filterIngress) - if !c.isReload { - // Clean up thoroughly. - filterIngressFlipped := deepcopy.Copy(filterIngress).(*netlink.BpfFilter) - filterIngressFlipped.FilterAttrs.Handle ^= 1 - _ = netlink.FilterDel(filterIngressFlipped) - } - if err := netlink.FilterAdd(filterIngress); err != nil { - return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err) + // Don't specify XDP mode, let kernel decide if driver supports XDP or fallback to XDP generic. + l, err := link.AttachXDP(link.XDPOptions{ + Program: c.bpf.bpfPrograms.XdpTproxyLanIngress, + Interface: iface.Attrs().Index, + Flags: link.XDPGenericMode, + }) + if err != nil { + return fmt.Errorf("AttachXDP: %w", err) } c.deferFuncs = append(c.deferFuncs, func() error { - if err := netlink.FilterDel(filterIngress); err != nil { - return fmt.Errorf("FilterDel(%v:%v): %w", ifname, filterIngress.Name, err) - } - return nil + return l.Close() }) + // Insert filters. 
+ //filterIngress := &netlink.BpfFilter{ + // FilterAttrs: netlink.FilterAttrs{ + // LinkIndex: iface.Attrs().Index, + // Parent: netlink.HANDLE_MIN_INGRESS, + // Handle: netlink.MakeHandle(0x2023, 0b100+uint16(c.flip)), + // Protocol: unix.ETH_P_ALL, + // // Priority should be behind of WAN's + // Priority: 2, + // }, + // Fd: c.bpf.bpfPrograms.TcTproxyLanIngress.FD(), + // Name: consts.AppName + "_lan_ingress", + // DirectAction: true, + //} + //// Remove and add. + //_ = netlink.FilterDel(filterIngress) + //if !c.isReload { + // // Clean up thoroughly. + // filterIngressFlipped := deepcopy.Copy(filterIngress).(*netlink.BpfFilter) + // filterIngressFlipped.FilterAttrs.Handle ^= 1 + // _ = netlink.FilterDel(filterIngressFlipped) + //} + //if err := netlink.FilterAdd(filterIngress); err != nil { + // return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err) + //} + //c.deferFuncs = append(c.deferFuncs, func() error { + // if err := netlink.FilterDel(filterIngress); err != nil { + // return fmt.Errorf("FilterDel(%v:%v): %w", ifname, filterIngress.Name, err) + // } + // return nil + //}) + filterEgress := &netlink.BpfFilter{ FilterAttrs: netlink.FilterAttrs{ - LinkIndex: link.Attrs().Index, + LinkIndex: iface.Attrs().Index, Parent: netlink.HANDLE_MIN_EGRESS, Handle: netlink.MakeHandle(0x2023, 0b010+uint16(c.flip)), Protocol: unix.ETH_P_ALL,