jschwinger233 · jschwinger233 · Apr 10, 2024 · Apr 10, 2024 · Apr 10, 2024 · Apr 12, 2024
diff --git a/control/control_plane_core.go b/control/control_plane_core.go
@@ -284,7 +284,7 @@ func (c *controlPlaneCore) _bindLan(ifname string) error {
 	}
 	c.log.Infof("Bind to LAN: %v", ifname)
 
-	link, err := netlink.LinkByName(ifname)
+	iface, err := netlink.LinkByName(ifname)
 	if err != nil {
 		return err
 	}
@@ -297,52 +297,65 @@ func (c *controlPlaneCore) _bindLan(ifname string) error {
 	_ = c.addQdisc(ifname)
 	_ = c.mapLinkType(ifname)
 	/// Insert an elem into IfindexParamsMap.
-	ifParams, err := getIfParamsFromLink(link)
+	ifParams, err := getIfParamsFromLink(iface)
 	if err != nil {
 		return err
 	}
 	if err = ifParams.CheckVersionRequirement(c.kernelVersion); err != nil {
 		return err
 	}
-	if err := c.bpf.IfindexParamsMap.Update(uint32(link.Attrs().Index), ifParams, ebpf.UpdateAny); err != nil {
+	if err := c.bpf.IfindexParamsMap.Update(uint32(iface.Attrs().Index), ifParams, ebpf.UpdateAny); err != nil {
 		return fmt.Errorf("update IfindexIpsMap: %w", err)
 	}
 
-	// Insert filters.
-	filterIngress := &netlink.BpfFilter{
-		FilterAttrs: netlink.FilterAttrs{
-			LinkIndex: link.Attrs().Index,
-			Parent:    netlink.HANDLE_MIN_INGRESS,
-			Handle:    netlink.MakeHandle(0x2023, 0b100+uint16(c.flip)),
-			Protocol:  unix.ETH_P_ALL,
-			// Priority should be behind of WAN's
-			Priority: 2,
-		},
-		Fd:           c.bpf.bpfPrograms.TproxyLanIngress.FD(),
-		Name:         consts.AppName + "_lan_ingress",
-		DirectAction: true,
-	}
-	// Remove and add.
-	_ = netlink.FilterDel(filterIngress)
-	if !c.isReload {
-		// Clean up thoroughly.
-		filterIngressFlipped := deepcopy.Copy(filterIngress).(*netlink.BpfFilter)
-		filterIngressFlipped.FilterAttrs.Handle ^= 1
-		_ = netlink.FilterDel(filterIngressFlipped)
-	}
-	if err := netlink.FilterAdd(filterIngress); err != nil {
-		return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err)
+	// Don't specify XDP mode, let kernel decide if driver supports XDP or fallback to XDP generic.
+	l, err := link.AttachXDP(link.XDPOptions{
+		Program:   c.bpf.bpfPrograms.XdpTproxyLanIngress,
+		Interface: iface.Attrs().Index,
+		Flags:     link.XDPGenericMode,
+	})
+	if err != nil {
+		return fmt.Errorf("AttachXDP: %w", err)
 	}
 	c.deferFuncs = append(c.deferFuncs, func() error {
-		if err := netlink.FilterDel(filterIngress); err != nil {
-			return fmt.Errorf("FilterDel(%v:%v): %w", ifname, filterIngress.Name, err)
-		}
-		return nil
+		return l.Close()
 	})
 
+	// Insert filters.
+	//filterIngress := &netlink.BpfFilter{
+	//	FilterAttrs: netlink.FilterAttrs{
+	//		LinkIndex: iface.Attrs().Index,
+	//		Parent:    netlink.HANDLE_MIN_INGRESS,
+	//		Handle:    netlink.MakeHandle(0x2023, 0b100+uint16(c.flip)),
+	//		Protocol:  unix.ETH_P_ALL,
+	//		// Priority should be behind of WAN's
+	//		Priority: 2,
+	//	},
+	//	Fd:           c.bpf.bpfPrograms.TcTproxyLanIngress.FD(),
+	//	Name:         consts.AppName + "_lan_ingress",
+	//	DirectAction: true,
+	//}
+	//// Remove and add.
+	//_ = netlink.FilterDel(filterIngress)
+	//if !c.isReload {
+	//	// Clean up thoroughly.
+	//	filterIngressFlipped := deepcopy.Copy(filterIngress).(*netlink.BpfFilter)
+	//	filterIngressFlipped.FilterAttrs.Handle ^= 1
+	//	_ = netlink.FilterDel(filterIngressFlipped)
+	//}
+	//if err := netlink.FilterAdd(filterIngress); err != nil {
+	//	return fmt.Errorf("cannot attach ebpf object to filter ingress: %w", err)
+	//}
+	//c.deferFuncs = append(c.deferFuncs, func() error {
+	//	if err := netlink.FilterDel(filterIngress); err != nil {
+	//		return fmt.Errorf("FilterDel(%v:%v): %w", ifname, filterIngress.Name, err)
+	//	}
+	//	return nil
+	//})
+
 	filterEgress := &netlink.BpfFilter{
 		FilterAttrs: netlink.FilterAttrs{
-			LinkIndex: link.Attrs().Index,
+			LinkIndex: iface.Attrs().Index,
 			Parent:    netlink.HANDLE_MIN_EGRESS,
 			Handle:    netlink.MakeHandle(0x2023, 0b010+uint16(c.flip)),
 			Protocol:  unix.ETH_P_ALL,

diff --git a/control/kern/lib/skb.h b/control/kern/lib/skb.h
@@ -0,0 +1,226 @@
+static __always_inline int
+skb_handle_ipv6_extensions(const struct __sk_buff *skb, __u32 offset, __u32 hdr,
+			   struct icmp6hdr *icmp6h, struct tcphdr *tcph,
+			   struct udphdr *udph, __u8 *ihl, __u8 *l4proto)
+{
+	__u8 hdr_length = 0;
+	__u8 nexthdr = 0;
+	*ihl = sizeof(struct ipv6hdr) / 4;
+	int ret;
+	// We only process TCP and UDP traffic.
+
+	// Unroll can give less instructions but more memory consumption when loading.
+	// We disable it here to support more poor memory devices.
+	// #pragma unroll
+	for (int i = 0; i < IPV6_MAX_EXTENSIONS;
+	     i++, offset += hdr_length, hdr = nexthdr, *ihl += hdr_length / 4) {
+		if (hdr_length % 4) {
+			bpf_printk(
+				"IPv6 extension length is not multiples of 4");
+			return 1;
+		}
+		// See control/control_plane.go.
+
+		switch (hdr) {
+		case IPPROTO_ICMPV6:
+			*l4proto = hdr;
+			hdr_length = sizeof(struct icmp6hdr);
+			// Assume ICMPV6 as a level 4 protocol.
+			ret = bpf_skb_load_bytes(skb, offset, icmp6h,
+						 hdr_length);
+			if (ret) {
+				bpf_printk("not a valid IPv6 packet");
+				return -EFAULT;
+			}
+			return 0;
+
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING:
+			ret = bpf_skb_load_bytes(skb, offset + 1, &hdr_length,
+						 sizeof(hdr_length));
+			if (ret) {
+				bpf_printk("not a valid IPv6 packet");
+				return -EFAULT;
+			}
+
+special_n1:
+			ret = bpf_skb_load_bytes(skb, offset, &nexthdr,
+						 sizeof(nexthdr));
+			if (ret) {
+				bpf_printk("not a valid IPv6 packet");
+				return -EFAULT;
+			}
+			break;
+		case IPPROTO_FRAGMENT:
+			hdr_length = 4;
+			goto special_n1;
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+			*l4proto = hdr;
+			if (hdr == IPPROTO_TCP) {
+				// Upper layer;
+				ret = bpf_skb_load_bytes(skb, offset, tcph,
+							 sizeof(struct tcphdr));
+				if (ret) {
+					bpf_printk("not a valid IPv6 packet");
+					return -EFAULT;
+				}
+			} else if (hdr == IPPROTO_UDP) {
+				// Upper layer;
+				ret = bpf_skb_load_bytes(skb, offset, udph,
+							 sizeof(struct udphdr));
+				if (ret) {
+					bpf_printk("not a valid IPv6 packet");
+					return -EFAULT;
+				}
+			} else {
+				// Unknown hdr.
+				bpf_printk("Unexpected hdr.");
+				return 1;
+			}
+			return 0;
+		default:
+			/// EXPECTED: Maybe ICMP, etc.
+			// bpf_printk("IPv6 but unrecognized extension protocol: %u", hdr);
+			return 1;
+		}
+	}
+	bpf_printk("exceeds IPV6_MAX_EXTENSIONS limit");
+	return 1;
+}
+
+static __always_inline int
+skb_parse_transport(const struct __sk_buff *skb, __u32 link_h_len,
+		    struct ethhdr *ethh, struct iphdr *iph, struct ipv6hdr *ipv6h,
+		    struct icmp6hdr *icmp6h, struct tcphdr *tcph,
+		    struct udphdr *udph, __u8 *ihl, __u16 *l3proto, __u8 *l4proto)
+{
+	__u32 offset = 0;
+	int ret;
+
+	if (link_h_len == ETH_HLEN) {
+		ret = bpf_skb_load_bytes(skb, offset, ethh,
+					 sizeof(struct ethhdr));
+		if (ret) {
+			bpf_printk("not ethernet packet");
+			return 1;
+		}
+		// Skip ethhdr for next hdr.
+		offset += sizeof(struct ethhdr);
+	} else {
+		__builtin_memset(ethh, 0, sizeof(struct ethhdr));
+		ethh->h_proto = skb->protocol;
+	}
+	*l3proto = ethh->h_proto;
+
+	*ihl = 0;
+	*l4proto = 0;
+	__builtin_memset(iph, 0, sizeof(struct iphdr));
+	__builtin_memset(ipv6h, 0, sizeof(struct ipv6hdr));
+	__builtin_memset(icmp6h, 0, sizeof(struct icmp6hdr));
+	__builtin_memset(tcph, 0, sizeof(struct tcphdr));
+	__builtin_memset(udph, 0, sizeof(struct udphdr));
+
+	// bpf_printk("parse_transport: h_proto: %u ? %u %u", ethh->h_proto,
+	//						bpf_htons(ETH_P_IP),
+	// bpf_htons(ETH_P_IPV6));
+	if (ethh->h_proto == bpf_htons(ETH_P_IP)) {
+		ret = bpf_skb_load_bytes(skb, offset, iph,
+					 sizeof(struct iphdr));
+		if (ret)
+			return -EFAULT;
+		// Skip ipv4hdr and options for next hdr.
+		offset += iph->ihl * 4;
+
+		// We only process TCP and UDP traffic.
+		*l4proto = iph->protocol;
+		switch (iph->protocol) {
+		case IPPROTO_TCP: {
+			ret = bpf_skb_load_bytes(skb, offset, tcph,
+						 sizeof(struct tcphdr));
+			if (ret) {
+				// Not a complete tcphdr.
+				return -EFAULT;
+			}
+		} break;
+		case IPPROTO_UDP: {
+			ret = bpf_skb_load_bytes(skb, offset, udph,
+						 sizeof(struct udphdr));
+			if (ret) {
+				// Not a complete udphdr.
+				return -EFAULT;
+			}
+		} break;
+		default:
+			return 1;
+		}
+		*ihl = iph->ihl;
+		return 0;
+	} else if (ethh->h_proto == bpf_htons(ETH_P_IPV6)) {
+		ret = bpf_skb_load_bytes(skb, offset, ipv6h,
+					 sizeof(struct ipv6hdr));
+		if (ret) {
+			bpf_printk("not a valid IPv6 packet");
+			return -EFAULT;
+		}
+
+		offset += sizeof(struct ipv6hdr);
+
+		return skb_handle_ipv6_extensions(skb, offset, ipv6h->nexthdr,
+						  icmp6h, tcph, udph, ihl, l4proto);
+	} else {
+		/// EXPECTED: Maybe ICMP, MPLS, etc.
+		// bpf_printk("IP but not supported packet: protocol is %u",
+		// iph->protocol);
+		// bpf_printk("unknown link proto: %u", bpf_ntohl(skb->protocol));
+		return 1;
+	}
+}
+
+static __always_inline int
+skb_redirect_to_control_plane(struct __sk_buff *skb, __u32 link_h_len,
+			      struct tuples *tuples,
+			      struct ethhdr *ethh, struct tcphdr *tcph,
+			      __u8 from_wan, __u16 l3proto, __u8 l4proto)
+{
+	/* Redirect from L3 dev to L2 dev, e.g. wg0 -> veth */
+	if (!link_h_len) {
+		bpf_skb_change_head(skb, sizeof(struct ethhdr), 0);
+		bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_proto),
+				    &l3proto, sizeof(l3proto), 0);
+	}
+
+	struct redirect_tuple redirect_tuple = {};
+
+	if (l3proto == bpf_htons(ETH_P_IP)) {
+		redirect_tuple.sip.u6_addr32[3] = tuples->five.sip.u6_addr32[3];
+		redirect_tuple.dip.u6_addr32[3] = tuples->five.dip.u6_addr32[3];
+	} else {
+		__builtin_memcpy(&redirect_tuple.sip, &tuples->five.sip,
+				 IPV6_BYTE_LENGTH);
+		__builtin_memcpy(&redirect_tuple.dip, &tuples->five.dip,
+				 IPV6_BYTE_LENGTH);
+	}
+	redirect_tuple.l4proto = l4proto;
+	struct redirect_entry redirect_entry = {};
+
+	redirect_entry.ifindex = skb->ifindex;
+	redirect_entry.from_wan = from_wan;
+	__builtin_memcpy(redirect_entry.smac, ethh->h_source,
+			 sizeof(ethh->h_source));
+	__builtin_memcpy(redirect_entry.dmac, ethh->h_dest,
+			 sizeof(ethh->h_dest));
+	bpf_map_update_elem(&redirect_track, &redirect_tuple, &redirect_entry,
+			    BPF_ANY);
+
+	struct redirect_meta *meta = (void *)(long)skb->data;
+	if ((void *)(meta + 1) > (void *)(long)skb->data_end)
+		return TC_ACT_SHOT;
+
+	__builtin_memset(meta, 0, sizeof(*meta));
+	meta->mark = TPROXY_MARK;
+	if ((l4proto == IPPROTO_TCP && tcph->syn) || l4proto == IPPROTO_UDP)
+		meta->l4proto = l4proto;
+
+	return bpf_redirect(PARAM.dae0_ifindex, 0);
+}