diff --git a/.github/workflows/kernel-test.yml b/.github/workflows/kernel-test.yml index 805fec2e6..1cd843443 100644 --- a/.github/workflows/kernel-test.yml +++ b/.github/workflows/kernel-test.yml @@ -144,6 +144,7 @@ jobs: lan_interface: auto wan_interface: auto allow_insecure: false + auto_config_kernel_parameter: true } node { @@ -267,6 +268,7 @@ jobs: lan_interface: dae-veth-peer wan_interface: auto allow_insecure: false + auto_config_kernel_parameter: true } node { diff --git a/control/control.go b/control/control.go index d4a984192..5b65e47d1 100644 --- a/control/control.go +++ b/control/control.go @@ -5,4 +5,4 @@ package control -//go:generate go run -mod=mod github.com/cilium/ebpf/cmd/bpf2go -cc "$BPF_CLANG" "$BPF_STRIP_FLAG" -cflags "$BPF_CFLAGS" -target "$BPF_TARGET" bpf kern/tproxy.c -- -I./headers +//go:generate go run -mod=mod github.com/cilium/ebpf/cmd/bpf2go -cc "$BPF_CLANG" "$BPF_STRIP_FLAG" -cflags "$BPF_CFLAGS" -target "$BPF_TARGET" -type dst_routing_result bpf kern/tproxy.c -- -I./headers diff --git a/control/control_plane.go b/control/control_plane.go index bcd89a8e7..dddac44e0 100644 --- a/control/control_plane.go +++ b/control/control_plane.go @@ -144,6 +144,7 @@ func NewControlPlane( //var bpf bpfObjects var ProgramOptions = ebpf.ProgramOptions{ KernelTypes: nil, + LogSize: ebpf.DefaultVerifierLogSize * 10, } if log.Level == logrus.PanicLevel { ProgramOptions.LogLevel = ebpf.LogLevelBranch | ebpf.LogLevelStats @@ -215,7 +216,7 @@ func NewControlPlane( return nil, err } for _, ifname := range global.WanInterface { - if err = core.bindWan(ifname); err != nil { + if err = core.bindWan(ifname, global.AutoConfigKernelParameter); err != nil { return nil, fmt.Errorf("bindWan: %v: %w", ifname, err) } } @@ -736,38 +737,11 @@ func (c *ControlPlane) Serve(readyChan chan<- bool, listener *Listener) (err err pktDst := RetrieveOriginalDest(oob) routingResult, err := c.core.RetrieveRoutingResult(src, pktDst, unix.IPPROTO_UDP) if err != nil { - // WAN. Old method. - lastErr := err - addrHdr, dataOffset, err := ParseAddrHdr(data) - if err != nil { - if c.tproxyPortProtect { - c.log.Warnf("No AddrPort presented: %v, %v", lastErr, err) - return - } else { - routingResult = &bpfRoutingResult{ - Mark: 0, - Must: 0, - Mac: [6]uint8{}, - Outbound: uint8(consts.OutboundControlPlaneRouting), - Pname: [16]uint8{}, - Pid: 0, - Dscp: 0, - } - realDst = pktDst - goto destRetrieved - } - } - data = data[dataOffset:] - routingResult = &addrHdr.RoutingResult - __ip := common.Ipv6Uint32ArrayToByteSlice(addrHdr.Ip) - _ip, _ := netip.AddrFromSlice(__ip) - // Comment it because them SHOULD equal. - //src = netip.AddrPortFrom(_ip, src.Port()) - realDst = netip.AddrPortFrom(_ip, addrHdr.Port) + c.log.Warnf("No AddrPort presented: %v", err) + return } else { realDst = pktDst } - destRetrieved: if e := c.handlePkt(udpConn, data, common.ConvergeAddrPort(src), common.ConvergeAddrPort(pktDst), common.ConvergeAddrPort(realDst), routingResult, false); e != nil { c.log.Warnln("handlePkt:", e) } diff --git a/control/control_plane_core.go b/control/control_plane_core.go index 206f0d063..b70b2acd0 100644 --- a/control/control_plane_core.go +++ b/control/control_plane_core.go @@ -547,7 +547,10 @@ func (c *controlPlaneCore) setupSkPidMonitor() error { return nil } -func (c *controlPlaneCore) bindWan(ifname string) error { +func (c *controlPlaneCore) bindWan(ifname string, autoConfigKernelParameter bool) error { + if autoConfigKernelParameter { + SetAcceptLocal(ifname, "1") + } return c._bindWan(ifname) } diff --git a/control/kern/tproxy.c b/control/kern/tproxy.c index b38a16c3c..b64bc5cac 100644 --- a/control/kern/tproxy.c +++ b/control/kern/tproxy.c @@ -140,6 +140,9 @@ struct dst_routing_result { struct routing_result routing_result; }; +// force emitting struct into the ELF. +const struct dst_routing_result *_ __attribute__((unused)); + struct tuples_key { union ip6 sip; union ip6 dip; @@ -160,20 +163,6 @@ struct dae_param { static volatile const struct dae_param PARAM = {}; -struct { - __uint(type, BPF_MAP_TYPE_LRU_HASH); - __type(key, - struct ip_port); // As TCP client side [SYN, !ACK], - // (source ip, source port, tcp) is - // enough for identifier. And UDP client - // side does not care it (full-cone). - __type(value, struct dst_routing_result); // Original target. - __uint(max_entries, MAX_DST_MAPPING_NUM); - /// NOTICE: It MUST be pinned, or connection may break. - __uint(pinning, LIBBPF_PIN_BY_NAME); -} tcp_dst_map - SEC(".maps"); // This map is only for old method (redirect mode in WAN). - struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, @@ -814,76 +803,6 @@ static __always_inline int adjust_ipv4_len(struct __sk_buff *skb, return 0; } -static __always_inline int encap_after_udp_hdr(struct __sk_buff *skb, - __u32 link_h_len, __u8 ihl, - __be16 iphdr_tot_len, - void *newhdr, __u32 newhdrlen, - bool disable_l4_checksum) { - if (unlikely(newhdrlen % 4 != 0)) { - bpf_printk("encap_after_udp_hdr: unexpected newhdrlen value %u :must " - "be a multiple of 4", - newhdrlen); - return -EINVAL; - } - - int ret = 0; - long ip_off = link_h_len; - // Calculate offsets using add instead of subtract to avoid verifier problems. - long ipp_len = ihl * 4; - long udp_payload_off = ip_off + ipp_len + sizeof(struct udphdr); - - // Backup for further use. - struct udphdr reserved_udphdr; - if ((ret = bpf_skb_load_bytes(skb, ip_off + ipp_len, &reserved_udphdr, - sizeof(reserved_udphdr)))) { - bpf_printk("bpf_skb_load_bytes: %d", ret); - return ret; - } - // Add room for new udp payload header. - if ((ret = bpf_skb_adjust_room(skb, newhdrlen, BPF_ADJ_ROOM_NET, - BPF_F_ADJ_ROOM_NO_CSUM_RESET))) { - bpf_printk("UDP ADJUST ROOM(encap): %d", ret); - return ret; - } - // Move the new room to the front of the UDP payload. - if ((ret = bpf_skb_store_bytes(skb, ip_off + ipp_len, &reserved_udphdr, - sizeof(reserved_udphdr), 0))) { - bpf_printk("bpf_skb_store_bytes reserved_udphdr: %d", ret); - return ret; - } - - // Rewrite ip len. - if (skb->protocol == bpf_htons(ETH_P_IP)) { - if ((ret = adjust_ipv4_len(skb, link_h_len, iphdr_tot_len, newhdrlen))) { - bpf_printk("adjust_ip_len: %d", ret); - return ret; - } - } - - // Rewrite udp len. - if ((ret = adjust_udp_len(skb, link_h_len, reserved_udphdr.len, ihl, - newhdrlen, disable_l4_checksum))) { - bpf_printk("adjust_udp_len: %d", ret); - return ret; - } - - // Rewrite udp payload. - if (!disable_l4_checksum) { - __u32 l4_cksm_off = l4_checksum_off(link_h_len, IPPROTO_UDP, ihl); - __s64 cksm = bpf_csum_diff(NULL, 0, newhdr, newhdrlen, 0); - if ((ret = bpf_l4_csum_replace(skb, l4_cksm_off, 0, cksm, - BPF_F_MARK_MANGLED_0))) { - bpf_printk("bpf_l4_csum_replace 2: %d", ret); - return ret; - } - } - if ((ret = bpf_skb_store_bytes(skb, udp_payload_off, newhdr, newhdrlen, 0))) { - bpf_printk("bpf_skb_store_bytes 2: %d", ret); - return ret; - } - return 0; -} - static __always_inline int decap_after_udp_hdr(struct __sk_buff *skb, __u32 link_h_len, __u8 ihl, __be16 ipv4hdr_tot_len, void *to, __u32 decap_hdrlen, @@ -1230,6 +1149,45 @@ static __always_inline __u32 get_link_h_len(__u32 ifindex, return 0; } +static __always_inline int +assign_socket_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, + __u32 len, bool established) { + int ret = -1; + struct bpf_sock *sk = bpf_skc_lookup_tcp(skb, tuple, len, BPF_F_CURRENT_NETNS, 0); + if (!sk) + return -1; + + if (established && + (sk->state == BPF_TCP_LISTEN || sk->state == BPF_TCP_TIME_WAIT)) { + goto release; + } + + ret = bpf_sk_assign(skb, sk, 0); +release: + bpf_sk_release(sk); + return ret; +} + +static __always_inline int +assign_socket_udp(struct __sk_buff *skb, + struct bpf_sock_tuple *tuple, __u32 len) { + struct bpf_sock *sk = bpf_sk_lookup_udp(skb, tuple, len, BPF_F_CURRENT_NETNS, 0); + if (!sk) + return -1; + + int ret = bpf_sk_assign(skb, sk, 0); + bpf_sk_release(sk); + return ret; +} + +static __always_inline int +assign_socket(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, + __u32 len, __u8 nexthdr, bool established) { + if (nexthdr == IPPROTO_TCP) + return assign_socket_tcp(skb, tuple, len, established); + return assign_socket_udp(skb, tuple, len); +} + // SNAT for UDP packet. SEC("tc/egress") int tproxy_lan_egress(struct __sk_buff *skb) { @@ -1613,200 +1571,30 @@ int tproxy_wan_egress(struct __sk_buff *skb) { return TC_ACT_OK; } bool tproxy_response = tproxy_port == tuples.five.sport; - // Double check to avoid conflicts when binding wan and lan to the same - // interface. - if (tproxy_response && l4proto == IPPROTO_TCP) { - // If it is a TCP first handshake, it is not a tproxy response. - if (tcph.syn && !tcph.ack) { - tproxy_response = false; - // Abnormal. - return TC_ACT_SHOT; - } else { - // If there is an existing socket on localhost, it is not a tproxy - // response. - struct bpf_sock_tuple tuple = {0}; - __u32 tuple_size; - if (skb->protocol == bpf_htons(ETH_P_IP)) { - tuple.ipv4.daddr = tuples.five.dip.u6_addr32[3]; - tuple.ipv4.saddr = tuples.five.sip.u6_addr32[3]; - tuple.ipv4.dport = tuples.five.dport; - tuple.ipv4.sport = tuples.five.sport; - tuple_size = sizeof(tuple.ipv4); - } else { - __builtin_memcpy(tuple.ipv6.daddr, &tuples.five.dip, IPV6_BYTE_LENGTH); - __builtin_memcpy(tuple.ipv6.saddr, &tuples.five.sip, IPV6_BYTE_LENGTH); - tuple.ipv6.dport = tuples.five.dport; - tuple.ipv6.sport = tuples.five.sport; - tuple_size = sizeof(tuple.ipv6); - } - struct bpf_sock *sk = - bpf_skc_lookup_tcp(skb, &tuple, tuple_size, BPF_F_CURRENT_NETNS, 0); - if (sk) { - // Not a tproxy WAN response. It is a tproxy LAN response. - bpf_sk_release(sk); - return TC_ACT_PIPE; - } - } - } - if (tproxy_response) { - // Packets from tproxy port. - // We need to redirect it to original port. - - // bpf_printk("tproxy_response: %pI6:%u", tuples.five.dip.u6_addr32, - // bpf_ntohs(tuples.five.dport)); - - // Write mac. - if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), - ethh.h_source, sizeof(ethh.h_source), 0))) { - return TC_ACT_SHOT; - } - if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source), - special_mac_from_tproxy, - sizeof(ethh.h_source), 0))) { - return TC_ACT_SHOT; - }; - } else { - // Normal packets. - - if (l4proto == IPPROTO_TCP) { - // Backup for further use. - tcp_state_syn = tcph.syn && !tcph.ack; - struct ip_port key_src; - __builtin_memset(&key_src, 0, sizeof(key_src)); - // Use daddr as key in WAN because tproxy (control plane) also lookups the - // map element using income client ip (that is daddr). - __builtin_memcpy(&key_src.ip, &tuples.five.dip, IPV6_BYTE_LENGTH); - key_src.port = tcph.source; - __u8 outbound; - bool must; - __u32 mark; - struct pid_pname *pid_pname = NULL; - if (unlikely(tcp_state_syn)) { - // New TCP connection. - // bpf_printk("[%X]New Connection", bpf_ntohl(tcph.seq)); - __u32 flag[8] = {L4ProtoType_TCP}; // TCP - if (skb->protocol == bpf_htons(ETH_P_IP)) { - flag[1] = IpVersionType_4; - } else { - flag[1] = IpVersionType_6; - } - flag[6] = tuples.dscp; - if (pid_is_control_plane(skb, &pid_pname)) { - // From control plane. Direct. - return TC_ACT_OK; - } - if (pid_pname) { - // 2, 3, 4, 5 - __builtin_memcpy(&flag[2], pid_pname->pname, TASK_COMM_LEN); - } - __be32 mac[4] = { - 0, - 0, - bpf_htonl((ethh.h_source[0] << 8) + (ethh.h_source[1])), - bpf_htonl((ethh.h_source[2] << 24) + (ethh.h_source[3] << 16) + - (ethh.h_source[4] << 8) + (ethh.h_source[5])), - }; - __s64 s64_ret; - if ((s64_ret = route(flag, &tcph, tuples.five.sip.u6_addr32, - tuples.five.dip.u6_addr32, mac)) < 0) { - bpf_printk("shot routing: %d", s64_ret); - return TC_ACT_SHOT; - } - - outbound = s64_ret & 0xff; - mark = s64_ret >> 8; - must = (s64_ret >> 40) & 1; - -#if defined(__DEBUG_ROUTING) || defined(__PRINT_ROUTING_RESULT) - // Print only new connection. - __u32 pid = pid_pname ? pid_pname->pid : 0; - bpf_printk("tcp(wan): from %pI6:%u [PID %u]", tuples.five.sip.u6_addr32, - bpf_ntohs(tuples.five.sport), pid); - bpf_printk("tcp(wan): outbound: %u, %pI6:%u", outbound, - tuples.five.dip.u6_addr32, bpf_ntohs(tuples.five.dport)); -#endif - } else { - // bpf_printk("[%X]Old Connection", bpf_ntohl(tcph.seq)); - // The TCP connection exists. - struct dst_routing_result *dst = - bpf_map_lookup_elem(&tcp_dst_map, &key_src); - if (!dst) { - // Do not impact previous connections and server connections. - return TC_ACT_OK; - } - outbound = dst->routing_result.outbound; - mark = dst->routing_result.mark; - must = dst->routing_result.must; - } - - if (outbound == OUTBOUND_DIRECT && - mark == 0 // If mark is not zero, we should re-route it, so we send it - // to control plane in WAN. - ) { - return TC_ACT_OK; - } else if (unlikely(outbound == OUTBOUND_BLOCK)) { - return TC_ACT_SHOT; - } - // Rewrite to control plane. - - // Check outbound connectivity in specific ipversion and l4proto. - struct outbound_connectivity_query q = {0}; - q.outbound = outbound; - q.ipversion = skb->protocol == bpf_htons(ETH_P_IP) ? 4 : 6; - q.l4proto = l4proto; - __u32 *alive; - alive = bpf_map_lookup_elem(&outbound_connectivity_map, &q); - if (alive && *alive == 0 && - !(l4proto == IPPROTO_UDP && tuples.five.dport == bpf_htons(53))) { - // Outbound is not alive. Dns is an exception. - return TC_ACT_SHOT; - } - - if (unlikely(tcp_state_syn)) { - struct dst_routing_result routing_info; - __builtin_memset(&routing_info, 0, sizeof(routing_info)); - __builtin_memcpy(routing_info.ip, &tuples.five.dip, IPV6_BYTE_LENGTH); - routing_info.port = tcph.dest; - routing_info.routing_result.outbound = outbound; - routing_info.routing_result.mark = mark; - routing_info.routing_result.must = must; - routing_info.routing_result.dscp = tuples.dscp; - __builtin_memcpy(routing_info.routing_result.mac, ethh.h_source, - sizeof(ethh.h_source)); - if (pid_pname) { - __builtin_memcpy(routing_info.routing_result.pname, pid_pname->pname, - TASK_COMM_LEN); - routing_info.routing_result.pid = pid_pname->pid; - } - // bpf_printk("UPDATE: %pI6:%u", key_src.ip.u6_addr32, - // bpf_ntohs(key_src.port)); - bpf_map_update_elem(&tcp_dst_map, &key_src, &routing_info, BPF_ANY); - } - - // Write mac. - if ((ret = - bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), - ethh.h_source, sizeof(ethh.h_source), 0))) { - return TC_ACT_SHOT; - } - if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source), - special_mac_to_tproxy, - sizeof(ethh.h_source), 0))) { - return TC_ACT_SHOT; - }; + // WAN response won't reach here, must be a LAN response. + return TC_ACT_PIPE; + } - } else if (l4proto == IPPROTO_UDP) { + // Normal packets. - // Routing. It decides if we redirect traffic to control plane. - __u32 flag[8] = {L4ProtoType_UDP}; + if (l4proto == IPPROTO_TCP) { + // Backup for further use. + tcp_state_syn = tcph.syn && !tcph.ack; + __u8 outbound; + bool must; + __u32 mark; + struct pid_pname *pid_pname = NULL; + if (unlikely(tcp_state_syn)) { + // New TCP connection. + // bpf_printk("[%X]New Connection", bpf_ntohl(tcph.seq)); + __u32 flag[8] = {L4ProtoType_TCP}; // TCP if (skb->protocol == bpf_htons(ETH_P_IP)) { flag[1] = IpVersionType_4; } else { flag[1] = IpVersionType_6; } flag[6] = tuples.dscp; - struct pid_pname *pid_pname; if (pid_is_control_plane(skb, &pid_pname)) { // From control plane. Direct. return TC_ACT_OK; @@ -1823,82 +1611,180 @@ int tproxy_wan_egress(struct __sk_buff *skb) { (ethh.h_source[4] << 8) + (ethh.h_source[5])), }; __s64 s64_ret; - if ((s64_ret = route(flag, &udph, tuples.five.sip.u6_addr32, + if ((s64_ret = route(flag, &tcph, tuples.five.sip.u6_addr32, tuples.five.dip.u6_addr32, mac)) < 0) { bpf_printk("shot routing: %d", s64_ret); return TC_ACT_SHOT; } - // Construct new hdr to encap. - struct dst_routing_result new_hdr; - __builtin_memset(&new_hdr, 0, sizeof(new_hdr)); - __builtin_memcpy(new_hdr.ip, &tuples.five.dip, IPV6_BYTE_LENGTH); - new_hdr.port = udph.dest; - new_hdr.recognize = RECOGNIZE; - new_hdr.routing_result.outbound = s64_ret; - new_hdr.routing_result.mark = s64_ret >> 8; - new_hdr.routing_result.must = (s64_ret >> 40) & 1; - new_hdr.routing_result.dscp = tuples.dscp; - __builtin_memcpy(new_hdr.routing_result.mac, ethh.h_source, - sizeof(ethh.h_source)); - if (pid_pname) { - __builtin_memcpy(new_hdr.routing_result.pname, pid_pname->pname, - TASK_COMM_LEN); - new_hdr.routing_result.pid = pid_pname->pid; - } + + outbound = s64_ret & 0xff; + mark = s64_ret >> 8; + must = (s64_ret >> 40) & 1; + #if defined(__DEBUG_ROUTING) || defined(__PRINT_ROUTING_RESULT) + // Print only new connection. __u32 pid = pid_pname ? pid_pname->pid : 0; - bpf_printk("udp(wan): from %pI6:%u [PID %u]", tuples.five.sip.u6_addr32, + bpf_printk("tcp(wan): from %pI6:%u [PID %u]", tuples.five.sip.u6_addr32, bpf_ntohs(tuples.five.sport), pid); - bpf_printk("udp(wan): outbound: %u, %pI6:%u", - new_hdr.routing_result.outbound, tuples.five.dip.u6_addr32, - bpf_ntohs(tuples.five.dport)); + bpf_printk("tcp(wan): outbound: %u, %pI6:%u", outbound, + tuples.five.dip.u6_addr32, bpf_ntohs(tuples.five.dport)); #endif - - if (new_hdr.routing_result.outbound == OUTBOUND_DIRECT && - new_hdr.routing_result.mark == - 0 // If mark is not zero, we should re-route it, so we - // send it to control plane in WAN. - ) { - return TC_ACT_OK; - } else if (unlikely(new_hdr.routing_result.outbound == OUTBOUND_BLOCK)) { - return TC_ACT_SHOT; + } else { + // bpf_printk("[%X]Old Connection", bpf_ntohl(tcph.seq)); + // The TCP connection exists. + struct routing_result *routing_result = + bpf_map_lookup_elem(&routing_tuples_map, &tuples.five); + if (!routing_result) { + // Do not impact previous connections and server connections. + return TC_ACT_OK; } + outbound = routing_result->outbound; + mark = routing_result->mark; + must = routing_result->must; + } - // Rewrite to control plane. - - // Check outbound connectivity in specific ipversion and l4proto. - struct outbound_connectivity_query q = {0}; - q.outbound = new_hdr.routing_result.outbound; - q.ipversion = skb->protocol == bpf_htons(ETH_P_IP) ? 4 : 6; - q.l4proto = l4proto; - __u32 *alive; - alive = bpf_map_lookup_elem(&outbound_connectivity_map, &q); - if (alive && *alive == 0 && - !(l4proto == IPPROTO_UDP && tuples.five.dport == bpf_htons(53))) { - // Outbound is not alive. Dns is an exception. - return TC_ACT_SHOT; - } + if (outbound == OUTBOUND_DIRECT && + mark == 0 // If mark is not zero, we should re-route it, so we send it + // to control plane in WAN. + ) { + return TC_ACT_OK; + } else if (unlikely(outbound == OUTBOUND_BLOCK)) { + return TC_ACT_SHOT; + } + // Rewrite to control plane. + + // Check outbound connectivity in specific ipversion and l4proto. + struct outbound_connectivity_query q = {0}; + q.outbound = outbound; + q.ipversion = skb->protocol == bpf_htons(ETH_P_IP) ? 4 : 6; + q.l4proto = l4proto; + __u32 *alive; + alive = bpf_map_lookup_elem(&outbound_connectivity_map, &q); + if (alive && *alive == 0 && + !(l4proto == IPPROTO_UDP && tuples.five.dport == bpf_htons(53))) { + // Outbound is not alive. Dns is an exception. + return TC_ACT_SHOT; + } - // Write mac. - if ((ret = - bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), - ethh.h_source, sizeof(ethh.h_source), 0))) { - return TC_ACT_SHOT; + if (unlikely(tcp_state_syn)) { + struct routing_result routing_result = {}; + routing_result.outbound = outbound; + routing_result.mark = mark; + routing_result.must = must; + routing_result.dscp = tuples.dscp; + __builtin_memcpy(routing_result.mac, ethh.h_source, + sizeof(ethh.h_source)); + if (pid_pname) { + __builtin_memcpy(routing_result.pname, pid_pname->pname, + TASK_COMM_LEN); + routing_result.pid = pid_pname->pid; } - if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source), - special_mac_to_tproxy, - sizeof(ethh.h_source), 0))) { - return TC_ACT_SHOT; - }; + bpf_map_update_elem(&routing_tuples_map, &tuples.five, + &routing_result, BPF_ANY); + } - // Encap a header to transmit fullcone tuple. - if ((ret = encap_after_udp_hdr( - skb, link_h_len, ihl, - skb->protocol == bpf_htons(ETH_P_IP) ? iph.tot_len : 0, &new_hdr, - sizeof(new_hdr), true))) { - return TC_ACT_SHOT; - } + // Write mac. + if ((ret = + bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), + ethh.h_source, sizeof(ethh.h_source), 0))) { + return TC_ACT_SHOT; + } + if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source), + special_mac_to_tproxy, + sizeof(ethh.h_source), 0))) { + return TC_ACT_SHOT; + }; + + } else if (l4proto == IPPROTO_UDP) { + + // Routing. It decides if we redirect traffic to control plane. + __u32 flag[8] = {L4ProtoType_UDP}; + if (skb->protocol == bpf_htons(ETH_P_IP)) { + flag[1] = IpVersionType_4; + } else { + flag[1] = IpVersionType_6; + } + flag[6] = tuples.dscp; + struct pid_pname *pid_pname; + if (pid_is_control_plane(skb, &pid_pname)) { + // From control plane. Direct. + return TC_ACT_OK; + } + if (pid_pname) { + // 2, 3, 4, 5 + __builtin_memcpy(&flag[2], pid_pname->pname, TASK_COMM_LEN); + } + __be32 mac[4] = { + 0, + 0, + bpf_htonl((ethh.h_source[0] << 8) + (ethh.h_source[1])), + bpf_htonl((ethh.h_source[2] << 24) + (ethh.h_source[3] << 16) + + (ethh.h_source[4] << 8) + (ethh.h_source[5])), + }; + __s64 s64_ret; + if ((s64_ret = route(flag, &udph, tuples.five.sip.u6_addr32, + tuples.five.dip.u6_addr32, mac)) < 0) { + bpf_printk("shot routing: %d", s64_ret); + return TC_ACT_SHOT; + } + // Construct new hdr to encap. + struct routing_result routing_result = {}; + routing_result.outbound = s64_ret; + routing_result.mark = s64_ret >> 8; + routing_result.must = (s64_ret >> 40) & 1; + routing_result.dscp = tuples.dscp; + __builtin_memcpy(routing_result.mac, ethh.h_source, sizeof(ethh.h_source)); + if (pid_pname) { + __builtin_memcpy(routing_result.pname, pid_pname->pname, + TASK_COMM_LEN); + routing_result.pid = pid_pname->pid; + } + bpf_map_update_elem(&routing_tuples_map, &tuples.five, &routing_result, + BPF_ANY); +#if defined(__DEBUG_ROUTING) || defined(__PRINT_ROUTING_RESULT) + __u32 pid = pid_pname ? pid_pname->pid : 0; + bpf_printk("udp(wan): from %pI6:%u [PID %u]", tuples.five.sip.u6_addr32, + bpf_ntohs(tuples.five.sport), pid); + bpf_printk("udp(wan): outbound: %u, %pI6:%u", + routing_result.outbound, tuples.five.dip.u6_addr32, + bpf_ntohs(tuples.five.dport)); +#endif + + if (routing_result.outbound == OUTBOUND_DIRECT && routing_result.mark == 0 + // If mark is not zero, we should re-route it, so we send it to control + // plane in WAN. + ) { + return TC_ACT_OK; + } else if (unlikely(routing_result.outbound == OUTBOUND_BLOCK)) { + return TC_ACT_SHOT; + } + + // Rewrite to control plane. + + // Check outbound connectivity in specific ipversion and l4proto. + struct outbound_connectivity_query q = {0}; + q.outbound = routing_result.outbound; + q.ipversion = skb->protocol == bpf_htons(ETH_P_IP) ? 4 : 6; + q.l4proto = l4proto; + __u32 *alive; + alive = bpf_map_lookup_elem(&outbound_connectivity_map, &q); + if (alive && *alive == 0 && + !(l4proto == IPPROTO_UDP && tuples.five.dport == bpf_htons(53))) { + // Outbound is not alive. Dns is an exception. + return TC_ACT_SHOT; + } + + // Write mac. + if ((ret = + bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), + ethh.h_source, sizeof(ethh.h_source), 0))) { + return TC_ACT_SHOT; } + if ((ret = bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source), + special_mac_to_tproxy, + sizeof(ethh.h_source), 0))) { + return TC_ACT_SHOT; + }; } // // Print packet in hex for debugging (checksum or something else). @@ -1981,156 +1867,53 @@ int tproxy_wan_ingress(struct __sk_buff *skb) { accept: return TC_ACT_PIPE; } - bool tproxy_response = tproxy_typ == 1; - - // // Print packet in hex for debugging (checksum or something else). - // if (dport == bpf_htons(8443)) { - // bpf_printk("PRINT BEFORE PACKET"); - // for (__u32 i = 0; i < skb->len && i < 500; i++) { - // __u8 t = 0; - // bpf_skb_load_bytes(skb, i, &t, 1); - // bpf_printk("%02x", t); - // } - // } - if (tproxy_response) { - // Send the tproxy response packet to origin. - - // If a client sent a packet at the begining, let's say the client is - // sender and its ip is right host ip. - // saddr is host ip and right sender ip. - // Now when tproxy responses, dport is sender's sport. See (1) below. daddr - // is original dest ip (target address). - - // bpf_printk("[%u]should send to origin: %pI6:%u", - // l4proto, saddr, - // bpf_ntohs(dport)); - - if (l4proto == IPPROTO_TCP) { - // Lookup original dest as sip and sport. - struct ip_port key_dst; - __builtin_memset(&key_dst, 0, sizeof(key_dst)); - // Use daddr as key in WAN because tproxy (control plane) also lookups the - // map element using income client ip (that is daddr). - __builtin_memcpy(&key_dst.ip, &tuples.five.dip, IPV6_BYTE_LENGTH); - key_dst.port = tcph.dest; - struct dst_routing_result *original_dst = - bpf_map_lookup_elem(&tcp_dst_map, &key_dst); - if (!original_dst) { - bpf_printk("[%X]Bad Connection: to: %pI6:%u", bpf_ntohl(tcph.seq), - key_dst.ip.u6_addr32, bpf_ntohs(key_dst.port)); - return TC_ACT_SHOT; - } - - // Rewrite sip and sport. - if ((ret = rewrite_ip(skb, link_h_len, IPPROTO_TCP, ihl, - tuples.five.sip.u6_addr32, original_dst->ip, false, - true))) { - bpf_printk("Shot IP: %d", ret); - return TC_ACT_SHOT; - } - if ((ret = rewrite_port(skb, link_h_len, IPPROTO_TCP, ihl, - tuples.five.sport, original_dst->port, false, - true))) { - bpf_printk("Shot Port: %d", ret); - return TC_ACT_SHOT; - } - } else if (l4proto == IPPROTO_UDP) { - - /// NOTICE: Actually, we do not need symmetrical headers in client and - /// server. We use it for convinience. This behavior may change in the - /// future. Outbound here is useless and redundant. - struct dst_routing_result ori_src; - - // Get source ip/port from our packet header. - // Decap header to get fullcone tuple. - if ((ret = decap_after_udp_hdr( - skb, link_h_len, ihl, - skb->protocol == bpf_htons(ETH_P_IP) ? iph.tot_len : 0, &ori_src, - sizeof(ori_src), NULL, true))) { - return TC_ACT_SHOT; - } - - // Rewrite udp src ip - if ((ret = rewrite_ip(skb, link_h_len, IPPROTO_UDP, ihl, - tuples.five.sip.u6_addr32, ori_src.ip, false, - true))) { - bpf_printk("Shot IP: %d", ret); - return TC_ACT_SHOT; - } - - // Rewrite udp src port - if ((ret = rewrite_port(skb, link_h_len, IPPROTO_UDP, ihl, - tuples.five.sport, ori_src.port, false, true))) { - bpf_printk("Shot Port: %d", ret); - return TC_ACT_SHOT; - } + // Should send the packet to tproxy. - // bpf_printk("real from: %pI6:%u", ori_src.ip, bpf_ntohs(ori_src.port)); + skb->mark = TPROXY_MARK; + struct bpf_sock_tuple tuple = {}; + __u32 tuple_size = sizeof(tuple.ipv4); - // Print packet in hex for debugging (checksum or something else). - // bpf_printk("UDP EGRESS OK"); - // for (__u32 i = 0; i < skb->len && i < 1500; i++) { - // __u8 t = 0; - // bpf_skb_load_bytes(skb, i, &t, 1); - // bpf_printk("%02x", t); - // } - } - // Rewrite dip to host ip. - if ((ret = rewrite_ip(skb, link_h_len, l4proto, ihl, - tuples.five.dip.u6_addr32, tuples.five.sip.u6_addr32, - true, true))) { - bpf_printk("Shot IP: %d", ret); - return TC_ACT_SHOT; - } + /* First look for established socket */ + if (skb->protocol == bpf_htons(ETH_P_IP)) { + tuple.ipv4.saddr = tuples.five.sip.u6_addr32[3]; + tuple.ipv4.daddr = tuples.five.dip.u6_addr32[3]; + tuple.ipv4.sport = tuples.five.sport; + tuple.ipv4.dport = tuples.five.dport; } else { - // Should send the packet to tproxy. - - // Get tproxy ip and port. - // saddr should be tproxy ip. - __be32 *tproxy_ip = tuples.five.sip.u6_addr32; - // __builtin_memcpy(tproxy_ip, saddr, sizeof(tproxy_ip)); - __be16 tproxy_port = PARAM.tproxy_port; - if (!tproxy_port) { - return TC_ACT_OK; - } - // bpf_printk("should send to: %pI6:%u", tproxy_ip, - // bpf_ntohs(*tproxy_port)); - - if ((ret = rewrite_ip(skb, link_h_len, l4proto, ihl, - tuples.five.dip.u6_addr32, tproxy_ip, true, true))) { - bpf_printk("Shot IP: %d", ret); - return TC_ACT_SHOT; - } - - // Rewrite dst port. - if ((ret = rewrite_port(skb, link_h_len, l4proto, ihl, tuples.five.dport, - tproxy_port, true, true))) { - bpf_printk("Shot Port: %d", ret); - return TC_ACT_SHOT; - } - - // (1) Use daddr as saddr to pass NIC verification. Notice that we do not - // modify the so tproxy will send packet to it. - if ((ret = rewrite_ip(skb, link_h_len, l4proto, ihl, - tuples.five.sip.u6_addr32, tuples.five.dip.u6_addr32, - false, true))) { - bpf_printk("Shot IP: %d", ret); - return TC_ACT_SHOT; - } + __builtin_memcpy(tuple.ipv6.saddr, &tuples.five.sip, IPV6_BYTE_LENGTH); + __builtin_memcpy(tuple.ipv6.daddr, &tuples.five.dip, IPV6_BYTE_LENGTH); + tuple.ipv6.sport = tuples.five.sport; + tuple.ipv6.dport = tuples.five.dport; + tuple_size = sizeof(tuple.ipv6); + } + ret = assign_socket(skb, &tuple, tuple_size, l4proto, true); + if (ret == 0) { + return TC_ACT_OK; } - // // Print packet in hex for debugging (checksum or something else). - // if (dport == bpf_htons(8443)) { - // bpf_printk("PRINT AFTER PACKET"); - // for (__u32 i = 0; i < skb->len && i < 500; i++) { - // __u8 t = 0; - // bpf_skb_load_bytes(skb, i, &t, 1); - // bpf_printk("%02x", t); - // } - // } + /* Then look for tproxy listening socket */ + __be16 tproxy_port = PARAM.tproxy_port; + if (!tproxy_port) { + return TC_ACT_OK; + } + if (skb->protocol == bpf_htons(ETH_P_IP)) { + tuple.ipv4.saddr = 0; + tuple.ipv4.daddr = tuples.five.sip.u6_addr32[3]; + tuple.ipv4.sport = 0; + tuple.ipv4.dport = tproxy_port; + } else { + __builtin_memset(tuple.ipv6.saddr, 0, IPV6_BYTE_LENGTH); + __builtin_memcpy(tuple.ipv6.daddr, &tuples.five.sip, IPV6_BYTE_LENGTH); + tuple.ipv6.sport = 0; + tuple.ipv6.dport = tproxy_port; + } + ret = assign_socket(skb, &tuple, tuple_size, l4proto, false); + if (ret == 0) { + return TC_ACT_OK; + } - return TC_ACT_OK; + return TC_ACT_SHOT; } static int __always_inline _update_map_elem_by_cookie(const __u64 cookie) { diff --git a/control/tcp.go b/control/tcp.go index ee6be13c3..85c0346f3 100644 --- a/control/tcp.go +++ b/control/tcp.go @@ -44,36 +44,8 @@ func (c *ControlPlane) handleConn(lConn net.Conn) (err error) { dst := lConn.LocalAddr().(*net.TCPAddr).AddrPort() routingResult, err := c.core.RetrieveRoutingResult(src, dst, unix.IPPROTO_TCP) if err != nil { - // WAN. Old method. - var value bpfDstRoutingResult - ip6 := src.Addr().As16() - if e := c.core.bpf.TcpDstMap.Lookup(bpfIpPort{ - Ip: struct{ U6Addr8 [16]uint8 }{U6Addr8: ip6}, - Port: common.Htons(src.Port()), - }, &value); e != nil { - if c.tproxyPortProtect { - return fmt.Errorf("failed to retrieve target info %v: %v, %v", src.String(), err, e) - } else { - routingResult = &bpfRoutingResult{ - Mark: 0, - Must: 0, - Mac: [6]uint8{}, - Outbound: uint8(consts.OutboundControlPlaneRouting), - Pname: [16]uint8{}, - Pid: 0, - } - goto destRetrieved - } - } - routingResult = &value.RoutingResult - - dstAddr, ok := netip.AddrFromSlice(common.Ipv6Uint32ArrayToByteSlice(value.Ip)) - if !ok { - return fmt.Errorf("failed to parse dest ip: %v", value.Ip) - } - dst = netip.AddrPortFrom(dstAddr, common.Htons(value.Port)) + return fmt.Errorf("failed to retrieve target info %v: %v", dst.String(), err) } -destRetrieved: src = common.ConvergeAddrPort(src) dst = common.ConvergeAddrPort(dst) diff --git a/control/udp.go b/control/udp.go index 1a436abf0..36fd4ad97 100644 --- a/control/udp.go +++ b/control/udp.go @@ -13,7 +13,6 @@ import ( "net/netip" "syscall" "time" - "unsafe" "github.com/daeuniverse/dae/common" "github.com/daeuniverse/dae/common/consts" @@ -51,19 +50,6 @@ func ChooseNatTimeout(data []byte, sniffDns bool) (dmsg *dnsmessage.Msg, timeout return nil, DefaultNatTimeout } -func ParseAddrHdr(data []byte) (hdr *bpfDstRoutingResult, dataOffset int, err error) { - dataOffset = int(unsafe.Sizeof(bpfDstRoutingResult{})) - if len(data) < dataOffset { - return nil, 0, fmt.Errorf("data is too short to parse AddrHdr") - } - _hdr := *(*bpfDstRoutingResult)(unsafe.Pointer(&data[0])) - if _hdr.Recognize != consts.Recognize { - return nil, 0, fmt.Errorf("bad recognize") - } - _hdr.Port = common.Ntohs(_hdr.Port) - return &_hdr, dataOffset, nil -} - func sendPktWithHdrWithFlag(data []byte, realFrom netip.AddrPort, lConn *net.UDPConn, to netip.AddrPort, lanWanFlag consts.LanWanFlag) error { realFrom16 := realFrom.Addr().As16() hdr := bpfDstRoutingResult{ diff --git a/control/utils.go b/control/utils.go index 9fbd09975..070127242 100644 --- a/control/utils.go +++ b/control/utils.go @@ -128,6 +128,10 @@ func SetForwarding(ifname string, val string) { _ = setForwarding(ifname, consts.IpVersionStr_6, val) } +func SetAcceptLocal(ifname, val string) error { + return os.WriteFile(fmt.Sprintf("/proc/sys/net/ipv4/conf/%s/accept_local", ifname), []byte(val), 0644) +} + func checkSendRedirects(ifname string, ipversion consts.IpVersionStr) error { path := fmt.Sprintf("/proc/sys/net/ipv%v/conf/%v/send_redirects", ipversion, ifname) b, err := os.ReadFile(path)