From d13535d71ed3e430e26c7538cd6266d026b26f54 Mon Sep 17 00:00:00 2001
From: TheDiveO <6920158+thediveo@users.noreply.github.com>
Date: Fri, 5 Jul 2024 19:04:27 +0200
Subject: [PATCH] supports AF_XDP socket diagnosis; skip XSK diag test if
 kernel doesn't support XSK diag

---
 socket.go                |  71 +++++++++++++
 socket_xdp_linux.go      | 222 +++++++++++++++++++++++++++++++++++++++
 socket_xdp_linux_test.go |  49 +++++++++
 xdp_diag.go              |  39 +++++++
 xdp_linux.go             |  48 +++++++++
 5 files changed, 429 insertions(+)
 create mode 100644 socket_xdp_linux.go
 create mode 100644 socket_xdp_linux_test.go
 create mode 100644 xdp_diag.go
 create mode 100644 xdp_linux.go

diff --git a/socket.go b/socket.go
index ebcf8423..e65efb13 100644
--- a/socket.go
+++ b/socket.go
@@ -35,3 +35,74 @@ type UnixSocket struct {
 	INode  uint32
 	Cookie [2]uint32
 }
+
+// XDPSocket represents an XDP socket (and the common diagnosis part in
+// particular). Please note that in contrast to [UnixSocket] the XDPSocket type
+// does not feature “State” information.
+type XDPSocket struct {
+	// xdp_diag_msg
+	// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L21
+	Family uint8
+	Type   uint8
+	pad    uint16
+	Ino    uint32
+	Cookie [2]uint32
+}
+
+// XDPInfo collects the diagnosis attributes decoded for a single XDP socket.
+// Umem and Stats stay nil unless the corresponding attribute was present in
+// the kernel's reply.
+type XDPInfo struct {
+	// XDP_DIAG_INFO/xdp_diag_info
+	// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L51
+	Ifindex uint32
+	QueueID uint32
+
+	// XDP_DIAG_UID
+	UID uint32
+
+	// XDP_DIAG_RX_RING, XDP_DIAG_TX_RING, XDP_DIAG_UMEM_FILL_RING and
+	// XDP_DIAG_UMEM_COMPLETION_RING
+	// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L56
+	RxRingEntries             uint32
+	TxRingEntries             uint32
+	UmemFillRingEntries       uint32
+	UmemCompletionRingEntries uint32
+
+	// XDP_DIAG_UMEM
+	Umem *XDPDiagUmem
+
+	// XDP_DIAG_STATS
+	Stats *XDPDiagStats
+}
+
+const (
+	XDP_DU_F_ZEROCOPY = 1 << iota
+)
+
+// XDPDiagUmem describes the umem attached to an XDP socket.
+//
+// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L62
+type XDPDiagUmem struct {
+	Size      uint64
+	ID        uint32
+	NumPages  uint32
+	ChunkSize uint32
+	Headroom  uint32
+	Ifindex   uint32
+	QueueID   uint32
+	Flags     uint32
+	Refs      uint32
+}
+
+// XDPDiagStats contains ring statistics for an XDP socket.
+//
+// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L74
+type XDPDiagStats struct {
+	RxDropped     uint64
+	RxInvalid     uint64
+	RxFull        uint64
+	FillRingEmpty uint64
+	TxInvalid     uint64
+	TxRingEmpty   uint64
+}
diff --git a/socket_xdp_linux.go b/socket_xdp_linux.go
new file mode 100644
index 00000000..20c82f9c
--- /dev/null
+++ b/socket_xdp_linux.go
@@ -0,0 +1,222 @@
+package netlink
+
+import (
+	"errors"
+	"fmt"
+	"syscall"
+
+	"github.com/vishvananda/netlink/nl"
+	"golang.org/x/sys/unix"
+)
+
+const (
+	sizeofXDPSocketRequest = 1 + 1 + 2 + 4 + 4 + 2*4
+	sizeofXDPSocket        = 0x10
+)
+
+// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L12
+type xdpSocketRequest struct {
+	Family   uint8
+	Protocol uint8
+	pad      uint16
+	Ino      uint32
+	Show     uint32
+	Cookie   [2]uint32
+}
+
+// Serialize encodes the request in native byte order. The buffer is sized with
+// sizeofXDPSocketRequest so that it always agrees with Len.
+func (r *xdpSocketRequest) Serialize() []byte {
+	b := writeBuffer{Bytes: make([]byte, sizeofXDPSocketRequest)}
+	b.Write(r.Family)
+	b.Write(r.Protocol)
+	native.PutUint16(b.Next(2), r.pad)
+	native.PutUint32(b.Next(4), r.Ino)
+	native.PutUint32(b.Next(4), r.Show)
+	native.PutUint32(b.Next(4), r.Cookie[0])
+	native.PutUint32(b.Next(4), r.Cookie[1])
+	return b.Bytes
+}
+
+// Len returns the size in bytes of the serialized request.
+func (r *xdpSocketRequest) Len() int { return sizeofXDPSocketRequest }
+
+// deserialize decodes the fixed-size xdp_diag_msg header found at the start of
+// b; any trailing attribute data is left for the caller to parse.
+func (s *XDPSocket) deserialize(b []byte) error {
+	if len(b) < sizeofXDPSocket {
+		return fmt.Errorf("XDP socket data short read (%d); want %d", len(b), sizeofXDPSocket)
+	}
+	rb := readBuffer{Bytes: b}
+	s.Family = rb.Read()
+	s.Type = rb.Read()
+	s.pad = native.Uint16(rb.Next(2))
+	s.Ino = native.Uint32(rb.Next(4))
+	s.Cookie[0] = native.Uint32(rb.Next(4))
+	s.Cookie[1] = native.Uint32(rb.Next(4))
+	return nil
+}
+
+// SocketXDPGetInfo returns the XDP socket identified by its inode number and/or
+// socket cookie. Specify the cookie as SOCK_ANY_COOKIE (or 0) to match on the
+// inode number only, and the inode number as 0 to match on the cookie only. An
+// error is returned when no socket, or more than one socket, matches.
+func SocketXDPGetInfo(ino uint32, cookie uint64) (*XDPDiagInfoResp, error) {
+	// We have a problem here: dumping AF_XDP sockets currently does not support
+	// filtering. We thus need to dump all XSKs and then only filter afterwards
+	// :(
+	xsks, err := SocketDiagXDP()
+	if err != nil {
+		return nil, err
+	}
+	checkCookie := cookie != SOCK_ANY_COOKIE && cookie != 0
+	crumblingCookie := [2]uint32{uint32(cookie), uint32(cookie >> 32)}
+	checkIno := ino != 0
+	var xskinfo *XDPDiagInfoResp
+	for _, xsk := range xsks {
+		if checkIno && xsk.XDPDiagMsg.Ino != ino {
+			continue
+		}
+		if checkCookie && xsk.XDPDiagMsg.Cookie != crumblingCookie {
+			continue
+		}
+		if xskinfo != nil {
+			return nil, errors.New("multiple matching XDP sockets")
+		}
+		xskinfo = xsk
+	}
+	if xskinfo == nil {
+		return nil, errors.New("no matching XDP socket")
+	}
+	return xskinfo, nil
+}
+
+// SocketDiagXDP requests XDP_DIAG_INFO for XDP family sockets.
+func SocketDiagXDP() ([]*XDPDiagInfoResp, error) {
+	var result []*XDPDiagInfoResp
+	err := socketDiagXDPExecutor(func(m syscall.NetlinkMessage) error {
+		sockInfo := &XDPSocket{}
+		if err := sockInfo.deserialize(m.Data); err != nil {
+			return err
+		}
+		attrs, err := nl.ParseRouteAttr(m.Data[sizeofXDPSocket:])
+		if err != nil {
+			return err
+		}
+
+		res, err := attrsToXDPDiagInfoResp(attrs, sockInfo)
+		if err != nil {
+			return err
+		}
+
+		result = append(result, res)
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return result, nil
+}
+
+// socketDiagXDPExecutor requests XDP_DIAG_INFO for XDP family sockets.
+func socketDiagXDPExecutor(receiver func(syscall.NetlinkMessage) error) error {
+	s, err := nl.Subscribe(unix.NETLINK_INET_DIAG)
+	if err != nil {
+		return err
+	}
+	defer s.Close()
+
+	req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP)
+	req.AddData(&xdpSocketRequest{
+		Family: unix.AF_XDP,
+		Show:   XDP_SHOW_INFO | XDP_SHOW_RING_CFG | XDP_SHOW_UMEM | XDP_SHOW_STATS,
+	})
+	if err := s.Send(req); err != nil {
+		return err
+	}
+
+loop:
+	for {
+		msgs, from, err := s.Receive()
+		if err != nil {
+			return err
+		}
+		if from.Pid != nl.PidKernel {
+			return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel)
+		}
+		if len(msgs) == 0 {
+			return errors.New("no message nor error from netlink")
+		}
+
+		for _, m := range msgs {
+			switch m.Header.Type {
+			case unix.NLMSG_DONE:
+				break loop
+			case unix.NLMSG_ERROR:
+				// The payload starts with the negated errno as a 32-bit value.
+				if len(m.Data) < 4 {
+					return errors.New("netlink error message short read")
+				}
+				errno := int32(native.Uint32(m.Data[0:4]))
+				return syscall.Errno(-errno)
+			}
+			if err := receiver(m); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// attrsToXDPDiagInfoResp decodes the netlink attributes of one XDP socket into
+// an XDPDiagInfoResp; attribute types not handled below are ignored.
+func attrsToXDPDiagInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *XDPSocket) (*XDPDiagInfoResp, error) {
+	resp := &XDPDiagInfoResp{
+		XDPDiagMsg: sockInfo,
+		XDPInfo:    &XDPInfo{},
+	}
+	for _, a := range attrs {
+		// The fixed-size attributes are sliced directly further down, so
+		// reject truncated payloads here instead of panicking.
+		want := 0
+		switch a.Attr.Type {
+		case XDP_DIAG_INFO:
+			want = 8
+		case XDP_DIAG_UID, XDP_DIAG_RX_RING, XDP_DIAG_TX_RING,
+			XDP_DIAG_UMEM_FILL_RING, XDP_DIAG_UMEM_COMPLETION_RING:
+			want = 4
+		}
+		if len(a.Value) < want {
+			return nil, fmt.Errorf("XDP diagnosis attribute %d short read (%d); want %d", a.Attr.Type, len(a.Value), want)
+		}
+
+		switch a.Attr.Type {
+		case XDP_DIAG_INFO:
+			resp.XDPInfo.Ifindex = native.Uint32(a.Value[0:4])
+			resp.XDPInfo.QueueID = native.Uint32(a.Value[4:8])
+		case XDP_DIAG_UID:
+			resp.XDPInfo.UID = native.Uint32(a.Value[0:4])
+		case XDP_DIAG_RX_RING:
+			resp.XDPInfo.RxRingEntries = native.Uint32(a.Value[0:4])
+		case XDP_DIAG_TX_RING:
+			resp.XDPInfo.TxRingEntries = native.Uint32(a.Value[0:4])
+		case XDP_DIAG_UMEM_FILL_RING:
+			resp.XDPInfo.UmemFillRingEntries = native.Uint32(a.Value[0:4])
+		case XDP_DIAG_UMEM_COMPLETION_RING:
+			resp.XDPInfo.UmemCompletionRingEntries = native.Uint32(a.Value[0:4])
+		case XDP_DIAG_UMEM:
+			umem := &XDPDiagUmem{}
+			if err := umem.deserialize(a.Value); err != nil {
+				return nil, err
+			}
+			resp.XDPInfo.Umem = umem
+		case XDP_DIAG_STATS:
+			stats := &XDPDiagStats{}
+			if err := stats.deserialize(a.Value); err != nil {
+				return nil, err
+			}
+			resp.XDPInfo.Stats = stats
+		}
+	}
+	return resp, nil
+}
diff --git a/socket_xdp_linux_test.go b/socket_xdp_linux_test.go
new file mode 100644
index 00000000..4f09487e
--- /dev/null
+++ b/socket_xdp_linux_test.go
@@ -0,0 +1,49 @@
+//go:build linux
+// +build linux
+
+package netlink
+
+import (
+	"os"
+	"testing"
+
+	"golang.org/x/sys/unix"
+)
+
+func TestSocketXDPGetInfo(t *testing.T) {
+	xdpsockfd, err := unix.Socket(unix.AF_XDP, unix.SOCK_RAW, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer unix.Close(xdpsockfd)
+
+	wantFamily := unix.AF_XDP
+
+	var xdpsockstat unix.Stat_t
+	err = unix.Fstat(xdpsockfd, &xdpsockstat)
+	if err != nil {
+		t.Fatal(err)
+	}
+	wantIno := xdpsockstat.Ino
+
+	result, err := SocketXDPGetInfo(uint32(wantIno), SOCK_ANY_COOKIE)
+	if err != nil {
+		if os.IsNotExist(err) {
+			t.Skip("kernel lacks support for AF_XDP socket diagnosis")
+		}
+		t.Fatal(err)
+	}
+
+	if got := result.XDPDiagMsg.Family; got != uint8(wantFamily) {
+		t.Fatalf("protocol family = %v, want %v", got, wantFamily)
+	}
+	if got := result.XDPDiagMsg.Ino; got != uint32(wantIno) {
+		t.Fatalf("protocol ino = %v, want %v", got, wantIno)
+	}
+	if result.XDPInfo == nil {
+		t.Fatalf("want non-nil XDPInfo, got nil")
+	}
+	if got := result.XDPInfo.Ifindex; got != 0 {
+		t.Fatalf("ifindex = %v, want 0", got)
+	}
+}
diff --git a/xdp_diag.go b/xdp_diag.go
new file mode 100644
index 00000000..e88825bf
--- /dev/null
+++ b/xdp_diag.go
@@ -0,0 +1,39 @@
+package netlink
+
+import "github.com/vishvananda/netlink/nl"
+
+// SOCK_ANY_COOKIE is the wildcard socket cookie; SocketXDPGetInfo does not
+// filter on the cookie when it is passed this value.
+const SOCK_ANY_COOKIE = uint64(nl.TCPDIAG_NOCOOKIE)<<32 + uint64(nl.TCPDIAG_NOCOOKIE)
+
+// XDP diagnosis show flag constants to request particular information elements.
+const (
+	XDP_SHOW_INFO = 1 << iota
+	XDP_SHOW_RING_CFG
+	XDP_SHOW_UMEM
+	XDP_SHOW_MEMINFO
+	XDP_SHOW_STATS
+)
+
+// XDP diag element constants
+const (
+	XDP_DIAG_NONE                 = iota
+	XDP_DIAG_INFO                 // when using XDP_SHOW_INFO
+	XDP_DIAG_UID                  // when using XDP_SHOW_INFO
+	XDP_DIAG_RX_RING              // when using XDP_SHOW_RING_CFG
+	XDP_DIAG_TX_RING              // when using XDP_SHOW_RING_CFG
+	XDP_DIAG_UMEM                 // when using XDP_SHOW_UMEM
+	XDP_DIAG_UMEM_FILL_RING       // when using XDP_SHOW_UMEM
+	XDP_DIAG_UMEM_COMPLETION_RING // when using XDP_SHOW_UMEM
+	XDP_DIAG_MEMINFO              // when using XDP_SHOW_MEMINFO
+	XDP_DIAG_STATS                // when using XDP_SHOW_STATS
+)
+
+// XDPDiagInfoResp pairs the common xdp_diag_msg part of an XDP socket with the
+// attributes decoded from the same netlink message.
+//
+// https://elixir.bootlin.com/linux/v6.2/source/include/uapi/linux/xdp_diag.h#L21
+type XDPDiagInfoResp struct {
+	XDPDiagMsg *XDPSocket
+	XDPInfo    *XDPInfo
+}
diff --git a/xdp_linux.go b/xdp_linux.go
new file mode 100644
index 00000000..896a406d
--- /dev/null
+++ b/xdp_linux.go
@@ -0,0 +1,48 @@
+package netlink
+
+import (
+	"bytes"
+	"fmt"
+)
+
+const (
+	xdpDiagUmemLen  = 8 + 8*4
+	xdpDiagStatsLen = 6 * 8
+)
+
+// deserialize decodes an XDP_DIAG_UMEM attribute payload in native byte order.
+func (x *XDPDiagUmem) deserialize(b []byte) error {
+	if len(b) < xdpDiagUmemLen {
+		return fmt.Errorf("XDP umem diagnosis data short read (%d); want %d", len(b), xdpDiagUmemLen)
+	}
+
+	rb := bytes.NewBuffer(b)
+	x.Size = native.Uint64(rb.Next(8))
+	x.ID = native.Uint32(rb.Next(4))
+	x.NumPages = native.Uint32(rb.Next(4))
+	x.ChunkSize = native.Uint32(rb.Next(4))
+	x.Headroom = native.Uint32(rb.Next(4))
+	x.Ifindex = native.Uint32(rb.Next(4))
+	x.QueueID = native.Uint32(rb.Next(4))
+	x.Flags = native.Uint32(rb.Next(4))
+	x.Refs = native.Uint32(rb.Next(4))
+
+	return nil
+}
+
+// deserialize decodes an XDP_DIAG_STATS attribute payload in native byte order.
+func (x *XDPDiagStats) deserialize(b []byte) error {
+	if len(b) < xdpDiagStatsLen {
+		return fmt.Errorf("XDP diagnosis statistics short read (%d); want %d", len(b), xdpDiagStatsLen)
+	}
+
+	rb := bytes.NewBuffer(b)
+	x.RxDropped = native.Uint64(rb.Next(8))
+	x.RxInvalid = native.Uint64(rb.Next(8))
+	x.RxFull = native.Uint64(rb.Next(8))
+	x.FillRingEmpty = native.Uint64(rb.Next(8))
+	x.TxInvalid = native.Uint64(rb.Next(8))
+	x.TxRingEmpty = native.Uint64(rb.Next(8))
+
+	return nil
+}