From 39adee29adc96e7a4d12d21516e247b4b6aeaae2 Mon Sep 17 00:00:00 2001 From: "Doug Cook (WINDOWS)" Date: Tue, 16 Jan 2024 21:24:19 -0800 Subject: [PATCH] dwc_eqos - segmentation offload and vlan fixes - Add a README.md. - Add a REG file for enabling a diagnostic ETW autologger. - Implement TCP segmentation offload (LSOv2). - Implement UDP fragmentation offload (USO). - Instead of making a half-baked effort to adapt to disabled hardware features (i.e. lack of Checksum or Segment offload support), check for the feature and don't load if it's missing. At present, I have nothing to test this on and it just makes the code messy. We can add adaptability back in the future if anybody actually wants it and has hardware to test it. - Fix problems with error logging in Tx path. - Fix problems with VLAN tag insertion. - Fix problems with checksum offload configuration. NetAdapterCx only recognizes the "granular" options. --- drivers/net/dwc_eqos/AutoLogger.reg | 12 + drivers/net/dwc_eqos/README.md | 48 +++ drivers/net/dwc_eqos/descriptors.h | 16 +- drivers/net/dwc_eqos/device.cpp | 91 ++++- drivers/net/dwc_eqos/device.h | 4 +- drivers/net/dwc_eqos/driver.cpp | 9 - drivers/net/dwc_eqos/dwc_eqos.inf | 94 ++++- drivers/net/dwc_eqos/dwc_eqos.vcxproj | 4 + drivers/net/dwc_eqos/dwc_eqos.vcxproj.filters | 8 + drivers/net/dwc_eqos/precomp.h | 1 + drivers/net/dwc_eqos/queue_common.h | 7 +- drivers/net/dwc_eqos/trace.h | 4 +- drivers/net/dwc_eqos/txqueue.cpp | 384 ++++++++++++------ drivers/net/dwc_eqos/txqueue.h | 7 + 14 files changed, 510 insertions(+), 179 deletions(-) create mode 100644 drivers/net/dwc_eqos/AutoLogger.reg create mode 100644 drivers/net/dwc_eqos/README.md diff --git a/drivers/net/dwc_eqos/AutoLogger.reg b/drivers/net/dwc_eqos/AutoLogger.reg new file mode 100644 index 0000000..f6bbc9d --- /dev/null +++ b/drivers/net/dwc_eqos/AutoLogger.reg @@ -0,0 +1,12 @@ +Windows Registry Editor Version 5.00 + 
+[HKEY_LOCAL_MACHINE\SYSTEM\ControlSet001\Control\WMI\Autologger\dwc_eqos] +"Guid"="{3fde989c-5470-4452-8f3c-91b0584f5a75}" +"BufferSize"=dword:00000010 +"LogFileMode"=dword:08080400 +"Start"=dword:00000001 + +[HKEY_LOCAL_MACHINE\SYSTEM\ControlSet001\Control\WMI\Autologger\dwc_eqos\{5d8331d3-70b3-5620-5664-db28f48a4b79}] +"Enabled"=dword:00000001 +"EnableLevel"=dword:00000004 +"EnableFlags"=dword:00000003 diff --git a/drivers/net/dwc_eqos/README.md b/drivers/net/dwc_eqos/README.md new file mode 100644 index 0000000..75f2b5b --- /dev/null +++ b/drivers/net/dwc_eqos/README.md @@ -0,0 +1,48 @@ +# Synopsys DesignWare Ethernet Quality of Service (GMAC) Driver + +This is a driver for the Synopsys DesignWare Ethernet Quality of Service (EQoS) +controller found in the RK35xx SoCs, supporting 1Gbps ethernet connections. + +## Compatibility + +EQoS is a configurable IP block that can be customized and added to a SoC. This +driver has been tested only on the RK3588(s) and assumes the presence of +optional features that may be missing on other SoCs. With minor fixes, it would +probably work on other EQoS-based SoCs. The driver specifically checks for the +following: + +- `GMAC_MAC_Version.RKVER` must be 0x51 or 0x52 (other values untested). +- `GMAC_MAC_HW_Feature0.SAVLANINS` must be enabled (require VLAN insertion support). +- `GMAC_MAC_HW_Feature0.RXCOESEL` and `TXCOESEL` must be enabled (require checksum offload support). +- `GMAC_MAC_HW_Feature1.TSOEN` must be enabled (require TCP/UDP segmentation offload support). + +There may be other requirements that are assumed but not checked. + +## ACPI Configuration + +This driver uses ACPI properties to configure the EQoS controller's DMA behavior: + +- `_DSD\snps,pblx8` (default = 1): Controls the value of `GMAC_DMA_CHx_Control.PBLx8`, i.e. controls whether PBL values are treated as 1-beat units (0) or 8-beat units (1, default). +- `_DSD\snps,pbl` (default = 8): Default value for `txpbl` and `rxpbl`. 
+- `_DSD\snps,txpbl` (default = `pbl`): Controls the value of `GMAC_DMA_CHx_Tx_Control.TxPBL`, i.e. transmit programmable burst length. +- `_DSD\snps,rxpbl` (default = `pbl`): Controls the value of `GMAC_DMA_CHx_Rx_Control.RxPBL`, i.e. receive programmable burst length. +- `_DSD\snps,fixed-burst` (default = 0): Controls the value of `GMAC_DMA_SysBus_Mode.FB`. +- `_DSD\snps,mixed-burst` (default = 1): Controls the value of `GMAC_DMA_SysBus_Mode.Bit14`. +- `_DSD\snps,axi-config` (default = none): Controls the `$(AXIC)` method name to use for the remaining properties. If not present, the driver will use default values for the remaining properties. Should generally be set to string `"AXIC"`. +- `$(AXIC)\snps,wr_osr_lmt` (default = 4): Controls the value of `GMAC_DMA_SysBus_Mode.WR_OSR_LMT`, i.e. AXI maximum write outstanding request limit. +- `$(AXIC)\snps,rd_osr_lmt` (default = 8): Controls the value of `GMAC_DMA_SysBus_Mode.RD_OSR_LMT`, i.e. AXI maximum read outstanding request limit. +- `$(AXIC)\snps,blen` (default = `{ 16, 8, 4 }`): Controls the values of `GMAC_DMA_SysBus_Mode.BLENx` (x = 4, 8, 16, 32, 64, 128, 256), i.e. AXI burst length. Should be a list of 7 integers, e.g. `Package () { 0, 0, 0, 0, 16, 8, 4 }`. + +## Areas for improvement: + +- Run against network test suites and fix any issues. +- Memory optimizations? Current implementation uses system-managed buffers. + System-managed buffer size is tied to MTU. When jumbo frames are enabled, + this is wasteful since most packets are still 1522 bytes or less. If we + used driver-managed buffers and updated the Rx queue to handle multi-buffer + packets, we could use 1536-byte or 2048-byte buffers for the Rx queue, saving + about 2MB per device when JumboPacket = 9014. +- Configure speed, duplex via Ndi\params? +- Power control, wake-on-LAN, ARP offload? +- Multi-queue, RSS support? +- Make it more generic (test with other EQoS-based SoCs)? 
diff --git a/drivers/net/dwc_eqos/descriptors.h b/drivers/net/dwc_eqos/descriptors.h index fca7785..1fe110d 100644 --- a/drivers/net/dwc_eqos/descriptors.h +++ b/drivers/net/dwc_eqos/descriptors.h @@ -21,6 +21,14 @@ enum TxChecksumInsertion : UINT16 TxChecksumInsertionEnabledIncludingPseudo = 3, }; +enum TxVlanTagControl : UINT16 +{ + TxVlanTagControlNone = 0, + TxVlanTagControlRemove = 1, + TxVlanTagControlInsert = 2, + TxVlanTagControlReplace = 3, +}; + struct TxDescriptorRead { // TDES0, TDES1 @@ -31,7 +39,7 @@ struct TxDescriptorRead // TDES2 UINT16 Buf1Length : 14; // B1L - UINT16 VlanTagControl : 2; // VTIR + TxVlanTagControl VlanTagControl : 2; // VTIR UINT16 Buf2Length : 14; // B2L UINT16 TransmitTimestampEnable : 1; // TTSE @@ -68,7 +76,7 @@ struct TxDescriptorReadTso // TDES2 UINT16 Buf1Length : 14; // B1L (10-bit header length if FD = 1) - UINT16 VlanTagControl : 2; // VTIR + TxVlanTagControl VlanTagControl : 2; // VTIR UINT16 Buf2Length : 14; // B2L UINT16 TsoMemoryWriteDisable : 1; // TMWD @@ -77,7 +85,7 @@ struct TxDescriptorReadTso // TDES3 UINT32 TcpPayloadLength : 18; // TPL - UINT32 TcpSegmentationEnable : 1; // TSE = 0 + UINT32 TcpSegmentationEnable : 1; // TSE = 1 UINT32 TcpHeaderLength : 4; // TCP/UDP header length (must be 2 for UDP) UINT32 SourceAddressInsertionControl : 3; // SAIC UINT32 Reserved26 : 2; // CPC, ignored when TSE = 1 @@ -159,7 +167,7 @@ struct TxDescriptorContext UINT8 VlanTagValid : 1; // VLTV UINT8 InnerVlanTagValid : 1; // IVLTV - UINT8 InnverVlanTagControl : 2; // IVTIR + UINT8 InnerVlanTagControl : 2; // IVTIR UINT8 Reserved20 : 3; UINT8 DescriptorError : 1; // DE diff --git a/drivers/net/dwc_eqos/device.cpp b/drivers/net/dwc_eqos/device.cpp index 97c1fdb..a187787 100644 --- a/drivers/net/dwc_eqos/device.cpp +++ b/drivers/net/dwc_eqos/device.cpp @@ -574,6 +574,25 @@ AdapterOffloadSetRxChecksum( TraceLoggingBoolean(Udp)); } +static EVT_NET_ADAPTER_OFFLOAD_SET_GSO AdapterOffloadSetGso; +static void +AdapterOffloadSetGso( + 
_In_ NETADAPTER adapter, + _In_ NETOFFLOAD offload) +{ + // PASSIVE_LEVEL, nonpaged (resume path) + UNREFERENCED_PARAMETER(adapter); + auto const LsoIPv4 = NetOffloadIsLsoIPv4Enabled(offload); + auto const LsoIPv6 = NetOffloadIsLsoIPv6Enabled(offload); + auto const UsoIPv4 = NetOffloadIsUsoIPv4Enabled(offload); + auto const UsoIPv6 = NetOffloadIsUsoIPv6Enabled(offload); + TraceEntryExit(AdapterOffloadSetGso, LEVEL_INFO, + TraceLoggingBoolean(LsoIPv4), + TraceLoggingBoolean(LsoIPv6), + TraceLoggingBoolean(UsoIPv4), + TraceLoggingBoolean(UsoIPv6)); +} + static EVT_WDF_DEVICE_D0_ENTRY DeviceD0Entry; static NTSTATUS DeviceD0Entry( @@ -644,7 +663,7 @@ DeviceD0Entry( //macConfig.PadOrCrcStripEnable = true; // Why doesn't this work? //macConfig.CrcStripEnableForType = true; // Why doesn't this work? macConfig.GiantPacketSizeLimitControlEnable = context->config.jumboFrame > JumboPacketMin; - macConfig.ChecksumOffloadEnable = context->config.txCoeSel || context->config.rxCoeSel; + macConfig.ChecksumOffloadEnable = true; Write32(&context->regs->Mac_Configuration, macConfig); MacExtConfiguration_t macExtConfig = {}; @@ -960,13 +979,35 @@ DevicePrepareHardware( status = STATUS_DEVICE_CONFIGURATION_ERROR; goto Done; } + + if (!context->feature0.TxChecksumOffload) + { + // Could adapt at runtime if needed, but assume it's present for now. + TraceWrite("DevicePrepareHardware-TxChecksumOffload-required", LEVEL_ERROR); + status = STATUS_DEVICE_CONFIGURATION_ERROR; + goto Done; + } + + if (!context->feature0.RxChecksumOffload) + { + // Could adapt at runtime if needed, but assume it's present for now. + TraceWrite("DevicePrepareHardware-RxChecksumOffload-required", LEVEL_ERROR); + status = STATUS_DEVICE_CONFIGURATION_ERROR; + goto Done; + } + + if (!context->feature1.TsoEn) + { + // Could adapt at runtime if needed, but assume it's present for now. 
+ TraceWrite("DevicePrepareHardware-TsoEn-required", LEVEL_ERROR); + status = STATUS_DEVICE_CONFIGURATION_ERROR; + goto Done; + } } // Device Config { - context->config.txCoeSel = context->feature0.TxChecksumOffload; - context->config.rxCoeSel = context->feature0.RxChecksumOffload; context->config.pblX8 = true; context->config.pbl = 8; context->config.txPbl = context->config.pbl; @@ -1222,7 +1263,7 @@ DevicePrepareHardware( NET_ADAPTER_TX_CAPABILITIES txCaps; NET_ADAPTER_TX_CAPABILITIES_INIT_FOR_DMA(&txCaps, &dmaCaps, QueuesSupported); - txCaps.MaximumNumberOfFragments = QueueDescriptorMinCount - 2; // = 1 hole in the ring + 1 context descriptor. + txCaps.MaximumNumberOfFragments = TxMaximumNumberOfFragments; // TODO: Driver-managed buffering + multi-descriptor receive would // reduce memory overhead of Jumbo Packets. @@ -1246,21 +1287,18 @@ DevicePrepareHardware( NetPacketFilterFlagPromiscuous; NetAdapterSetReceiveFilterCapabilities(context->adapter, &rxFilterCaps); - if (context->config.txCoeSel) - { - NET_ADAPTER_OFFLOAD_TX_CHECKSUM_CAPABILITIES txChecksumCaps; - NET_ADAPTER_OFFLOAD_TX_CHECKSUM_CAPABILITIES_INIT(&txChecksumCaps, {}, AdapterOffloadSetTxChecksum); - txChecksumCaps.Layer3Flags = - NetAdapterOffloadLayer3FlagIPv4NoOptions | - NetAdapterOffloadLayer3FlagIPv4WithOptions | - NetAdapterOffloadLayer3FlagIPv6NoExtensions | - NetAdapterOffloadLayer3FlagIPv6WithExtensions; - txChecksumCaps.Layer4Flags = - NetAdapterOffloadLayer4FlagTcpNoOptions | - NetAdapterOffloadLayer4FlagTcpWithOptions | - NetAdapterOffloadLayer4FlagUdp; - NetAdapterOffloadSetTxChecksumCapabilities(context->adapter, &txChecksumCaps); - } + NET_ADAPTER_OFFLOAD_TX_CHECKSUM_CAPABILITIES txChecksumCaps; + NET_ADAPTER_OFFLOAD_TX_CHECKSUM_CAPABILITIES_INIT(&txChecksumCaps, + NetAdapterOffloadLayer3FlagIPv4NoOptions | + NetAdapterOffloadLayer3FlagIPv4WithOptions | + NetAdapterOffloadLayer3FlagIPv6NoExtensions | + NetAdapterOffloadLayer3FlagIPv6WithExtensions, + AdapterOffloadSetTxChecksum); + 
txChecksumCaps.Layer4Flags = + NetAdapterOffloadLayer4FlagTcpNoOptions | + NetAdapterOffloadLayer4FlagTcpWithOptions | + NetAdapterOffloadLayer4FlagUdp; + NetAdapterOffloadSetTxChecksumCapabilities(context->adapter, &txChecksumCaps); NET_ADAPTER_OFFLOAD_RX_CHECKSUM_CAPABILITIES rxChecksumCaps; NET_ADAPTER_OFFLOAD_RX_CHECKSUM_CAPABILITIES_INIT(&rxChecksumCaps, @@ -1272,6 +1310,21 @@ DevicePrepareHardware( NetAdapterOffloadIeee8021PriorityTaggingFlag | NetAdapterOffloadIeee8021VlanTaggingFlag); NetAdapterOffloadSetIeee8021qTagCapabilities(context->adapter, &ieee8021qCaps); + + NET_ADAPTER_OFFLOAD_GSO_CAPABILITIES gsoCaps; + NET_ADAPTER_OFFLOAD_GSO_CAPABILITIES_INIT(&gsoCaps, + NetAdapterOffloadLayer3FlagIPv4NoOptions | + NetAdapterOffloadLayer3FlagIPv4WithOptions | + NetAdapterOffloadLayer3FlagIPv6NoExtensions | + NetAdapterOffloadLayer3FlagIPv6WithExtensions, + NetAdapterOffloadLayer4FlagUdp | + NetAdapterOffloadLayer4FlagTcpNoOptions | + NetAdapterOffloadLayer4FlagTcpWithOptions, + TxMaximumOffloadSize, + 2, // MinimumSegmentCount + AdapterOffloadSetGso); + gsoCaps.Layer4HeaderOffsetLimit = TxLayer4HeaderOffsetLimit; + NetAdapterOffloadSetGsoCapabilities(context->adapter, &gsoCaps); } // Initialize adapter. diff --git a/drivers/net/dwc_eqos/device.h b/drivers/net/dwc_eqos/device.h index 94c0733..36741da 100644 --- a/drivers/net/dwc_eqos/device.h +++ b/drivers/net/dwc_eqos/device.h @@ -8,8 +8,6 @@ struct DeviceContext; // TODO: if we do multi-queue, make a DeviceQueueContext s // Information about the device provided to the queues. struct DeviceConfig { - bool txCoeSel; // MAC_HW_Feature0\TXCOESEL (hardware support for tx checksum offload). - bool rxCoeSel; // MAC_HW_Feature0\RXCOESEL (hardware support for rx checksum offload). bool pblX8; // _DSD\snps,pblx8 (default = 1). UINT8 pbl; // _DSD\snps,pbl (default = 8; effect depends on pblX8). UINT8 txPbl; // _DSD\snps,txpbl (default = pbl; effect depends on pblX8). 
@@ -21,7 +19,7 @@ struct DeviceConfig UINT8 blen : 7; // AXIC\snps,blen bitmask of 7 booleans 4..256 (default = 4, 8, 16). bool txFlowControl; // Adapter configuration (Ndi\params\*FlowControl). bool rxFlowControl; // Adapter configuration (Ndi\params\*FlowControl). - UINT16 jumboFrame; // Adapter configuration (Ndi\params\*JumboFrame). 1514..4088 + UINT16 jumboFrame; // Adapter configuration (Ndi\params\*JumboFrame). 1514..9014. UINT16 RxBufferSize() const { diff --git a/drivers/net/dwc_eqos/driver.cpp b/drivers/net/dwc_eqos/driver.cpp index ebb1588..c2acbcd 100644 --- a/drivers/net/dwc_eqos/driver.cpp +++ b/drivers/net/dwc_eqos/driver.cpp @@ -2,15 +2,6 @@ #include "device.h" #include "trace.h" -/* -Possible areas for improvement: -- Tx segmentation offload. -- Run against network test suites and fix any issues. -- Power control, wake-on-LAN, ARP offload. -- Configure speed, duplex in Ndi\params. -- Multi-queue support? -*/ - TRACELOGGING_DEFINE_PROVIDER( TraceProvider, "dwc_eqos", diff --git a/drivers/net/dwc_eqos/dwc_eqos.inf b/drivers/net/dwc_eqos/dwc_eqos.inf index 2e54266..3c05612 100644 --- a/drivers/net/dwc_eqos/dwc_eqos.inf +++ b/drivers/net/dwc_eqos/dwc_eqos.inf @@ -96,7 +96,7 @@ HKR, Ndi\params\*FlowControl, type, 0, "enum" HKR, Ndi\params\*FlowControl\enum, "0", 0, %Disabled% HKR, Ndi\params\*FlowControl\enum, "1", 0, %TxEnabled% HKR, Ndi\params\*FlowControl\enum, "2", 0, %RxEnabled% -HKR, Ndi\params\*FlowControl\enum, "3", 0, %TxRxEnabled% +HKR, Ndi\params\*FlowControl\enum, "3", 0, %RxTxEnabled% HKR, Ndi\Params\*PriorityVLANTag, ParamDesc, 0, %PriorityVlanTag% HKR, Ndi\Params\*PriorityVLANTag, Default, 0, "3" @@ -106,21 +106,69 @@ HKR, Ndi\Params\*PriorityVLANTag\enum, "1", 0, %PriorityEn HKR, Ndi\Params\*PriorityVLANTag\enum, "2", 0, %VlanEnabled% HKR, Ndi\Params\*PriorityVLANTag\enum, "3", 0, %PriorityVlanEnabled% -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv4, ParamDesc, 0, %TCPUDPChecksumOffloadIPv4% -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv4, 
default, 0, "3" -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv4, type, 0, "enum" -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv4\enum, "0", 0, %Disabled% -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv4\enum, "1", 0, %TxEnabled% -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv4\enum, "2", 0, %RxEnabled% -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv4\enum, "3", 0, %TxRxEnabled% - -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv6, ParamDesc, 0, %TCPUDPChecksumOffloadIPv6% -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv6, default, 0, "3" -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv6, type, 0, "enum" -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv6\enum, "0", 0, %Disabled% -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv6\enum, "1", 0, %TxEnabled% -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv6\enum, "2", 0, %RxEnabled% -HKR, Ndi\params\*TCPUDPChecksumOffloadIPv6\enum, "3", 0, %TxRxEnabled% +HKR, Ndi\params\*IPChecksumOffloadIPv4, ParamDesc, 0, %IPChecksumOffloadIPv4% +HKR, Ndi\params\*IPChecksumOffloadIPv4, default, 0, "3" +HKR, Ndi\params\*IPChecksumOffloadIPv4, type, 0, "enum" +HKR, Ndi\params\*IPChecksumOffloadIPv4\enum, "0", 0, %Disabled% +HKR, Ndi\params\*IPChecksumOffloadIPv4\enum, "1", 0, %TxEnabled% +HKR, Ndi\params\*IPChecksumOffloadIPv4\enum, "2", 0, %RxEnabled% +HKR, Ndi\params\*IPChecksumOffloadIPv4\enum, "3", 0, %RxTxEnabled% + +HKR, Ndi\params\*TCPChecksumOffloadIPv4, ParamDesc, 0, %TCPChecksumOffloadIPv4% +HKR, Ndi\params\*TCPChecksumOffloadIPv4, default, 0, "3" +HKR, Ndi\params\*TCPChecksumOffloadIPv4, type, 0, "enum" +HKR, Ndi\params\*TCPChecksumOffloadIPv4\enum, "0", 0, %Disabled% +HKR, Ndi\params\*TCPChecksumOffloadIPv4\enum, "1", 0, %TxEnabled% +HKR, Ndi\params\*TCPChecksumOffloadIPv4\enum, "2", 0, %RxEnabled% +HKR, Ndi\params\*TCPChecksumOffloadIPv4\enum, "3", 0, %RxTxEnabled% + +HKR, Ndi\params\*UDPChecksumOffloadIPv4, ParamDesc, 0, %UDPChecksumOffloadIPv4% +HKR, Ndi\params\*UDPChecksumOffloadIPv4, default, 0, "3" +HKR, Ndi\params\*UDPChecksumOffloadIPv4, type, 0, "enum" +HKR, 
Ndi\params\*UDPChecksumOffloadIPv4\enum, "0", 0, %Disabled% +HKR, Ndi\params\*UDPChecksumOffloadIPv4\enum, "1", 0, %TxEnabled% +HKR, Ndi\params\*UDPChecksumOffloadIPv4\enum, "2", 0, %RxEnabled% +HKR, Ndi\params\*UDPChecksumOffloadIPv4\enum, "3", 0, %RxTxEnabled% + +HKR, Ndi\params\*TCPChecksumOffloadIPv6, ParamDesc, 0, %TCPChecksumOffloadIPv6% +HKR, Ndi\params\*TCPChecksumOffloadIPv6, default, 0, "3" +HKR, Ndi\params\*TCPChecksumOffloadIPv6, type, 0, "enum" +HKR, Ndi\params\*TCPChecksumOffloadIPv6\enum, "0", 0, %Disabled% +HKR, Ndi\params\*TCPChecksumOffloadIPv6\enum, "1", 0, %TxEnabled% +HKR, Ndi\params\*TCPChecksumOffloadIPv6\enum, "2", 0, %RxEnabled% +HKR, Ndi\params\*TCPChecksumOffloadIPv6\enum, "3", 0, %RxTxEnabled% + +HKR, Ndi\params\*UDPChecksumOffloadIPv6, ParamDesc, 0, %UDPChecksumOffloadIPv6% +HKR, Ndi\params\*UDPChecksumOffloadIPv6, default, 0, "3" +HKR, Ndi\params\*UDPChecksumOffloadIPv6, type, 0, "enum" +HKR, Ndi\params\*UDPChecksumOffloadIPv6\enum, "0", 0, %Disabled% +HKR, Ndi\params\*UDPChecksumOffloadIPv6\enum, "1", 0, %TxEnabled% +HKR, Ndi\params\*UDPChecksumOffloadIPv6\enum, "2", 0, %RxEnabled% +HKR, Ndi\params\*UDPChecksumOffloadIPv6\enum, "3", 0, %RxTxEnabled% + +HKR, Ndi\Params\*LsoV2IPv4, ParamDesc, 0, %LsoV2IPv4% +HKR, Ndi\Params\*LsoV2IPv4, Type, 0, "enum" +HKR, Ndi\Params\*LsoV2IPv4, Default, 0, "1" +HKR, Ndi\Params\*LsoV2IPv4\enum, "0", 0, %Disabled% +HKR, Ndi\Params\*LsoV2IPv4\enum, "1", 0, %Enabled% + +HKR, Ndi\Params\*LsoV2IPv6, ParamDesc, 0, %LsoV2IPv6% +HKR, Ndi\Params\*LsoV2IPv6, Type, 0, "enum" +HKR, Ndi\Params\*LsoV2IPv6, Default, 0, "1" +HKR, Ndi\Params\*LsoV2IPv6\enum, "0", 0, %Disabled% +HKR, Ndi\Params\*LsoV2IPv6\enum, "1", 0, %Enabled% + +HKR, Ndi\Params\*UsoIPv4, ParamDesc, 0, %UsoIPv4% +HKR, Ndi\Params\*UsoIPv4, Type, 0, "enum" +HKR, Ndi\Params\*UsoIPv4, Default, 0, "1" +HKR, Ndi\Params\*UsoIPv4\enum, "0", 0, %Disabled% +HKR, Ndi\Params\*UsoIPv4\enum, "1", 0, %Enabled% + +HKR, Ndi\Params\*UsoIPv6, ParamDesc, 0, %UsoIPv6% 
+HKR, Ndi\Params\*UsoIPv6, Type, 0, "enum" +HKR, Ndi\Params\*UsoIPv6, Default, 0, "1" +HKR, Ndi\Params\*UsoIPv6\enum, "0", 0, %Disabled% +HKR, Ndi\Params\*UsoIPv6\enum, "1", 0, %Enabled% [DWCEQOS_Device.NT.Services] AddService = %ServiceName%, 2, DWCEQOS_AddService, DWCEQOS_AddService_EventLog @@ -158,12 +206,20 @@ NetworkAddress = "Network Address" JumboPacket = "Jumbo Packet" FlowControl = "Flow Control" PriorityVlanTag = "Packet Priority & VLAN" -TCPUDPChecksumOffloadIPv4 = "TCP/UDP Checksum Offload (IPv4)" -TCPUDPChecksumOffloadIPv6 = "TCP/UDP Checksum Offload (IPv6)" +IPChecksumOffloadIPv4 = "IPv4 Checksum Offload" +TCPChecksumOffloadIPv4 = "TCP Checksum Offload (IPv4)" +TCPChecksumOffloadIPv6 = "TCP Checksum Offload (IPv6)" +UDPChecksumOffloadIPv4 = "UDP Checksum Offload (IPv4)" +UDPChecksumOffloadIPv6 = "UDP Checksum Offload (IPv6)" +LsoV2IPv4 = "Large Send Offload V2 (IPv4)" +LsoV2IPv6 = "Large Send Offload V2 (IPv6)" +UsoIPv4 = "UDP Segmentation Offload (IPv4)" +UsoIPv6 = "UDP Segmentation Offload (IPv6)" Disabled = "Disabled" +Enabled = "Enabled" TxEnabled = "Tx Enabled" RxEnabled = "Rx Enabled" -TxRxEnabled = "Tx and Rx Enabled" +RxTxEnabled = "Rx & Tx Enabled" PriorityEnabled = "Packet Priority Enabled" VlanEnabled = "VLAN Enabled" PriorityVlanEnabled = "Packet Priority & VLAN Enabled" diff --git a/drivers/net/dwc_eqos/dwc_eqos.vcxproj b/drivers/net/dwc_eqos/dwc_eqos.vcxproj index e546bd7..7acdadd 100644 --- a/drivers/net/dwc_eqos/dwc_eqos.vcxproj +++ b/drivers/net/dwc_eqos/dwc_eqos.vcxproj @@ -151,6 +151,10 @@ + + + + diff --git a/drivers/net/dwc_eqos/dwc_eqos.vcxproj.filters b/drivers/net/dwc_eqos/dwc_eqos.vcxproj.filters index 0ca2068..f1f65d4 100644 --- a/drivers/net/dwc_eqos/dwc_eqos.vcxproj.filters +++ b/drivers/net/dwc_eqos/dwc_eqos.vcxproj.filters @@ -84,4 +84,12 @@ Driver Files + + + Driver Files + + + Driver Files + + \ No newline at end of file diff --git a/drivers/net/dwc_eqos/precomp.h b/drivers/net/dwc_eqos/precomp.h index f555bce..4147404 
100644 --- a/drivers/net/dwc_eqos/precomp.h +++ b/drivers/net/dwc_eqos/precomp.h @@ -8,6 +8,7 @@ #pragma warning(pop) #include +#include #include #include #include diff --git a/drivers/net/dwc_eqos/queue_common.h b/drivers/net/dwc_eqos/queue_common.h index 6906e04..77a2fbf 100644 --- a/drivers/net/dwc_eqos/queue_common.h +++ b/drivers/net/dwc_eqos/queue_common.h @@ -3,9 +3,10 @@ Definitions shared between TxQueue and RxQueue. */ #pragma once -UINT32 constexpr QueueDescriptorSize = 64; // 64 == sizeof(TxDescriptor) == sizeof(RxDescriptor) -UINT32 constexpr QueueDescriptorMinCount = PAGE_SIZE / QueueDescriptorSize; // Page granularity for allocation, might as well use the whole page. -UINT32 constexpr QueueDescriptorMaxCount = 0x400; // Hardware limitation. +UINT16 constexpr QueueDescriptorSize = 64; // 64 == sizeof(TxDescriptor) == sizeof(RxDescriptor) +UINT16 constexpr QueueDescriptorMinCount = PAGE_SIZE / QueueDescriptorSize; // Page granularity for allocation, might as well use the whole page. +UINT16 constexpr QueueDescriptorMaxCount = 0x400; // Hardware limitation. +UINT16 constexpr QueueDescriptorLengthMax = 0x3FFF; // 14 bits. // Alignment is primarily to make sure the allocation does not cross a 4GB boundary. // It also simplifies the QueueDescriptorAddressToIndex implementation. diff --git a/drivers/net/dwc_eqos/trace.h b/drivers/net/dwc_eqos/trace.h index 9a4845f..15c7992 100644 --- a/drivers/net/dwc_eqos/trace.h +++ b/drivers/net/dwc_eqos/trace.h @@ -8,9 +8,11 @@ Collect traces using something like: tracefmt FileName.etl Send traces to KD by running "!wmitrace.dynamicprint 1" in WinDbg and starting a trace -session with -kd enabled, either via trace tool or via autologger: +session with -kd enabled, either via trace tool, tracelog -start SessionName -guid *dwc_eqos -level 4 -kd + +or by setting up a boot-start session using the AutoLogger.reg file in this project. 
*/ #pragma once diff --git a/drivers/net/dwc_eqos/txqueue.cpp b/drivers/net/dwc_eqos/txqueue.cpp index a15a2bc..740a95b 100644 --- a/drivers/net/dwc_eqos/txqueue.cpp +++ b/drivers/net/dwc_eqos/txqueue.cpp @@ -19,13 +19,14 @@ struct TxQueueContext TxDescriptor* descVirtual; PHYSICAL_ADDRESS descPhysical; NET_EXTENSION packetIeee8021Q; + NET_EXTENSION packetGso; NET_EXTENSION packetChecksum; NET_EXTENSION fragmentLogical; UINT32 descCount; // A power of 2 between QueueDescriptorMinCount and QueueDescriptorMaxCount. UINT8 txPbl; - bool txChecksumOffload; - UINT16 lastVlanTag; + UINT16 lastMss; // MSS set by the most recent context descriptor. + UINT16 lastVlanTag; // VLAN tag set by the most recent context descriptor. UINT32 descBegin; // Start of the TRANSMIT region. UINT32 descEnd; // End of the TRANSMIT region, start of the EMPTY region. }; @@ -64,6 +65,7 @@ TxQueueStart(_In_ NETPACKETQUEUE queue) // PASSIVE_LEVEL, nonpaged (resume path) auto const context = TxQueueGetContext(queue); + context->lastMss = 0; // Make no assumptions about the device's current MSS. context->lastVlanTag = 0; // Make no assumptions about the device's current vlan tag. context->descBegin = 0; context->descEnd = 0; @@ -81,6 +83,8 @@ TxQueueStart(_In_ NETPACKETQUEUE queue) ChannelTxControl_t txControl = {}; txControl.Start = true; txControl.OperateOnSecondPacket = true; + txControl.TcpSegmentation = true; + txControl.TcpSegmentationMode = 0; // TSO+USO mode. txControl.TxPbl = context->txPbl; Write32(&context->channelRegs->Tx_Control, txControl); @@ -88,27 +92,26 @@ TxQueueStart(_In_ NETPACKETQUEUE queue) } // Starting at pktIndex, scan forward until we reach packetRing->NextIndex or a -// non-ignored packet. If we reach a non-ignored packet, sets *pktLastFrag to the last -// fragment of the found packet and returns the packet's index. Otherwise, sets -// *pktLastFrag to 0xFFFFFFFF and returns packetRing->NextIndex. +// non-ignored packet. 
If we reach a non-ignored packet, set *pktFragIndex to +// pkt->FragmentIndex and return the packet's index. Otherwise, leave +// *pktFragIndex unmodified and return packetRing->NextIndex. static UINT32 -SkipIgnorePackets(UINT32 pktIndex, _In_ TxQueueContext const* context, _Out_ UINT32* pktLastFrag) +SkipIgnorePackets(_In_ NET_RING const* packetRing, UINT32 pktIndex, _Inout_ UINT32* pktFragIndex) { // DISPATCH_LEVEL - auto const pktNext = context->packetRing->NextIndex; - for (; pktIndex != pktNext; pktIndex = NetRingIncrementIndex(context->packetRing, pktIndex)) + auto const pktNext = packetRing->NextIndex; + for (; pktIndex != pktNext; pktIndex = NetRingIncrementIndex(packetRing, pktIndex)) { - auto const pkt = NetRingGetPacketAtIndex(context->packetRing, pktIndex); + auto const pkt = NetRingGetPacketAtIndex(packetRing, pktIndex); if (!pkt->Ignore) { NT_ASSERT(pkt->FragmentCount != 0); - *pktLastFrag = NetRingAdvanceIndex(context->fragmentRing, pkt->FragmentIndex, pkt->FragmentCount - 1u); - return pktIndex; + *pktFragIndex = pkt->FragmentIndex; + break; } } - *pktLastFrag = 0xFFFFFFFF; - return pktNext; + return pktIndex; } static EVT_PACKET_QUEUE_ADVANCE TxQueueAdvance; @@ -119,7 +122,7 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) auto const context = TxQueueGetContext(queue); auto const descMask = context->descCount - 1u; auto const descEnd = context->descEnd; - UINT32 descIndex, pktIndex, pktLastFrag, fragIndex; + UINT32 descIndex, pktIndex, pktFragIndex; UINT32 doneFrags = 0, queuedFrags = 0; /* @@ -133,13 +136,12 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) // Indicate transmitted packets. // Process any descriptors that the adapter has handed back to us. - // Release the corresponding fragments and packets. 
- pktIndex = SkipIgnorePackets(context->packetRing->BeginIndex, context, &pktLastFrag); - fragIndex = context->fragmentRing->BeginIndex; + pktFragIndex = context->fragmentRing->BeginIndex; + pktIndex = context->packetRing->BeginIndex; + pktIndex = SkipIgnorePackets(context->packetRing, pktIndex, &pktFragIndex); for (descIndex = context->descBegin; descIndex != descEnd; descIndex = (descIndex + 1) & descMask) { NT_ASSERT(pktIndex != context->packetRing->NextIndex); - NT_ASSERT(pktLastFrag != 0xFFFFFFFF); auto const& desc = context->descVirtual[descIndex]; auto const descWrite = desc.Write; @@ -151,7 +153,6 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) } NT_ASSERT(descWrite.PacketIndex == pktIndex); - NT_ASSERT(descWrite.FragmentIndex == fragIndex); if (descWrite.ContextType) { @@ -159,33 +160,29 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) continue; } - if (descWrite.ErrorSummary || - descWrite.ContextType || - descWrite.DescriptorError || - (descWrite.LastDescriptor != 0) != (fragIndex == pktLastFrag)) + if (descWrite.LastDescriptor) { - TraceWrite("TxQueueAdvance-error", LEVEL_ERROR, - TraceLoggingUInt32(descIndex), - TraceLoggingHexInt32(reinterpret_cast(&descWrite)[3], "TDES3"), - TraceLoggingUInt32(fragIndex), - TraceLoggingUInt32(pktLastFrag)); - } - - // Non-context descriptors map one-to-one with fragments. + if (descWrite.ErrorSummary || + descWrite.ContextType || + descWrite.DescriptorError) + { + TraceWrite("TxQueueAdvance-error", LEVEL_ERROR, + TraceLoggingUInt32(descIndex), + TraceLoggingHexInt32(reinterpret_cast(&descWrite)[3], "TDES3"), + TraceLoggingUInt32(pktIndex)); + } - if (fragIndex == pktLastFrag) - { // Packet is complete. Move to the next one. 
- pktIndex = SkipIgnorePackets(NetRingIncrementIndex(context->packetRing, pktIndex), context, &pktLastFrag); + auto const pkt = NetRingGetPacketAtIndex(context->packetRing, pktIndex); + pktFragIndex = NetRingAdvanceIndex(context->fragmentRing, pkt->FragmentIndex, pkt->FragmentCount); + pktIndex = NetRingIncrementIndex(context->packetRing, pktIndex); + pktIndex = SkipIgnorePackets(context->packetRing, pktIndex, &pktFragIndex); } - - // Fragment is complete. Move to the next one. - fragIndex = NetRingIncrementIndex(context->fragmentRing, fragIndex); } // Return the completed packets and fragments to NetAdapterCx. context->packetRing->BeginIndex = pktIndex; - context->fragmentRing->BeginIndex = fragIndex; + context->fragmentRing->BeginIndex = pktFragIndex; auto const descBegin = descIndex; context->descBegin = descBegin; @@ -194,15 +191,12 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) pktIndex = context->packetRing->NextIndex; - // Number of EMPTY is (descBegin-1) - descEnd (wrapping around if necessary). auto const pktEnd = context->packetRing->EndIndex; - auto const txChecksumOffload = context->txChecksumOffload; - auto descEmpty = (descBegin - 1 - descEnd) & descMask; descIndex = descEnd; - while (descEmpty != 0) - { - NT_ASSERT(descIndex != ((descBegin - 1) & descMask)); +#define EMPTY_DESC_REMAINING(descIndex) ((descBegin - 1u - descIndex) & descMask) // -1 because ring has a hole. + while (EMPTY_DESC_REMAINING(descIndex) != 0) + { if (pktIndex == pktEnd) { break; @@ -211,100 +205,246 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) auto const pkt = NetRingGetPacketAtIndex(context->packetRing, pktIndex); if (!pkt->Ignore) { - fragIndex = pkt->FragmentIndex; - - // If checksum offload is disabled by hardware then we can't call - // NetExtensionGetPacketChecksum because device.cpp didn't call - // NetAdapterOffloadSetTxChecksumCapabilities. - // If offload is disabled by software then the extension will be zeroed. - auto const checksum = txChecksumOffload - ? 
*NetExtensionGetPacketChecksum(&context->packetChecksum, pktIndex) - : NET_PACKET_CHECKSUM{}; // Disabled by hardware. - auto const checksumInsertion = - checksum.Layer4 ? TxChecksumInsertionEnabledIncludingPseudo - : checksum.Layer3 ? TxChecksumInsertionEnabledHeaderOnly - : TxChecksumInsertionDisabled; + UINT32 fragIndex = pkt->FragmentIndex; UINT32 const fragmentCount = pkt->FragmentCount; NT_ASSERT(fragmentCount != 0); - auto const ieee8021Q = NetExtensionGetPacketIeee8021Q(&context->packetIeee8021Q, pktIndex); - if (ieee8021Q->TxTagging != 0) + auto const ieee8021Q = *NetExtensionGetPacketIeee8021Q(&context->packetIeee8021Q, pktIndex); + + auto const gso = NetExtensionGetPacketGso(&context->packetGso, pktIndex); + NT_ASSERT(gso->TCP.Mss <= 0x3FFF); // 14 bits + auto const gsoMss = static_cast(gso->TCP.Mss); + auto const vlanTagControl = ieee8021Q.TxTagging != 0 ? TxVlanTagControlInsert : TxVlanTagControlNone; + + if (gsoMss != 0) // segmentation offload enabled { - UINT16 const newTag = (ieee8021Q->PriorityCodePoint << 13) | ieee8021Q->VlanIdentifier; - NT_ASSERT(newTag != 0); - if (newTag == context->lastVlanTag) + auto const layout = pkt->Layout; + NT_ASSERT(layout.Layer4Type == NetPacketLayer4TypeTcp || layout.Layer4Type == NetPacketLayer4TypeUdp); + if (layout.Layer4Type == NetPacketLayer4TypeTcp) { - goto NoContextPacket; + NT_ASSERT(layout.Layer4HeaderLength >= 4u * 5u); + NT_ASSERT(layout.Layer4HeaderLength <= 4u * 15u); + } + else + { + NT_ASSERT(layout.Layer4HeaderLength == sizeof(UINT32) * 2u); } - if (fragmentCount + 1 > descEmpty) + // TSO/USO: Headers up to the payload. 
+ UINT16 const headerLength = layout.Layer2HeaderLength + layout.Layer3HeaderLength + layout.Layer4HeaderLength; + NT_ASSERT(headerLength <= TxLayer4HeaderOffsetLimit); + + UINT32 packetLength = 0; + unsigned userDescriptorsNeeded = 0; + for (unsigned i = 0, fragIndex2 = fragIndex; i != fragmentCount; i += 1) { + auto const fragLength = static_cast(NetRingGetFragmentAtIndex(context->fragmentRing, fragIndex2)->ValidLength); + packetLength += fragLength; + userDescriptorsNeeded += (fragLength + QueueDescriptorLengthMax - 1u) / QueueDescriptorLengthMax; + NT_ASSERT(packetLength <= TxMaximumOffloadSize + headerLength); + fragIndex2 = NetRingIncrementIndex(context->fragmentRing, fragIndex2); + } + NT_ASSERT(headerLength <= packetLength); + NT_ASSERT(userDescriptorsNeeded <= TxMaximumNumberOfFragments); + + UINT16 newTag; + if (ieee8021Q.TxTagging != 0) + { + newTag = (ieee8021Q.PriorityCodePoint << 13) | ieee8021Q.VlanIdentifier; + NT_ASSERT(newTag != 0); + } + else + { + newTag = context->lastVlanTag; + } + + bool const contextTagNeeded = newTag != context->lastVlanTag; + bool const contextMssNeeded = gsoMss != context->lastMss; + bool const contextNeeded = contextTagNeeded || contextMssNeeded; + + // We might need a context descriptor. + // We will need a header descriptor. + if (userDescriptorsNeeded + contextNeeded + 1u > EMPTY_DESC_REMAINING(descIndex)) + { + // Wait until more descriptors are free. 
break; } - TxDescriptorContext descCtx = {}; - descCtx.VlanTag = newTag; - descCtx.VlanTagValid = true; - descCtx.ContextType = true; - descCtx.Own = true; + if (contextNeeded) + { + TxDescriptorContext descCtx = {}; + descCtx.MaximumSegmentSize = gsoMss; + descCtx.VlanTag = newTag; + descCtx.VlanTagValid = contextTagNeeded; + descCtx.OneStepInputOrMssValid = true; + descCtx.ContextType = true; + descCtx.Own = true; #if DBG - descCtx.PacketIndex = pktIndex; - descCtx.FragmentIndex = fragIndex; + descCtx.PacketIndex = pktIndex; + descCtx.FragmentIndex = fragIndex; #endif - context->descVirtual[descIndex].Context = descCtx; + NT_ASSERT(EMPTY_DESC_REMAINING(descIndex) != 0); + context->descVirtual[descIndex].Context = descCtx; + descIndex = (descIndex + 1) & descMask; + context->lastMss = gsoMss; + context->lastVlanTag = newTag; + } + + auto const frag0LogicalAddress = NetExtensionGetFragmentLogicalAddress(&context->fragmentLogical, fragIndex)->LogicalAddress; + NT_ASSERT(headerLength <= NetRingGetFragmentAtIndex(context->fragmentRing, fragIndex)->ValidLength); + + TxDescriptorReadTso descTso = {}; + descTso.Buf1Ap = static_cast(frag0LogicalAddress); + descTso.Buf2Ap = static_cast(frag0LogicalAddress >> 32); + descTso.Buf1Length = headerLength; + descTso.VlanTagControl = vlanTagControl; + descTso.TcpPayloadLength = packetLength - headerLength; + descTso.TcpSegmentationEnable = true; + descTso.TcpHeaderLength = layout.Layer4HeaderLength / 4u; + descTso.FirstDescriptor = true; + descTso.Own = true; +#if DBG + descTso.PacketIndex = pktIndex; + descTso.FragmentIndex = fragIndex; +#endif + + NT_ASSERT(EMPTY_DESC_REMAINING(descIndex) != 0); + context->descVirtual[descIndex].ReadTso = descTso; descIndex = (descIndex + 1) & descMask; - descEmpty -= 1; + + unsigned nextFragmentStart = headerLength; + for (unsigned i = 0; i != fragmentCount; i += 1) + { + unsigned fragPos = nextFragmentStart; // Skip header for first fragment. 
+ nextFragmentStart = 0; // Don't skip header for subsequent fragments. + auto const frag = NetRingGetFragmentAtIndex(context->fragmentRing, fragIndex); + auto const fragLogicalAddress = NetExtensionGetFragmentLogicalAddress(&context->fragmentLogical, fragIndex)->LogicalAddress; + auto const fragLength = static_cast(frag->ValidLength); + NT_ASSERT(fragLength != 0); // Otherwise we might not set LastDescriptor. + + while (fragPos != fragLength) + { + auto const bufLogicalAddress = fragLogicalAddress + fragPos; + auto const bufLength = fragLength - fragPos < QueueDescriptorLengthMax + ? static_cast(fragLength - fragPos) + : QueueDescriptorLengthMax; + fragPos += bufLength; + auto const lastDesc = fragPos == fragLength && i == fragmentCount - 1u; + + TxDescriptorRead descRead = {}; + descRead.Buf1Ap = static_cast(bufLogicalAddress); + descRead.Buf2Ap = static_cast(bufLogicalAddress >> 32); + descRead.Buf1Length = bufLength; + descRead.VlanTagControl = vlanTagControl; + descRead.InterruptOnCompletion = lastDesc; + descRead.LastDescriptor = lastDesc; + descRead.FirstDescriptor = false; + descRead.Own = true; +#if DBG + descRead.PacketIndex = pktIndex; + descRead.FragmentIndex = fragIndex; +#endif + + NT_ASSERT(EMPTY_DESC_REMAINING(descIndex) != 0); + context->descVirtual[descIndex].Read = descRead; + descIndex = (descIndex + 1) & descMask; + } + + fragIndex = NetRingIncrementIndex(context->fragmentRing, fragIndex); + queuedFrags += 1; + } } - else + else // segmentation offload disabled { - NoContextPacket: + // If offload is disabled by software then the extension will be zeroed. + auto const checksum = *NetExtensionGetPacketChecksum(&context->packetChecksum, pktIndex); + auto const checksumInsertion = + checksum.Layer4 ? TxChecksumInsertionEnabledIncludingPseudo + : checksum.Layer3 ? 
TxChecksumInsertionEnabledHeaderOnly + : TxChecksumInsertionDisabled; + + if (ieee8021Q.TxTagging != 0) + { + UINT16 const newTag = (ieee8021Q.PriorityCodePoint << 13) | ieee8021Q.VlanIdentifier; + NT_ASSERT(newTag != 0); + if (newTag == context->lastVlanTag) + { + goto NoContextPacket; + } + + // We will need a context descriptor. + if (fragmentCount + 1u > EMPTY_DESC_REMAINING(descIndex)) + { + // Wait until more descriptors are free. + break; + } + + TxDescriptorContext descCtx = {}; + descCtx.MaximumSegmentSize = context->lastMss; + descCtx.VlanTag = newTag; + descCtx.VlanTagValid = true; + descCtx.ContextType = true; + descCtx.Own = true; +#if DBG + descCtx.PacketIndex = pktIndex; + descCtx.FragmentIndex = fragIndex; +#endif - if (fragmentCount > descEmpty) + NT_ASSERT(EMPTY_DESC_REMAINING(descIndex) != 0); + context->descVirtual[descIndex].Context = descCtx; + descIndex = (descIndex + 1) & descMask; + context->lastVlanTag = newTag; + } + else { - break; + NoContextPacket: + + if (fragmentCount > EMPTY_DESC_REMAINING(descIndex)) + { + // Wait until more descriptors are free. 
+ break; + } } - } - UINT32 frameLength = 0; - for (unsigned i = 0, fragIndex2 = fragIndex; i != fragmentCount; i += 1) - { - frameLength += NetRingGetFragmentAtIndex(context->fragmentRing, fragIndex2)->ValidLength & 0x03FFFFFF; // 26 bits - fragIndex2 = NetRingIncrementIndex(context->fragmentRing, fragIndex2); - } - NT_ASSERT(frameLength <= 0x7FFF); - frameLength &= 0x7FFF; + UINT32 frameLength = 0; + for (unsigned i = 0, fragIndex2 = fragIndex; i != fragmentCount; i += 1) + { + frameLength += static_cast(NetRingGetFragmentAtIndex(context->fragmentRing, fragIndex2)->ValidLength); + NT_ASSERT(frameLength <= 0x7FFF); + fragIndex2 = NetRingIncrementIndex(context->fragmentRing, fragIndex2); + } - for (unsigned i = 0; i != fragmentCount; i += 1) - { - auto const frag = NetRingGetFragmentAtIndex(context->fragmentRing, fragIndex); - NT_ASSERT(frag->ValidLength <= frag->Capacity - frag->Offset); - auto const fragLogicalAddress = NetExtensionGetFragmentLogicalAddress(&context->fragmentLogical, fragIndex)->LogicalAddress; - - TxDescriptorRead descRead = {}; - descRead.Buf1Ap = static_cast(fragLogicalAddress); - descRead.Buf2Ap = static_cast(fragLogicalAddress >> 32); - NT_ASSERT(frag->ValidLength <= 0x3FFF); // 14 bits - descRead.Buf1Length = frag->ValidLength & 0x3FFF; - descRead.InterruptOnCompletion = i == fragmentCount - 1u; - descRead.FrameLength = static_cast(frameLength); - descRead.ChecksumInsertion = checksumInsertion; - descRead.LastDescriptor = i == fragmentCount - 1u; - descRead.FirstDescriptor = i == 0; - descRead.Own = true; + for (unsigned i = 0; i != fragmentCount; i += 1) + { + auto const frag = NetRingGetFragmentAtIndex(context->fragmentRing, fragIndex); + auto const fragLogicalAddress = NetExtensionGetFragmentLogicalAddress(&context->fragmentLogical, fragIndex)->LogicalAddress; + + TxDescriptorRead descRead = {}; + descRead.Buf1Ap = static_cast(fragLogicalAddress); + descRead.Buf2Ap = static_cast(fragLogicalAddress >> 32); + NT_ASSERT(frag->ValidLength <= 
QueueDescriptorLengthMax); // 14 bits + descRead.Buf1Length = frag->ValidLength; + descRead.VlanTagControl = vlanTagControl; + descRead.InterruptOnCompletion = i == fragmentCount - 1u; + descRead.FrameLength = static_cast(frameLength); + descRead.ChecksumInsertion = checksumInsertion; + descRead.LastDescriptor = i == fragmentCount - 1u; + descRead.FirstDescriptor = i == 0; + descRead.Own = true; #if DBG - descRead.PacketIndex = pktIndex; - descRead.FragmentIndex = fragIndex; + descRead.PacketIndex = pktIndex; + descRead.FragmentIndex = fragIndex; #endif - context->descVirtual[descIndex].Read = descRead; - descIndex = (descIndex + 1) & descMask; - fragIndex = NetRingIncrementIndex(context->fragmentRing, fragIndex); - queuedFrags += 1; - } - - descEmpty -= fragmentCount; + NT_ASSERT(EMPTY_DESC_REMAINING(descIndex) != 0); + context->descVirtual[descIndex].Read = descRead; + descIndex = (descIndex + 1) & descMask; + fragIndex = NetRingIncrementIndex(context->fragmentRing, fragIndex); + queuedFrags += 1; + } + } // segmentation offload enabled/disabled } pktIndex = NetRingIncrementIndex(context->packetRing, pktIndex); @@ -496,7 +636,6 @@ TxQueueCreate( context->fragmentRing = NetRingCollectionGetFragmentRing(rings); context->descCount = QueueDescriptorCount(context->fragmentRing->NumberOfElements); context->txPbl = deviceConfig.txPbl; - context->txChecksumOffload = deviceConfig.txCoeSel; TraceWrite("TxQueueCreate-size", LEVEL_VERBOSE, TraceLoggingHexInt32(context->packetRing->NumberOfElements, "packets"), @@ -535,14 +674,17 @@ TxQueueCreate( NetExtensionTypePacket); NetTxQueueGetExtension(queue, &query, &context->packetIeee8021Q); - if (context->txChecksumOffload) - { - NET_EXTENSION_QUERY_INIT(&query, - NET_PACKET_EXTENSION_CHECKSUM_NAME, - NET_PACKET_EXTENSION_CHECKSUM_VERSION_1, - NetExtensionTypePacket); - NetTxQueueGetExtension(queue, &query, &context->packetChecksum); - } + NET_EXTENSION_QUERY_INIT(&query, + NET_PACKET_EXTENSION_GSO_NAME, + 
NET_PACKET_EXTENSION_GSO_VERSION_1, + NetExtensionTypePacket); + NetTxQueueGetExtension(queue, &query, &context->packetGso); + + NET_EXTENSION_QUERY_INIT(&query, + NET_PACKET_EXTENSION_CHECKSUM_NAME, + NET_PACKET_EXTENSION_CHECKSUM_VERSION_1, + NetExtensionTypePacket); + NetTxQueueGetExtension(queue, &query, &context->packetChecksum); NET_EXTENSION_QUERY_INIT(&query, NET_FRAGMENT_EXTENSION_LOGICAL_ADDRESS_NAME, diff --git a/drivers/net/dwc_eqos/txqueue.h b/drivers/net/dwc_eqos/txqueue.h index d8f5844..80cf7ec 100644 --- a/drivers/net/dwc_eqos/txqueue.h +++ b/drivers/net/dwc_eqos/txqueue.h @@ -2,12 +2,19 @@ Transmit queue behavior. Similar to the receive queue. */ #pragma once +#include "queue_common.h" struct DeviceContext; struct DeviceConfig; struct ChannelRegisters; struct MtlQueueRegisters; +// 3 = 1 hole in the ring + 1 context descriptor + 1 TSE descriptor. +UINT32 constexpr TxMaximumNumberOfFragments = QueueDescriptorMinCount - 3; + +UINT16 constexpr TxLayer4HeaderOffsetLimit = 0x3FF; // 10 bits. +UINT32 constexpr TxMaximumOffloadSize = 0x3FFFF; // 18 bits. + // Called by device.cpp AdapterCreateTxQueue. _IRQL_requires_same_ _IRQL_requires_(PASSIVE_LEVEL)