From 18bcadc8ca3865edc03581d26501f4cd6eb7b6c5 Mon Sep 17 00:00:00 2001 From: Doug Cook Date: Thu, 28 Dec 2023 16:34:36 -0700 Subject: [PATCH 1/2] dwc_eqos - add performance counters To help with tuning and diagnostics, expose some performance counters so developers can see some of the adapter state without debugging. You can use Windows in-box tools like perfmon or typeperf to see the values. - dwc_eqos-debug: debug event counts like ISR-handled, ISR-unhandled, DPC-by-type. - dwc_eqos-mac: values of various GMAC registers, mainly the MMC counters. Overview: - Keep a collection of active devices so we can enumerate them if anybody asks to collect performance counters. - Define an ID for each device. Use the device's physical address for this, shifted right by 4 to provide support for 36 bits of physical address. - Change the AXI bus parameters to match the docs. I didn't actually perform any tuning here - I just changed the parameters to match up with what the docs describe and verified that this didn't break anything. - Enable the MMC counters and mask their rollover interrupts. - Implement PCW counter collection. - Run ctrpp on the performance counter manifest during build. - Include the generated resources into the driver during build. 
--- drivers/net/dwc_eqos/device.cpp | 255 +++++++++- drivers/net/dwc_eqos/device.h | 10 + drivers/net/dwc_eqos/driver.cpp | 9 +- drivers/net/dwc_eqos/dwc_eqos.rc | 30 ++ drivers/net/dwc_eqos/dwc_eqos.vcxproj | 33 +- drivers/net/dwc_eqos/dwc_eqos.vcxproj.filters | 17 +- drivers/net/dwc_eqos/dwc_eqos_perf.man | 442 ++++++++++++++++++ drivers/net/dwc_eqos/dwc_eqos_perf_data.h | 71 +++ drivers/net/dwc_eqos/registers.h | 18 +- 9 files changed, 859 insertions(+), 26 deletions(-) create mode 100644 drivers/net/dwc_eqos/dwc_eqos.rc create mode 100644 drivers/net/dwc_eqos/dwc_eqos_perf.man create mode 100644 drivers/net/dwc_eqos/dwc_eqos_perf_data.h diff --git a/drivers/net/dwc_eqos/device.cpp b/drivers/net/dwc_eqos/device.cpp index 009ba5d..e7c5fe4 100644 --- a/drivers/net/dwc_eqos/device.cpp +++ b/drivers/net/dwc_eqos/device.cpp @@ -5,6 +5,10 @@ #include "queue_common.h" #include "registers.h" #include "trace.h" +#include "dwc_eqos_perf_data.h" + +#define CTRPP_VERIFY_COUNTER_SIZES 1 +#include #include @@ -28,6 +32,10 @@ static auto constexpr QueuesSupported = 1u; // TODO: Support multiple queues? static auto constexpr InterruptLinkStatus = 0x80000000u; static auto constexpr InterruptChannel0Status = ~InterruptLinkStatus; +// Updated by DevicePerfRegister/DevicePerfUnregister. +static WDFWAITLOCK g_devicesLock = nullptr; // Guards g_devices. +static WDFCOLLECTION g_devices = nullptr; // Guarded by g_devicesLock. 
+ enum InterruptsWanted : UCHAR { InterruptsNone = 0, @@ -47,6 +55,7 @@ struct DeviceContext WDFSPINLOCK queueLock; WDFINTERRUPT interrupt; WDFDMAENABLER dma; + UINT32 perfCounterDeviceId; // = (regs physical address) >> 4 MacHwFeature0_t feature0; MacHwFeature1_t feature1; MacHwFeature2_t feature2; @@ -632,6 +641,7 @@ DevicePrepareHardware( for (ULONG i = 0; i != resourcesCount; i += 1) { auto desc = WdfCmResourceListGetDescriptor(resourcesTranslated, i); + auto descRaw = WdfCmResourceListGetDescriptor(resourcesRaw, i); switch (desc->Type) { case CmResourceTypeMemory: @@ -660,6 +670,8 @@ DevicePrepareHardware( status = STATUS_INSUFFICIENT_RESOURCES; goto Done; } + + context->perfCounterDeviceId = static_cast(descRaw->u.Memory.Start.QuadPart >> 4); } break; @@ -672,7 +684,7 @@ DevicePrepareHardware( WDF_INTERRUPT_CONFIG config; WDF_INTERRUPT_CONFIG_INIT(&config, DeviceInterruptIsr, DeviceInterruptDpc); - config.InterruptRaw = WdfCmResourceListGetDescriptor(resourcesRaw, i); + config.InterruptRaw = descRaw; config.InterruptTranslated = desc; status = WdfInterruptCreate(device, &config, WDF_NO_OBJECT_ATTRIBUTES, &context->interrupt); @@ -880,22 +892,25 @@ DevicePrepareHardware( goto Done; } - // TODO: use ACPI _DSD? - // TODO: review. This is what the NetBSD driver seems to be doing, and - // it seems to work ok, but it doesn't line up with the documentation. + // TODO: tune? use ACPI _DSD? + Write32(®s->Axi_Lpi_Entry_Interval, 15); // AutoAxiLpi after (interval + 1) * 64 clocks. Max value is 15. auto busMode = Read32(®s->Dma_SysBus_Mode); - busMode.Reserved14 = true; // mixed-burst? - busMode.FixedBurst = false; + busMode.EnableLpi = true; // true = allow LPI, honor AXI LPI request. + busMode.UnlockOnPacket = false; // false = Wake for any received packet, true = only wake for magic packet. 
busMode.AxiMaxWriteOutstanding = DefaultAxiMaxWriteOutstanding; busMode.AxiMaxReadOutstanding = DefaultAxiMaxReadOutstanding; - busMode.BurstLength16 = true; - busMode.BurstLength8 = true; - busMode.BurstLength4 = true; + busMode.AddressAlignedBeats = false; // ? + busMode.AutoAxiLpi = true; // true = enter LPI after (Axi_Lpi_Entry_Interval + 1) * 64 idle clocks. + busMode.BurstLength16 = true; // true = allow 16-beat fixed-bursts. + busMode.BurstLength8 = true; // true = allow 8-beat fixed-bursts. + busMode.BurstLength4 = true; // true = allow 4-beat fixed-bursts. + busMode.FixedBurst = true; // true = fixed-burst, false = mixed-burst (changes meaning of bits 1..7). Write32(®s->Dma_SysBus_Mode, busMode); Write32(®s->Mac_1us_Tic_Counter, DefaultCsrRate / 1'000'000u - 1); - static_assert(sizeof(RxDescriptor) == sizeof(TxDescriptor)); + static_assert(sizeof(RxDescriptor) == sizeof(TxDescriptor), + "RxDescriptor must be same size as TxDescriptor."); static_assert(sizeof(RxDescriptor) % BusBytes == 0, "RxDescriptor must be a multiple of bus width."); ChannelDmaControl_t dmaControl = {}; @@ -903,7 +918,12 @@ DevicePrepareHardware( dmaControl.PblX8 = QueueBurstLengthX8; Write32(®s->Dma_Ch[0].Control, dmaControl); - Write32(®s->Mmc_Control, 0x9); // Reset and freeze MMC counters because they generate interrupts. + // Disable MMC counter interrupts. + Write32(®s->Mmc_Rx_Interrupt_Mask, 0xFFFFFFFF); // RXWDOGP,RXFOVP,RXPAUSP,RXLENERP,RXOSIZEGP,RXCRCERP,RXMCGP,RXGOCT,RXGBOCT,RXGBPKT + Write32(®s->Mmc_Tx_Interrupt_Mask, 0xFFFFFFFF); // TXPAUSP,TXGPKT,TXGOCT,TXCARERP,TXUFLOWERP,TXGBPKT,TXGBOCT + Write32(®s->Mmc_Ipc_Rx_Interrupt_Mask, 0xFFFFFFFF); + Write32(®s->Mmc_Fpe_Tx_Interrupt_Mask, 0xFFFFFFFF); + Write32(®s->Mmc_Fpe_Rx_Interrupt_Mask, 0xFFFFFFFF); } // Start adapter. 
@@ -1018,6 +1038,19 @@ DeviceSetNotificationTxQueue( } +__declspec(code_seg("PAGE")) +static void +DeviceCleanup(WDFOBJECT Object) +{ + PAGED_CODE(); + if (g_devices) + { + WdfWaitLockAcquire(g_devicesLock, nullptr); + WdfCollectionRemove(g_devices, Object); + WdfWaitLockRelease(g_devicesLock); + } +} + __declspec(code_seg("PAGE")) NTSTATUS DeviceAdd( @@ -1056,6 +1089,7 @@ DeviceAdd( { WDF_OBJECT_ATTRIBUTES attributes; WDF_OBJECT_ATTRIBUTES_INIT_CONTEXT_TYPE(&attributes, DeviceContext); + attributes.EvtCleanupCallback = DeviceCleanup; status = WdfDeviceCreate(&deviceInit, &attributes, &device); if (!NT_SUCCESS(status)) @@ -1065,6 +1099,13 @@ DeviceAdd( goto Done; } + if (g_devices) + { + WdfWaitLockAcquire(g_devicesLock, nullptr); + (void)WdfCollectionAdd(g_devices, device); // Best-effort. + WdfWaitLockRelease(g_devicesLock); + } + WdfDeviceSetAlignmentRequirement(device, FILE_BYTE_ALIGNMENT); WDF_DEVICE_STATE deviceState; @@ -1128,3 +1169,195 @@ DeviceAdd( TraceEntryExitWithStatus(DeviceAdd, LEVEL_INFO, status); return status; } + +// Performance counter implementation: extract PERF_MAC_DATA from DeviceContext. 
+// Fills *data with the current value of the MAC configuration register and of
+// every statistics register that has a same-named field in PERF_MAC_DATA.
+//
+// context: device context whose regs pointer addresses the mapped MAC registers.
+// data:    receives one 32-bit value per PERF_MAC_DATA field.
+//
+// Every PERF_MAC_DATA field needs a matching read below. A field that is
+// skipped is never written here and is reported with whatever value the caller
+// left in *data.
+_IRQL_requires_max_(APC_LEVEL)
+__declspec(code_seg("PAGE"))
+static void
+PerfDataInit(
+    _In_ DeviceContext* context,
+    _Out_ PERF_MAC_DATA* data)
+{
+    PAGED_CODE();
+
+    // Mac_Configuration is a union in the register map, so it is read through
+    // its Value32 member rather than through the READ_COUNTER macro.
+    data->Mac_Configuration = READ_REGISTER_NOFENCE_ULONG(&context->regs->Mac_Configuration.Value32);
+
+    // READ_COUNTER copies one register into the PERF_MAC_DATA field of the same
+    // name, so the field names and the register member names must be identical.
+    // The reads are listed in PERF_MAC_DATA declaration order to make a missing
+    // field easy to spot.
+#define READ_COUNTER(name) data->name = READ_REGISTER_NOFENCE_ULONG(&context->regs->name)
+    READ_COUNTER(Tx_Octet_Count_Good_Bad);
+    READ_COUNTER(Tx_Packet_Count_Good_Bad);
+    READ_COUNTER(Tx_Underflow_Error_Packets);
+    READ_COUNTER(Tx_Carrier_Error_Packets);
+    READ_COUNTER(Tx_Octet_Count_Good);
+    READ_COUNTER(Tx_Packet_Count_Good);
+    READ_COUNTER(Tx_Pause_Packets);
+    READ_COUNTER(Rx_Packets_Count_Good_Bad);
+    READ_COUNTER(Rx_Octet_Count_Good_Bad);
+    READ_COUNTER(Rx_Octet_Count_Good);
+    READ_COUNTER(Rx_Multicast_Packets_Good);
+    READ_COUNTER(Rx_Crc_Error_Packets);
+    READ_COUNTER(Rx_Oversize_Packets_Good);
+    READ_COUNTER(Rx_Length_Error_Packets);
+    READ_COUNTER(Rx_Pause_Packets);
+    READ_COUNTER(Rx_Fifo_Overflow_Packets);
+    READ_COUNTER(Rx_Watchdog_Error_Packets);
+    READ_COUNTER(RxIPv4_Good_Packets);
+    READ_COUNTER(RxIPv4_Header_Error_Packets);
+    READ_COUNTER(RxIPv6_Good_Packets);
+    READ_COUNTER(RxIPv6_Header_Error_Packets);
+    READ_COUNTER(RxUdp_Error_Packets);
+    READ_COUNTER(RxTcp_Error_Packets);
+    READ_COUNTER(RxIcmp_Error_Packets);
+    READ_COUNTER(RxIPv4_Header_Error_Octets);
+    READ_COUNTER(RxIPv6_Header_Error_Octets);
+    READ_COUNTER(RxUdp_Error_Octets);
+    READ_COUNTER(RxTcp_Error_Octets);
+    READ_COUNTER(RxIcmp_Error_Octets);
+    READ_COUNTER(Mmc_Tx_Fpe_Fragment_Cntr);
+    READ_COUNTER(Mmc_Tx_Hold_Req_Cntr);
+    READ_COUNTER(Mmc_Rx_Packet_Smd_Err_Cntr);
+    READ_COUNTER(Mmc_Rx_Packet_Assembly_OK_Cntr);
+    READ_COUNTER(Mmc_Rx_Fpe_Fragment_Cntr);
+#undef READ_COUNTER
+}
+
+// Performance counter implementation: extract PERF_DEBUG_DATA from DeviceContext.
+_IRQL_requires_max_(APC_LEVEL) +__declspec(code_seg("PAGE")) +static void +PerfDataInit( + _In_ DeviceContext* context, + _Out_ PERF_DEBUG_DATA* data) +{ + PAGED_CODE(); + + data->IsrHandled = context->isrHandled; + data->IsrIgnored = context->isrIgnored; + data->DpcLinkState = context->dpcLinkState; + data->DpcRx = context->dpcRx; + data->DpcTx = context->dpcTx; + data->DpcAbnormalStatus = context->dpcAbnormalStatus; + data->DpcFatalBusError = context->dpcFatalBusError; +} + +// Implements the performance counter callback for a given DataType. +// Expects a PerfDataInit(DeviceContext*, DataType*) function to exist. +template +_IRQL_requires_max_(APC_LEVEL) +__declspec(code_seg("PAGE")) +static NTSTATUS NTAPI +PerfCallback( + _In_ PCW_CALLBACK_TYPE type, + _In_ PCW_CALLBACK_INFORMATION* info, + _In_opt_ void* callbackContext) +{ + PAGED_CODE(); + UNREFERENCED_PARAMETER(callbackContext); + + if ((type == PcwCallbackEnumerateInstances || type == PcwCallbackCollectData) && + g_devices != nullptr) + { + auto const buffer = type == PcwCallbackCollectData + ? info->CollectData.Buffer + : info->EnumerateInstances.Buffer; + + DataType data = {}; + wchar_t nameBuffer[8]; + UNICODE_STRING nameString = {}; + nameString.Buffer = nameBuffer; + nameString.Length = 0; + nameString.MaximumLength = sizeof(nameBuffer); + + WdfWaitLockAcquire(g_devicesLock, nullptr); + + auto const count = WdfCollectionGetCount(g_devices); + for (ULONG i = 0; i != count; i += 1) + { + auto const device = static_cast(WdfCollectionGetItem(g_devices, i)); + auto const context = DeviceGetContext(device); + + if (type == PcwCallbackCollectData) + { + PerfDataInit(context, &data); + } + + RtlIntegerToUnicodeString(context->perfCounterDeviceId, 16, &nameString); + + // Inline the ctrpp-generated AddXXX function: + PCW_DATA pcwData = { &data, sizeof(data) }; + (void)PcwAddInstance(buffer, &nameString, context->perfCounterDeviceId, 1, &pcwData); // Best-effort. 
+ } + + WdfWaitLockRelease(g_devicesLock); + } + + return STATUS_SUCCESS; +} + +__declspec(code_seg("INIT")) +void +DevicePerfRegister(_In_ WDFDRIVER driver) +{ + PAGED_CODE(); + NT_ASSERT(g_devicesLock == nullptr); + NT_ASSERT(g_devices == nullptr); + + // This is all best-effort. + // Driver should still run even if performance counters can't be registered. + NTSTATUS status; + + WDF_OBJECT_ATTRIBUTES attributes; + WDF_OBJECT_ATTRIBUTES_INIT(&attributes); + attributes.ParentObject = driver; + + status = WdfWaitLockCreate(&attributes, &g_devicesLock); + if (!NT_SUCCESS(status)) + { + goto Done; + } + + status = WdfCollectionCreate(&attributes, &g_devices); + if (!NT_SUCCESS(status)) + { + goto Done; + } + + status = RegisterPERF_MAC_COUNTERSET(PerfCallback, nullptr); + if (!NT_SUCCESS(status)) + { + TraceWrite("RegisterPERF_MAC_COUNTERSET", LEVEL_WARNING, + TraceLoggingNTStatus(status)); + } + + status = RegisterPERF_DEBUG_COUNTERSET(PerfCallback, nullptr); + if (!NT_SUCCESS(status)) + { + TraceWrite("RegisterPERF_DEBUG_COUNTERSET", LEVEL_WARNING, + TraceLoggingNTStatus(status)); + } + + status = STATUS_SUCCESS; + +Done: + + TraceEntryExitWithStatus(DevicePerfRegister, LEVEL_INFO, status); +} + +__declspec(code_seg("PAGE")) +void +DevicePerfUnregister() +{ + PAGED_CODE(); + + UnregisterPERF_DEBUG_COUNTERSET(); + UnregisterPERF_MAC_COUNTERSET(); + + if (g_devices) + { + WdfObjectDelete(g_devices); + g_devices = nullptr; + } + + if (g_devicesLock) + { + WdfObjectDelete(g_devicesLock); + g_devicesLock = nullptr; + } +} diff --git a/drivers/net/dwc_eqos/device.h b/drivers/net/dwc_eqos/device.h index c905460..5cb13d5 100644 --- a/drivers/net/dwc_eqos/device.h +++ b/drivers/net/dwc_eqos/device.h @@ -9,6 +9,16 @@ __declspec(code_seg("PAGE")) EVT_WDF_DRIVER_DEVICE_ADD DeviceAdd; +// Called by driver.cpp DriverEntry. +__declspec(code_seg("INIT")) +void +DevicePerfRegister(_In_ WDFDRIVER driver); + +// Called by driver.cpp DriverUnload. 
+__declspec(code_seg("PAGE")) +void +DevicePerfUnregister(); + // Called by rxqueue.cpp RxQueueSetNotificationEnabled. void DeviceSetNotificationRxQueue( diff --git a/drivers/net/dwc_eqos/driver.cpp b/drivers/net/dwc_eqos/driver.cpp index 0c56db3..31cba24 100644 --- a/drivers/net/dwc_eqos/driver.cpp +++ b/drivers/net/dwc_eqos/driver.cpp @@ -31,6 +31,7 @@ DriverUnload(_In_ WDFDRIVER driver) // PASSIVE_LEVEL PAGED_CODE(); UNREFERENCED_PARAMETER(driver); + DevicePerfUnregister(); TraceEntryExit(DriverUnload, LEVEL_INFO); TraceLoggingUnregister(TraceProvider); } @@ -56,12 +57,18 @@ DriverEntry( config.EvtDriverUnload = DriverUnload; config.DriverPoolTag = 'dwcE'; + WDFDRIVER driver; status = WdfDriverCreate( driverObject, registryPath, WDF_NO_OBJECT_ATTRIBUTES, &config, - WDF_NO_HANDLE); + &driver); + + if (NT_SUCCESS(status)) + { + DevicePerfRegister(driver); + } TraceExitWithStatus(DriverEntry, LEVEL_INFO, status); diff --git a/drivers/net/dwc_eqos/dwc_eqos.rc b/drivers/net/dwc_eqos/dwc_eqos.rc new file mode 100644 index 0000000..58e68eb --- /dev/null +++ b/drivers/net/dwc_eqos/dwc_eqos.rc @@ -0,0 +1,30 @@ +#include +#include + +#define VER_FILETYPE VFT_DRV +#define VER_FILESUBTYPE VFT2_DRV_SYSTEM +#define VER_FILEDESCRIPTION_STR "Synopsys DesignWare Ethernet Quality of Service (GMAC) driver for RK3588" +#define VER_INTERNALNAME_STR "dwc_eqos.sys" +#define VER_ORIGINALFILENAME_STR "dwc_eqos.sys" + +#define VER_LEGALCOPYRIGHT_YEARS "2023" +#define VER_LEGALCOPYRIGHT_STR "Copyright (C) " VER_LEGALCOPYRIGHT_YEARS + +#define VER_FILEVERSION 1,0,0,0 +#define VER_PRODUCTVERSION_STR "1.0.0.0" +#define VER_PRODUCTVERSION 1,0,0,0 +#define LVER_PRODUCTVERSION_STR L"1.0.0.0" + +#define VER_FILEFLAGSMASK (VS_FF_DEBUG | VS_FF_PRERELEASE) +#ifdef DEBUG +#define VER_FILEFLAGS (VS_FF_DEBUG) +#else +#define VER_FILEFLAGS (0) +#endif + +#define VER_FILEOS VOS_NT_WINDOWS32 + +#define VER_COMPANYNAME_STR "Open Source" +#define VER_PRODUCTNAME_STR "RK3588" + +#include "common.ver" diff 
--git a/drivers/net/dwc_eqos/dwc_eqos.vcxproj b/drivers/net/dwc_eqos/dwc_eqos.vcxproj index c2941c9..eae9ec1 100644 --- a/drivers/net/dwc_eqos/dwc_eqos.vcxproj +++ b/drivers/net/dwc_eqos/dwc_eqos.vcxproj @@ -23,6 +23,7 @@ + @@ -98,6 +99,17 @@ $(DDK_LIB_PATH)Ksecdd.lib;%(AdditionalDependencies) + + false + true + + + $(IntDir)%(Filename).h + $(IntDir)%(Filename).rc + + + copy /y dwc_eqos_perf.man "$(OutDir)" + @@ -111,11 +123,30 @@ $(DDK_LIB_PATH)Ksecdd.lib;%(AdditionalDependencies) + + false + true + + + $(IntDir)%(Filename).h + $(IntDir)%(Filename).rc + + + copy /y dwc_eqos_perf.man "$(OutDir)" + + + + Manifest + + + + + - \ No newline at end of file + diff --git a/drivers/net/dwc_eqos/dwc_eqos.vcxproj.filters b/drivers/net/dwc_eqos/dwc_eqos.vcxproj.filters index cae6be1..1c405b7 100644 --- a/drivers/net/dwc_eqos/dwc_eqos.vcxproj.filters +++ b/drivers/net/dwc_eqos/dwc_eqos.vcxproj.filters @@ -9,10 +9,6 @@ {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hpp;hxx;hm;inl;inc;xsd - - {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} - rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms - {8E41214B-6785-4CFE-B992-037D68949A14} inf;inv;inx;mof;mc; @@ -45,6 +41,9 @@ Header Files + + Header Files + @@ -66,4 +65,14 @@ Source Files + + + Driver Files + + + + + Driver Files + + \ No newline at end of file diff --git a/drivers/net/dwc_eqos/dwc_eqos_perf.man b/drivers/net/dwc_eqos/dwc_eqos_perf.man new file mode 100644 index 0000000..6688f9e --- /dev/null +++ b/drivers/net/dwc_eqos/dwc_eqos_perf.man @@ -0,0 +1,442 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/drivers/net/dwc_eqos/dwc_eqos_perf_data.h b/drivers/net/dwc_eqos/dwc_eqos_perf_data.h new file mode 100644 index 0000000..d472b67 --- /dev/null +++ b/drivers/net/dwc_eqos/dwc_eqos_perf_data.h @@ -0,0 +1,71 @@ +#pragma once + +/* +Structures used for collecting performance counter data. 
+Needs to be kept in sync with dwc_eqos_perf.man. + +Performance counters are not installed by default. + +Install: lodctr /m:dwc_eqos_perf.man +Uninstall: unlodctr /p:dwc_eqos + +Counter data can be collected using tools like perfmon or typeperf. + +These performance counters are designed for debugging and performance tuning, +avoiding runtime overhead when they are not used. The values are only 32-bits, +so some of them will roll-over quickly. Collecting 64-bit values is possible, +but would require handling rollover interrupts and tracking the counter values +within the driver (doesn't seem worthwhile at this point). +*/ + +// Values incremented during ISR or DPC. +struct PERF_DEBUG_DATA +{ + UINT32 IsrHandled; + UINT32 IsrIgnored; + UINT32 DpcLinkState; + UINT32 DpcRx; + UINT32 DpcTx; + UINT32 DpcAbnormalStatus; + UINT32 DpcFatalBusError; +}; + +// Each value corresponds directly to a register in the device. +struct PERF_MAC_DATA +{ + UINT32 Mac_Configuration; + UINT32 Tx_Octet_Count_Good_Bad; + UINT32 Tx_Packet_Count_Good_Bad; + UINT32 Tx_Underflow_Error_Packets; + UINT32 Tx_Carrier_Error_Packets; + UINT32 Tx_Octet_Count_Good; + UINT32 Tx_Packet_Count_Good; + UINT32 Tx_Pause_Packets; + UINT32 Rx_Packets_Count_Good_Bad; + UINT32 Rx_Octet_Count_Good_Bad; + UINT32 Rx_Octet_Count_Good; + UINT32 Rx_Multicast_Packets_Good; + UINT32 Rx_Crc_Error_Packets; + UINT32 Rx_Oversize_Packets_Good; + UINT32 Rx_Length_Error_Packets; + UINT32 Rx_Pause_Packets; + UINT32 Rx_Fifo_Overflow_Packets; + UINT32 Rx_Watchdog_Error_Packets; + UINT32 RxIPv4_Good_Packets; + UINT32 RxIPv4_Header_Error_Packets; + UINT32 RxIPv6_Good_Packets; + UINT32 RxIPv6_Header_Error_Packets; + UINT32 RxUdp_Error_Packets; + UINT32 RxTcp_Error_Packets; + UINT32 RxIcmp_Error_Packets; + UINT32 RxIPv4_Header_Error_Octets; + UINT32 RxIPv6_Header_Error_Octets; + UINT32 RxUdp_Error_Octets; + UINT32 RxTcp_Error_Octets; + UINT32 RxIcmp_Error_Octets; + UINT32 Mmc_Tx_Fpe_Fragment_Cntr; + UINT32 Mmc_Tx_Hold_Req_Cntr; + 
UINT32 Mmc_Rx_Packet_Smd_Err_Cntr; + UINT32 Mmc_Rx_Packet_Assembly_OK_Cntr; + UINT32 Mmc_Rx_Fpe_Fragment_Cntr; +}; diff --git a/drivers/net/dwc_eqos/registers.h b/drivers/net/dwc_eqos/registers.h index cd2e900..b6d7447 100644 --- a/drivers/net/dwc_eqos/registers.h +++ b/drivers/net/dwc_eqos/registers.h @@ -559,7 +559,7 @@ enum HashTableSize_t : UINT32 union MacConfiguration_t { - UINT32 Value32; + ULONG Value32; struct { UINT32 ReceiverEnable : 1; @@ -1180,12 +1180,12 @@ struct MacRegisters // MMC_Rx_Interrupt_Mask @ 0x070C = 0x0: // This register maintains the masks for interrupts generated from all Receive // statistics counters. - ULONG Mmc_Rx_InterruptMask; + ULONG Mmc_Rx_Interrupt_Mask; // MMC_Tx_Interrupt_Mask @ 0x0710 = 0x0: // This register maintains the masks for interrupts generated from all Transmit // statistics counters. - ULONG Mmc_Tx_InterruptMask; + ULONG Mmc_Tx_Interrupt_Mask; // Tx_Octet_Count_Good_Bad @ 0x0714 = 0x0: // This register provides the number of bytes transmitted by the GMAC, exclusive @@ -1397,15 +1397,15 @@ struct MacRegisters // Transmit statistics counters. ULONG Mmc_Fpe_Tx_Interrupt_Mask; - // MmcTxFpeFragment_Cntr @ 0x08A8 = 0x0: + // Mmc_Tx_Fpe_Fragment_Cntr @ 0x08A8 = 0x0: // This register provides the number of additional mPackets transmitted due to // preemption. - ULONG MmcTxFpeFragment_Cntr; + ULONG Mmc_Tx_Fpe_Fragment_Cntr; - // MMC_Tx_Hold_Req_Cntr @ 0x08AC = 0x0: + // Mmc_Tx_Hold_Req_Cntr @ 0x08AC = 0x0: // This register provides the count of number of times a hold request is given to // MAC. - ULONG MMC_Tx_Hold_Req_Cntr; + ULONG Mmc_Tx_Hold_Req_Cntr; ULONG Padding08B0[4]; @@ -1428,12 +1428,12 @@ struct MacRegisters // This register provides the number of received MAC frames rejected due to // unknown SMD value and MAC frame fragments rejected due to arriving with an // SMD-C when there was no. 
- ULONG Mmc_Rx_Packet_SMD_Err_Cntr; + ULONG Mmc_Rx_Packet_Smd_Err_Cntr; // MMC_Rx_Packet_Assembly_OK_Cntr @ 0x08D0 = 0x0: // This register provides the number of MAC frames that were successfully // reassembled and delivered to MAC. - ULONG MmcRxPacketAssemblyOkCntr; + ULONG Mmc_Rx_Packet_Assembly_OK_Cntr; // MMC_Rx_FPE_Fragment_Cntr @ 0x08D4 = 0x0: // This register provides the number of additional mPackets transmitted due to From 8cad2ad80238d6f4bd11c397e1d1ea9af21622ac Mon Sep 17 00:00:00 2001 From: Doug Cook Date: Thu, 28 Dec 2023 17:28:45 -0700 Subject: [PATCH 2/2] Add some queue counters --- drivers/net/dwc_eqos/device.cpp | 46 ++++++++++++++++++++--- drivers/net/dwc_eqos/device.h | 26 ++++++++++++- drivers/net/dwc_eqos/dwc_eqos_perf.man | 36 ++++++++++++++++++ drivers/net/dwc_eqos/dwc_eqos_perf_data.h | 4 ++ drivers/net/dwc_eqos/rxqueue.cpp | 24 +++++++----- drivers/net/dwc_eqos/rxqueue.h | 3 +- drivers/net/dwc_eqos/txqueue.cpp | 23 ++++++------ drivers/net/dwc_eqos/txqueue.h | 3 +- 8 files changed, 134 insertions(+), 31 deletions(-) diff --git a/drivers/net/dwc_eqos/device.cpp b/drivers/net/dwc_eqos/device.cpp index e7c5fe4..627ee50 100644 --- a/drivers/net/dwc_eqos/device.cpp +++ b/drivers/net/dwc_eqos/device.cpp @@ -80,6 +80,10 @@ struct DeviceContext UINT32 dpcTx; // Updated only in DPC. UINT32 dpcAbnormalStatus; // Updated only in DPC. UINT32 dpcFatalBusError; // Updated only in DPC. + UINT32 rxOwnDescriptors; // Updated only during RxQueueAdvance. + UINT32 rxDoneFragments; // Updated only during RxQueueAdvance. + UINT32 txOwnDescriptors; // Updated only during TxQueueAdvance. + UINT32 txDoneFragments; // Updated only during TxQueueAdvance. 
}; WDF_DECLARE_CONTEXT_TYPE_WITH_NAME(DeviceContext, DeviceGetContext) @@ -416,7 +420,7 @@ AdapterCreateTxQueue( auto const context = DeviceGetContext(AdapterGetContext(adapter)->device); NT_ASSERT(context->txQueue == nullptr); return TxQueueCreate( - adapter, + context, queueInit, context->dma, &context->regs->Dma_Ch[0], @@ -433,7 +437,7 @@ AdapterCreateRxQueue( auto const context = DeviceGetContext(AdapterGetContext(adapter)->device); NT_ASSERT(context->rxQueue == nullptr); return RxQueueCreate( - adapter, + context, queueInit, context->dma, &context->regs->Dma_Ch[0]); @@ -993,13 +997,13 @@ DeviceReleaseHardware( return STATUS_SUCCESS; } +_IRQL_requires_max_(PASSIVE_LEVEL) void DeviceSetNotificationRxQueue( - _In_ NETADAPTER adapter, + _Inout_ DeviceContext* context, _In_opt_ NETPACKETQUEUE rxQueue) { // PASSIVE_LEVEL, nonpaged (resume path, raises IRQL) - auto const context = DeviceGetContext(AdapterGetContext(adapter)->device); WdfSpinLockAcquire(context->queueLock); // PASSIVE_LEVEL --> DISPATCH_LEVEL context->rxQueue = rxQueue; @@ -1015,13 +1019,13 @@ DeviceSetNotificationRxQueue( } } +_IRQL_requires_max_(PASSIVE_LEVEL) void DeviceSetNotificationTxQueue( - _In_ NETADAPTER adapter, + _Inout_ DeviceContext* context, _In_opt_ NETPACKETQUEUE txQueue) { // PASSIVE_LEVEL, nonpaged (resume path, raises IRQL) - auto const context = DeviceGetContext(AdapterGetContext(adapter)->device); WdfSpinLockAcquire(context->queueLock); // PASSIVE_LEVEL --> DISPATCH_LEVEL context->txQueue = txQueue; @@ -1038,6 +1042,30 @@ DeviceSetNotificationTxQueue( } +_IRQL_requires_max_(DISPATCH_LEVEL) +void +DeviceAddStatisticsRxQueue( + _Inout_ DeviceContext* context, + UINT32 ownDescriptors, + UINT32 doneFragments) +{ + // DISPATCH_LEVEL + context->rxOwnDescriptors += ownDescriptors; + context->rxDoneFragments += doneFragments; +} + +_IRQL_requires_max_(DISPATCH_LEVEL) +void +DeviceAddStatisticsTxQueue( + _Inout_ DeviceContext* context, + UINT32 ownDescriptors, + UINT32 doneFragments) +{ + 
// DISPATCH_LEVEL + context->txOwnDescriptors += ownDescriptors; + context->txDoneFragments += doneFragments; +} + __declspec(code_seg("PAGE")) static void DeviceCleanup(WDFOBJECT Object) @@ -1235,6 +1263,10 @@ PerfDataInit( data->DpcTx = context->dpcTx; data->DpcAbnormalStatus = context->dpcAbnormalStatus; data->DpcFatalBusError = context->dpcFatalBusError; + data->RxOwnDescriptors = context->rxOwnDescriptors; + data->RxDoneFragments = context->rxDoneFragments; + data->TxOwnDescriptors = context->txOwnDescriptors; + data->TxDoneFragments = context->txDoneFragments; } // Implements the performance counter callback for a given DataType. @@ -1291,6 +1323,7 @@ PerfCallback( return STATUS_SUCCESS; } +_IRQL_requires_max_(PASSIVE_LEVEL) __declspec(code_seg("INIT")) void DevicePerfRegister(_In_ WDFDRIVER driver) @@ -1340,6 +1373,7 @@ DevicePerfRegister(_In_ WDFDRIVER driver) TraceEntryExitWithStatus(DevicePerfRegister, LEVEL_INFO, status); } +_IRQL_requires_max_(PASSIVE_LEVEL) __declspec(code_seg("PAGE")) void DevicePerfUnregister() diff --git a/drivers/net/dwc_eqos/device.h b/drivers/net/dwc_eqos/device.h index 5cb13d5..3b121d9 100644 --- a/drivers/net/dwc_eqos/device.h +++ b/drivers/net/dwc_eqos/device.h @@ -3,6 +3,8 @@ Device behavior. Includes adapter and interrupt since they are 1:1 with the devi */ #pragma once +struct DeviceContext; + // Referenced in driver.cpp DriverEntry. // Called by WDF. __declspec(code_seg("PAGE")) @@ -10,23 +12,43 @@ EVT_WDF_DRIVER_DEVICE_ADD DeviceAdd; // Called by driver.cpp DriverEntry. +_IRQL_requires_max_(PASSIVE_LEVEL) __declspec(code_seg("INIT")) void DevicePerfRegister(_In_ WDFDRIVER driver); // Called by driver.cpp DriverUnload. +_IRQL_requires_max_(PASSIVE_LEVEL) __declspec(code_seg("PAGE")) void DevicePerfUnregister(); // Called by rxqueue.cpp RxQueueSetNotificationEnabled. 
+_IRQL_requires_max_(PASSIVE_LEVEL) void DeviceSetNotificationRxQueue( - _In_ NETADAPTER adapter, + _Inout_ DeviceContext* context, _In_opt_ NETPACKETQUEUE rxQueue); // Called by txqueue.cpp TxQueueSetNotificationEnabled. +_IRQL_requires_max_(PASSIVE_LEVEL) void DeviceSetNotificationTxQueue( - _In_ NETADAPTER adapter, + _Inout_ DeviceContext* context, _In_opt_ NETPACKETQUEUE txQueue); + +// Called by rxqueue.cpp RxQueueAdvance. +_IRQL_requires_max_(DISPATCH_LEVEL) +void +DeviceAddStatisticsRxQueue( + _Inout_ DeviceContext* context, + UINT32 ownDescriptors, + UINT32 doneFragments); + +// Called by txqueue.cpp TxQueueAdvance. +_IRQL_requires_max_(DISPATCH_LEVEL) +void +DeviceAddStatisticsTxQueue( + _Inout_ DeviceContext* context, + UINT32 ownDescriptors, + UINT32 doneFragments); diff --git a/drivers/net/dwc_eqos/dwc_eqos_perf.man b/drivers/net/dwc_eqos/dwc_eqos_perf.man index 6688f9e..832a077 100644 --- a/drivers/net/dwc_eqos/dwc_eqos_perf.man +++ b/drivers/net/dwc_eqos/dwc_eqos_perf.man @@ -434,6 +434,42 @@ type="perf_counter_rawcount" uri="uri:opensource/dwc_eqos/perf/debug/DpcFatalBusError" /> + + + + diff --git a/drivers/net/dwc_eqos/dwc_eqos_perf_data.h b/drivers/net/dwc_eqos/dwc_eqos_perf_data.h index d472b67..c562940 100644 --- a/drivers/net/dwc_eqos/dwc_eqos_perf_data.h +++ b/drivers/net/dwc_eqos/dwc_eqos_perf_data.h @@ -28,6 +28,10 @@ struct PERF_DEBUG_DATA UINT32 DpcTx; UINT32 DpcAbnormalStatus; UINT32 DpcFatalBusError; + UINT32 RxOwnDescriptors; + UINT32 RxDoneFragments; + UINT32 TxOwnDescriptors; + UINT32 TxDoneFragments; }; // Each value corresponds directly to a register in the device. 
diff --git a/drivers/net/dwc_eqos/rxqueue.cpp b/drivers/net/dwc_eqos/rxqueue.cpp index 74dd51f..d43d4e3 100644 --- a/drivers/net/dwc_eqos/rxqueue.cpp +++ b/drivers/net/dwc_eqos/rxqueue.cpp @@ -11,7 +11,7 @@ struct RxQueueContext { ChannelRegisters* channelRegs; - NETADAPTER adapter; + DeviceContext* deviceContext; NET_RING* packetRing; NET_RING* fragmentRing; WDFCOMMONBUFFER descBuffer; @@ -88,7 +88,7 @@ RxQueueAdvance(_In_ NETPACKETQUEUE queue) auto const fragEnd = context->fragmentRing->EndIndex; auto const descMask = context->descCount - 1u; UINT32 descIndex, pktIndex, fragIndex; - UINT32 donePkts = 0, queuedPkts = 0; + UINT32 ownDescriptors = 0, doneFrags = 0, queuedFrags = 0; /* Fragment indexes: @@ -127,6 +127,7 @@ RxQueueAdvance(_In_ NETPACKETQUEUE queue) TraceWrite("RxQueueAdvance-own", LEVEL_WARNING, TraceLoggingUInt32(descIndex, "descIndex"), TraceLoggingHexInt32(reinterpret_cast(&descWrite)[3], "RDES3")); + ownDescriptors = 1; break; } @@ -170,7 +171,7 @@ RxQueueAdvance(_In_ NETPACKETQUEUE queue) pktIndex = NetRingIncrementIndex(context->packetRing, pktIndex); fragIndex = NetRingIncrementIndex(context->fragmentRing, fragIndex); - donePkts += 1; + doneFrags += 1; } context->descBegin = descIndex; @@ -207,7 +208,7 @@ RxQueueAdvance(_In_ NETPACKETQUEUE queue) context->descVirtual[descIndex].Read = descRead; fragIndex = NetRingIncrementIndex(context->fragmentRing, fragIndex); - queuedPkts += 1; + queuedFrags += 1; } // In some error cases, the device may stall until we write to the tail pointer @@ -232,9 +233,12 @@ RxQueueAdvance(_In_ NETPACKETQUEUE queue) context->fragmentRing->BeginIndex = fragEnd; } + DeviceAddStatisticsRxQueue(context->deviceContext, ownDescriptors, doneFrags); + TraceEntryExit(RxQueueAdvance, LEVEL_VERBOSE, - TraceLoggingUInt32(donePkts), - TraceLoggingUInt32(queuedPkts)); + TraceLoggingUInt32(ownDescriptors), + TraceLoggingUInt32(doneFrags), + TraceLoggingUInt32(queuedFrags)); } static EVT_PACKET_QUEUE_SET_NOTIFICATION_ENABLED 
RxQueueSetNotificationEnabled; @@ -245,7 +249,7 @@ RxQueueSetNotificationEnabled( { // PASSIVE_LEVEL, nonpaged (resume path) auto const context = RxQueueGetContext(queue); - DeviceSetNotificationRxQueue(context->adapter, notificationEnabled ? queue : nullptr); + DeviceSetNotificationRxQueue(context->deviceContext, notificationEnabled ? queue : nullptr); TraceEntryExit(RxQueueSetNotificationEnabled, LEVEL_VERBOSE, TraceLoggingBoolean(notificationEnabled, "enabled")); } @@ -278,7 +282,7 @@ RxQueueStop(_In_ NETPACKETQUEUE queue) PAGED_CODE(); auto const context = RxQueueGetContext(queue); - DeviceSetNotificationRxQueue(context->adapter, nullptr); + DeviceSetNotificationRxQueue(context->deviceContext, nullptr); TraceEntryExit(RxQueueStop, LEVEL_INFO); } @@ -300,7 +304,7 @@ RxQueueCleanup(_In_ WDFOBJECT queue) _Use_decl_annotations_ NTSTATUS RxQueueCreate( - NETADAPTER adapter, + DeviceContext* deviceContext, NETRXQUEUE_INIT* queueInit, WDFDMAENABLER dma, ChannelRegisters* channelRegs) @@ -338,7 +342,7 @@ RxQueueCreate( auto const context = RxQueueGetContext(queue); context->channelRegs = channelRegs; - context->adapter = adapter; + context->deviceContext = deviceContext; context->packetRing = NetRingCollectionGetPacketRing(rings); context->fragmentRing = NetRingCollectionGetFragmentRing(rings); context->descCount = QueueDescriptorCount(context->fragmentRing->NumberOfElements); diff --git a/drivers/net/dwc_eqos/rxqueue.h b/drivers/net/dwc_eqos/rxqueue.h index 42a7b88..e3a0e05 100644 --- a/drivers/net/dwc_eqos/rxqueue.h +++ b/drivers/net/dwc_eqos/rxqueue.h @@ -3,6 +3,7 @@ Receive queue behavior. Similar to the transmit queue. 
*/ #pragma once +struct DeviceContext; struct ChannelRegisters; auto constexpr RxBufferSize = 2048u; @@ -11,7 +12,7 @@ _IRQL_requires_same_ _IRQL_requires_(PASSIVE_LEVEL) NTSTATUS RxQueueCreate( - _In_ NETADAPTER adapter, + _Inout_ DeviceContext* deviceContext, _Inout_ NETRXQUEUE_INIT* queueInit, _In_ WDFDMAENABLER dma, _Inout_ ChannelRegisters* channelRegs); diff --git a/drivers/net/dwc_eqos/txqueue.cpp b/drivers/net/dwc_eqos/txqueue.cpp index 273c5a3..5e5b0f9 100644 --- a/drivers/net/dwc_eqos/txqueue.cpp +++ b/drivers/net/dwc_eqos/txqueue.cpp @@ -11,7 +11,7 @@ struct TxQueueContext { ChannelRegisters* channelRegs; MtlQueueRegisters* mtlRegs; - NETADAPTER adapter; + DeviceContext* deviceContext; NET_RING* packetRing; NET_RING* fragmentRing; WDFCOMMONBUFFER descBuffer; @@ -87,7 +87,7 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) auto const pktEnd = context->packetRing->EndIndex; auto const descMask = context->descCount - 1u; UINT32 descIndex, pktIndex, fragIndex; - UINT32 donePkts = 0, queuedPkts = 0, queuedFrags = 0; + UINT32 ownDescriptors = 0, doneFrags = 0, queuedFrags = 0; /* Packet indexes: @@ -148,6 +148,7 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) TraceLoggingHexInt32(reinterpret_cast(&descWrite)[3], "TDES3"), TraceLoggingUInt32(i, "fragment"), TraceLoggingUInt32(fragmentCount)); + ownDescriptors = 1; goto DoneIndicating; } else if ( @@ -164,7 +165,7 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) } } - donePkts += 1; + doneFrags += fragmentCount; descReady -= fragmentCount; descIndex = (descIndex + fragmentCount) & descMask; } @@ -243,8 +244,6 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) fragIndex = NetRingIncrementIndex(context->fragmentRing, fragIndex); queuedFrags += 1; } - - queuedPkts += 1; } pktIndex = NetRingIncrementIndex(context->packetRing, pktIndex); @@ -259,9 +258,11 @@ TxQueueAdvance(_In_ NETPACKETQUEUE queue) context->packetRing->NextIndex = pktIndex; } + DeviceAddStatisticsTxQueue(context->deviceContext, ownDescriptors, doneFrags); + 
TraceEntryExit(TxQueueAdvance, LEVEL_VERBOSE, - TraceLoggingUInt32(donePkts), - TraceLoggingUInt32(queuedPkts), + TraceLoggingUInt32(ownDescriptors), + TraceLoggingUInt32(doneFrags), TraceLoggingUInt32(queuedFrags)); } @@ -273,7 +274,7 @@ TxQueueSetNotificationEnabled( { // PASSIVE_LEVEL, nonpaged (resume path) auto const context = TxQueueGetContext(queue); - DeviceSetNotificationTxQueue(context->adapter, notificationEnabled ? queue : nullptr); + DeviceSetNotificationTxQueue(context->deviceContext, notificationEnabled ? queue : nullptr); TraceEntryExit(TxQueueSetNotificationEnabled, LEVEL_VERBOSE, TraceLoggingBoolean(notificationEnabled, "enabled")); } @@ -371,7 +372,7 @@ TxQueueStop(_In_ NETPACKETQUEUE queue) context->descBegin = 0; context->descEnd = 0; - DeviceSetNotificationTxQueue(context->adapter, nullptr); + DeviceSetNotificationTxQueue(context->deviceContext, nullptr); TraceEntryExit(TxQueueStop, LEVEL_INFO); } @@ -393,7 +394,7 @@ TxQueueCleanup(_In_ WDFOBJECT queue) _Use_decl_annotations_ NTSTATUS TxQueueCreate( - NETADAPTER adapter, + DeviceContext* deviceContext, NETTXQUEUE_INIT* queueInit, WDFDMAENABLER dma, ChannelRegisters* channelRegs, @@ -432,7 +433,7 @@ TxQueueCreate( auto const context = TxQueueGetContext(queue); context->channelRegs = channelRegs; context->mtlRegs = mtlRegs; - context->adapter = adapter; + context->deviceContext = deviceContext; context->packetRing = NetRingCollectionGetPacketRing(rings); context->fragmentRing = NetRingCollectionGetFragmentRing(rings); context->descCount = QueueDescriptorCount(context->fragmentRing->NumberOfElements); diff --git a/drivers/net/dwc_eqos/txqueue.h b/drivers/net/dwc_eqos/txqueue.h index ab396d6..6236b69 100644 --- a/drivers/net/dwc_eqos/txqueue.h +++ b/drivers/net/dwc_eqos/txqueue.h @@ -3,6 +3,7 @@ Transmit queue behavior. Similar to the receive queue. 
*/ #pragma once +struct DeviceContext; struct ChannelRegisters; struct MtlQueueRegisters; @@ -11,7 +12,7 @@ _IRQL_requires_same_ _IRQL_requires_(PASSIVE_LEVEL) NTSTATUS TxQueueCreate( - _In_ NETADAPTER adapter, + _Inout_ DeviceContext* deviceContext, _Inout_ NETTXQUEUE_INIT* queueInit, _In_ WDFDMAENABLER dma, _Inout_ ChannelRegisters* channelRegs,