From 0fe67428e5322f8bde12e79a7f324f92a1f052c8 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 13 Sep 2023 13:03:32 -0400 Subject: [PATCH] ntb_hw_amd: Simulate link flap after unclean reboot If physical link event reporting on the primary side is unreliable, the secondary side may try to renegotiate after a reboot while the primary side still thinks the link was up the whole time and nothing has happened. To work around that, check the READY bit in the SIDEINFO register on driver load; if it is already set, it means we had a dirty shutdown. In that case, clear it and send the doorbell interrupt to the other side, similar to driver unload. After that, wait one second for the remote side to see the change, then continue the initialization sequence. Signed-off-by: Alexander Motin --- drivers/ntb/hw/amd/ntb_hw_amd.c | 30 +++++++++++++++++++++++------- drivers/ntb/hw/amd/ntb_hw_amd.h | 2 +- scripts/package/truenas/changelog | 6 ++++++ 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index d293822eba64d..1b70041afc084 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -1014,7 +1014,7 @@ static void amd_set_side_info_reg(struct amd_ntb_dev *ndev, bool peer) } } -static void amd_clear_side_info_reg(struct amd_ntb_dev *ndev, bool peer) +static bool amd_clear_side_info_reg(struct amd_ntb_dev *ndev, bool peer) { void __iomem *mmio = NULL; unsigned int reg; @@ -1029,7 +1029,9 @@ static void amd_clear_side_info_reg(struct amd_ntb_dev *ndev, bool peer) reg &= ~AMD_SIDE_READY; writel(reg, mmio + AMD_SIDEINFO_OFFSET); readl(mmio + AMD_SIDEINFO_OFFSET); + return true; } + return false; } static void amd_init_side_info(struct amd_ntb_dev *ndev) @@ -1044,16 +1046,19 @@ static void amd_init_side_info(struct amd_ntb_dev *ndev) writel(ntb_ctl, mmio + AMD_CNTL_OFFSET); } -static void amd_deinit_side_info(struct amd_ntb_dev *ndev) +static bool 
amd_deinit_side_info(struct amd_ntb_dev *ndev) { void __iomem *mmio = ndev->self_mmio; u32 ntb_ctl; + bool res; - amd_clear_side_info_reg(ndev, false); + res = amd_clear_side_info_reg(ndev, false); ntb_ctl = readl(mmio + AMD_CNTL_OFFSET); ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL); writel(ntb_ctl, mmio + AMD_CNTL_OFFSET); + + return res; } static int amd_init_ntb(struct amd_ntb_dev *ndev) @@ -1239,6 +1244,17 @@ static int amd_ntb_pci_probe(struct pci_dev *pdev, if (rc) goto err_init_dev; + /* + * If our READY bit in SIDEINFO register is already set, it likely + * means we have crashed and the other controller could miss it. + * In such case clear the bit, interrupt the other side, and give + * it some time to see zero before we set the READY again below. + */ + if (amd_deinit_side_info(ndev)) { + ntb_peer_db_set(&ndev->ntb, BIT_ULL(ndev->db_last_bit)); + msleep(1000); + } + /* write side info */ amd_init_side_info(ndev); @@ -1274,8 +1290,8 @@ static void amd_ntb_pci_remove(struct pci_dev *pdev) * to the peer. This will make sure that when the peer handles the * DB event, it correctly reads this bit as being 0. 
*/ - amd_deinit_side_info(ndev); - ntb_peer_db_set(&ndev->ntb, BIT_ULL(ndev->db_last_bit)); + if (amd_deinit_side_info(ndev)) + ntb_peer_db_set(&ndev->ntb, BIT_ULL(ndev->db_last_bit)); ntb_unregister_device(&ndev->ntb); ndev_deinit_debugfs(ndev); amd_deinit_dev(ndev); @@ -1290,8 +1306,8 @@ static void amd_ntb_pci_shutdown(struct pci_dev *pdev) /* Send link down notification */ ntb_link_event(&ndev->ntb); - amd_deinit_side_info(ndev); - ntb_peer_db_set(&ndev->ntb, BIT_ULL(ndev->db_last_bit)); + if (amd_deinit_side_info(ndev)) + ntb_peer_db_set(&ndev->ntb, BIT_ULL(ndev->db_last_bit)); ntb_unregister_device(&ndev->ntb); ndev_deinit_debugfs(ndev); amd_deinit_dev(ndev); diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h index 5f337b1572a08..9a627e6cf50b1 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.h +++ b/drivers/ntb/hw/amd/ntb_hw_amd.h @@ -217,7 +217,7 @@ struct amd_ntb_dev { #define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work) static void amd_set_side_info_reg(struct amd_ntb_dev *ndev, bool peer); -static void amd_clear_side_info_reg(struct amd_ntb_dev *ndev, bool peer); +static bool amd_clear_side_info_reg(struct amd_ntb_dev *ndev, bool peer); static int amd_poll_link(struct amd_ntb_dev *ndev); #endif diff --git a/scripts/package/truenas/changelog b/scripts/package/truenas/changelog index eccf5dafda37c..eab58808907f6 100644 --- a/scripts/package/truenas/changelog +++ b/scripts/package/truenas/changelog @@ -1,3 +1,9 @@ +linux-6.1.49+truenas (6.1.49+truenas-2) sid; urgency=low + + * ntb_hw_amd: Simulate link flap after unclean reboot + + -- iXsystems engineering team Wed, 13 Sep 2023 14:00:00 +0500 + linux-6.1.49+truenas (6.1.49+truenas-1) sid; urgency=low * Rebase local commits onto upstream v6.1.49