Skip to content

Commit

Permalink
ntb_hw_amd: Simulate link flap after unclean reboot
Browse files Browse the repository at this point in the history
In case physical link event reporting on the primary side is unreliable,
it may happen that the secondary side tries to renegotiate after a reboot
while the primary side still thinks the link was up all the time and
nothing has happened.  To work around that, on driver load check
the READY bit in the SIDEINFO register; if it is already set, it
means we had a dirty shutdown.  In that case, clear it and send the
doorbell interrupt to the other side, similar to driver unload.
After that, wait for a second for the remote side to see the change and
continue the initialization sequence.

Signed-off-by: Alexander Motin <[email protected]>
  • Loading branch information
amotin committed Sep 14, 2023
1 parent 4397bd8 commit 0fe6742
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 8 deletions.
30 changes: 23 additions & 7 deletions drivers/ntb/hw/amd/ntb_hw_amd.c
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ static void amd_set_side_info_reg(struct amd_ntb_dev *ndev, bool peer)
}
}

static void amd_clear_side_info_reg(struct amd_ntb_dev *ndev, bool peer)
static bool amd_clear_side_info_reg(struct amd_ntb_dev *ndev, bool peer)
{
void __iomem *mmio = NULL;
unsigned int reg;
Expand All @@ -1029,7 +1029,9 @@ static void amd_clear_side_info_reg(struct amd_ntb_dev *ndev, bool peer)
reg &= ~AMD_SIDE_READY;
writel(reg, mmio + AMD_SIDEINFO_OFFSET);
readl(mmio + AMD_SIDEINFO_OFFSET);
return true;
}
return false;
}

static void amd_init_side_info(struct amd_ntb_dev *ndev)
Expand All @@ -1044,16 +1046,19 @@ static void amd_init_side_info(struct amd_ntb_dev *ndev)
writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
}

static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
static bool amd_deinit_side_info(struct amd_ntb_dev *ndev)
{
void __iomem *mmio = ndev->self_mmio;
u32 ntb_ctl;
bool res;

amd_clear_side_info_reg(ndev, false);
res = amd_clear_side_info_reg(ndev, false);

ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);

return res;
}

static int amd_init_ntb(struct amd_ntb_dev *ndev)
Expand Down Expand Up @@ -1239,6 +1244,17 @@ static int amd_ntb_pci_probe(struct pci_dev *pdev,
if (rc)
goto err_init_dev;

/*
* If our READY bit in SIDEINFO register is already set, it likely
* means we have crashed and the other controller could miss it.
* In such case clear the bit, interrupt the other side, and give
* it some time to see zero before we set the READY again below.
*/
if (amd_deinit_side_info(ndev)) {
ntb_peer_db_set(&ndev->ntb, BIT_ULL(ndev->db_last_bit));
msleep(1000);
}

/* write side info */
amd_init_side_info(ndev);

Expand Down Expand Up @@ -1274,8 +1290,8 @@ static void amd_ntb_pci_remove(struct pci_dev *pdev)
* to the peer. This will make sure that when the peer handles the
* DB event, it correctly reads this bit as being 0.
*/
amd_deinit_side_info(ndev);
ntb_peer_db_set(&ndev->ntb, BIT_ULL(ndev->db_last_bit));
if (amd_deinit_side_info(ndev))
ntb_peer_db_set(&ndev->ntb, BIT_ULL(ndev->db_last_bit));
ntb_unregister_device(&ndev->ntb);
ndev_deinit_debugfs(ndev);
amd_deinit_dev(ndev);
Expand All @@ -1290,8 +1306,8 @@ static void amd_ntb_pci_shutdown(struct pci_dev *pdev)
/* Send link down notification */
ntb_link_event(&ndev->ntb);

amd_deinit_side_info(ndev);
ntb_peer_db_set(&ndev->ntb, BIT_ULL(ndev->db_last_bit));
if (amd_deinit_side_info(ndev))
ntb_peer_db_set(&ndev->ntb, BIT_ULL(ndev->db_last_bit));
ntb_unregister_device(&ndev->ntb);
ndev_deinit_debugfs(ndev);
amd_deinit_dev(ndev);
Expand Down
2 changes: 1 addition & 1 deletion drivers/ntb/hw/amd/ntb_hw_amd.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ struct amd_ntb_dev {
#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)

static void amd_set_side_info_reg(struct amd_ntb_dev *ndev, bool peer);
static void amd_clear_side_info_reg(struct amd_ntb_dev *ndev, bool peer);
static bool amd_clear_side_info_reg(struct amd_ntb_dev *ndev, bool peer);
static int amd_poll_link(struct amd_ntb_dev *ndev);

#endif
6 changes: 6 additions & 0 deletions scripts/package/truenas/changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
linux-6.1.49+truenas (6.1.49+truenas-2) sid; urgency=low

* ntb_hw_amd: Simulate link flap after unclean reboot

-- iXsystems engineering team <[email protected]> Wed, 13 Sep 2023 14:00:00 +0500

linux-6.1.49+truenas (6.1.49+truenas-1) sid; urgency=low

* Rebase local commits onto upstream v6.1.49
Expand Down

0 comments on commit 0fe6742

Please sign in to comment.