From 102b468b5e190973fbaee6fe682727eb33079811 Mon Sep 17 00:00:00 2001
From: Robert Evans
Date: Mon, 25 Mar 2024 17:56:49 -0400
Subject: [PATCH] Fix corruption caused by mmap flushing problems

1) Make mmap flushes synchronous. Linux may skip flushing dirty
pages already in writeback unless data-integrity sync is requested.

2) Change zfs_putpage to use TXG_WAIT. Otherwise dirty pages may be
skipped due to DMU pushing back on TX assign.

3) Add missing mmap flush when doing block cloning.

4) While here, pass errors from putpage to writepage/writepages.

This change fixes corruption edge cases, but unfortunately adds
synchronous ZIL flushes for dirty mmap pages to llseek and bclone
operations. It may be possible to avoid these sync writes later
but would need more tricky refactoring of the writeback code.

Reviewed-by: Alexander Motin
Reviewed-by: Brian Behlendorf
Signed-off-by: Robert Evans
Closes #15933
Closes #16019
---
 module/os/linux/zfs/zfs_vnops_os.c | 5 +----
 module/os/linux/zfs/zpl_file.c     | 8 ++++----
 module/zfs/zfs_vnops.c             | 6 +++++-
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
index a32307c39331..1cecad9f7755 100644
--- a/module/os/linux/zfs/zfs_vnops_os.c
+++ b/module/os/linux/zfs/zfs_vnops_os.c
@@ -3795,11 +3795,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 
-	err = dmu_tx_assign(tx, TXG_NOWAIT);
+	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
-		if (err == ERESTART)
-			dmu_tx_wait(tx);
-
 		dmu_tx_abort(tx);
 #ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
 		filemap_dirty_folio(page_mapping(pp), page_folio(pp));
diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
index 3caa0fc6c214..9dec52215c7c 100644
--- a/module/os/linux/zfs/zpl_file.c
+++ b/module/os/linux/zfs/zpl_file.c
@@ -720,23 +720,23 @@ zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 {
 	boolean_t *for_sync = data;
 	fstrans_cookie_t cookie;
+	int ret;
 
 	ASSERT(PageLocked(pp));
 	ASSERT(!PageWriteback(pp));
 
 	cookie = spl_fstrans_mark();
-	(void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
+	ret = zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
 	spl_fstrans_unmark(cookie);
 
-	return (0);
+	return (ret);
 }
 
 #ifdef HAVE_WRITEPAGE_T_FOLIO
 static int
 zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
 {
-	(void) zpl_putpage(&pp->page, wbc, data);
-	return (0);
+	return (zpl_putpage(&pp->page, wbc, data));
 }
 #endif
 
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 7f39ad6fc775..babb07ca25a9 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -123,7 +123,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
 
 	/* Flush any mmap()'d data to disk */
 	if (zn_has_cached_data(zp, 0, file_sz - 1))
-		zn_flush_cached_data(zp, B_FALSE);
+		zn_flush_cached_data(zp, B_TRUE);
 
 	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
 	error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
@@ -1187,6 +1187,10 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
 		}
 	}
 
+	/* Flush any mmap()'d data to disk */
+	if (zn_has_cached_data(inzp, inoff, inoff + len - 1))
+		zn_flush_cached_data(inzp, B_TRUE);
+
 	/*
 	 * Maintain predictable lock order.
 	 */
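
For context beyond the patch itself: the writeback path changed above is
the one reached from userspace data-integrity sync. A minimal sketch (not
part of the patch; the file path is an assumption for illustration)
dirties a page through mmap() and then calls msync(MS_SYNC), which on ZFS
drives zfs_putpage() with for_sync set. With WB_SYNC_NONE-style flushes,
the kernel may skip pages already under writeback, which is the class of
problem point 1 of the commit message addresses.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
	const char *path = "/tank/testfile";	/* assumed ZFS-backed path */
	int fd = open(path, O_RDWR | O_CREAT, 0644);
	if (fd == -1) { perror("open"); return (1); }
	if (ftruncate(fd, 4096) == -1) { perror("ftruncate"); return (1); }

	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) { perror("mmap"); return (1); }

	memset(p, 0xab, 4096);			/* dirty the page in memory */

	/*
	 * MS_SYNC requests data-integrity writeback: the call must not
	 * return until the dirty page is durably written, including any
	 * page already in flight under asynchronous writeback.
	 */
	if (msync(p, 4096, MS_SYNC) == -1) { perror("msync"); return (1); }

	munmap(p, 4096);
	close(fd);
	return (0);
}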
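Similarly, a hedged sketch of the block-cloning case fixed in
zfs_clone_range() (point 3): dirty the source file through mmap(), then
clone it with copy_file_range(). Without the flush added by this patch,
the clone could read stale blocks and miss the still-dirty mmap pages.
File paths are assumptions; glibc 2.27+ is assumed for copy_file_range().

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
	int in = open("/tank/src", O_RDWR | O_CREAT, 0644);	/* assumed paths */
	int out = open("/tank/dst", O_RDWR | O_CREAT, 0644);
	if (in == -1 || out == -1) { perror("open"); return (1); }
	if (ftruncate(in, 4096) == -1) { perror("ftruncate"); return (1); }

	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, in, 0);
	if (p == MAP_FAILED) { perror("mmap"); return (1); }
	memset(p, 0xcd, 4096);		/* dirty pages not yet on disk */

	/*
	 * With the patch applied, zfs_clone_range() flushes cached mmap
	 * data (zn_flush_cached_data(..., B_TRUE)) before cloning, so
	 * the destination sees the 0xcd bytes rather than stale data.
	 */
	if (copy_file_range(in, NULL, out, NULL, 4096, 0) == -1) {
		perror("copy_file_range");
		return (1);
	}

	munmap(p, 4096);
	close(in);
	close(out);
	return (0);
}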