diff --git a/cmd/ztest.c b/cmd/ztest.c
index b6b99bfff6db..398c519cfc35 100644
--- a/cmd/ztest.c
+++ b/cmd/ztest.c
@@ -2412,7 +2412,6 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
 	int error;
 
 	ASSERT3P(lwb, !=, NULL);
-	ASSERT3P(zio, !=, NULL);
 	ASSERT3U(size, !=, 0);
 
 	ztest_object_lock(zd, object, RL_READER);
@@ -2446,6 +2445,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
 		    DMU_READ_NO_PREFETCH);
 		ASSERT0(error);
 	} else {
+		ASSERT3P(zio, !=, NULL);
 		size = doi.doi_data_block_size;
 		if (ISP2(size)) {
 			offset = P2ALIGN(offset, size);
diff --git a/include/sys/zil_impl.h b/include/sys/zil_impl.h
index b58dad9695a6..f780ad3d61bc 100644
--- a/include/sys/zil_impl.h
+++ b/include/sys/zil_impl.h
@@ -38,14 +38,22 @@ extern "C" {
 /*
  * Possible states for a given lwb structure.
  *
- * An lwb will start out in the "closed" state, and then transition to
- * the "opened" state via a call to zil_lwb_write_open(). When
- * transitioning from "closed" to "opened" the zilog's "zl_issuer_lock"
- * must be held.
+ * An lwb will start out in the "new" state, and transition to the "opened"
+ * state via a call to zil_lwb_write_open() on first itx assignment.  When
+ * transitioning from "new" to "opened" the zilog's "zl_issuer_lock" must be
+ * held.
  *
- * After the lwb is "opened", it can transition into the "issued" state
- * via zil_lwb_write_close(). Again, the zilog's "zl_issuer_lock" must
- * be held when making this transition.
+ * After the lwb is "opened", it can be assigned number of itxs and transition
+ * into the "closed" state via zil_lwb_write_close() when full or on timeout.
+ * When transitioning from "opened" to "closed" the zilog's "zl_issuer_lock"
+ * must be held.  New lwb allocation also takes "zl_lock" to protect the list.
+ *
+ * After the lwb is "closed", it can transition into the "ready" state via
+ * zil_lwb_write_issue().  "zl_lock" must be held when making this transition.
+ * Since it is done by the same thread, "zl_issuer_lock" is not needed.
+ *
+ * When lwb in "ready" state receives its block pointer, it can transition to
+ * "issued". "zl_lock" must be held when making this transition.
  *
  * After the lwb's write zio completes, it transitions into the "write
  * done" state via zil_lwb_write_done(); and then into the "flush done"
@@ -62,17 +70,20 @@ extern "C" {
  *
  * Additionally, correctness when reading an lwb's state is often
  * achieved by exploiting the fact that these state transitions occur in
- * this specific order; i.e. "closed" to "opened" to "issued" to "done".
+ * this specific order; i.e. "new" to "opened" to "closed" to "ready" to
+ * "issued" to "write_done" and finally "flush_done".
  *
- * Thus, if an lwb is in the "closed" or "opened" state, holding the
+ * Thus, if an lwb is in the "new" or "opened" state, holding the
  * "zl_issuer_lock" will prevent a concurrent thread from transitioning
- * that lwb to the "issued" state. Likewise, if an lwb is already in the
- * "issued" state, holding the "zl_lock" will prevent a concurrent
- * thread from transitioning that lwb to the "write done" state.
+ * that lwb to the "closed" state. Likewise, if an lwb is already in the
+ * "ready" state, holding the "zl_lock" will prevent a concurrent thread
+ * from transitioning that lwb to the "issued" state.
  */
 typedef enum {
-    LWB_STATE_CLOSED,
+    LWB_STATE_NEW,
     LWB_STATE_OPENED,
+    LWB_STATE_CLOSED,
+    LWB_STATE_READY,
     LWB_STATE_ISSUED,
     LWB_STATE_WRITE_DONE,
     LWB_STATE_FLUSH_DONE,
@@ -91,17 +102,21 @@ typedef enum {
 typedef struct lwb {
 	zilog_t		*lwb_zilog;	/* back pointer to log struct */
 	blkptr_t	lwb_blk;	/* on disk address of this log blk */
+	boolean_t	lwb_slim;	/* log block has slim format */
 	boolean_t	lwb_slog;	/* lwb_blk is on SLOG device */
-	boolean_t	lwb_indirect;	/* do not postpone zil_lwb_commit() */
+	int		lwb_error;	/* log block allocation error */
+	int		lwb_nmax;	/* max bytes in the buffer */
 	int		lwb_nused;	/* # used bytes in buffer */
 	int		lwb_nfilled;	/* # filled bytes in buffer */
 	int		lwb_sz;		/* size of block and buffer */
 	lwb_state_t	lwb_state;	/* the state of this lwb */
 	char		*lwb_buf;	/* log write buffer */
+	zio_t		*lwb_child_zio;	/* parent zio for children */
 	zio_t		*lwb_write_zio;	/* zio for the lwb buffer */
 	zio_t		*lwb_root_zio;	/* root zio for lwb write and flushes */
 	hrtime_t	lwb_issued_timestamp; /* when was the lwb issued? */
 	uint64_t	lwb_issued_txg;	/* the txg when the write is issued */
+	uint64_t	lwb_alloc_txg;	/* the txg when lwb_blk is allocated */
 	uint64_t	lwb_max_txg;	/* highest txg in this lwb */
 	list_node_t	lwb_node;	/* zilog->zl_lwb_list linkage */
 	list_node_t	lwb_issue_node;	/* linkage of lwbs ready for issue */
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 54ea43363bfc..fc46ec122575 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -839,7 +839,6 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
 	uint64_t zp_gen;
 
 	ASSERT3P(lwb, !=, NULL);
-	ASSERT3P(zio, !=, NULL);
 	ASSERT3U(size, !=, 0);
 
 	/*
@@ -889,6 +888,7 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
 		}
 		ASSERT(error == 0 || error == ENOENT);
 	} else { /* indirect write */
+		ASSERT3P(zio, !=, NULL);
 		/*
 		 * Have to lock the whole block to ensure when it's
 		 * written out and its checksum is being calculated
diff --git a/module/zfs/zil.c b/module/zfs/zil.c
index 6f04a7d4a7df..1b540da9d940 100644
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -151,7 +151,6 @@ static kmem_cache_t *zil_lwb_cache;
 static kmem_cache_t *zil_zcw_cache;
 
 static void zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx);
-static void zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb);
 static itx_t *zil_itx_clone(itx_t *oitx);
 
 static int
@@ -760,33 +759,52 @@ zil_lwb_vdev_compare(const void *x1, const void *x2)
 	return (TREE_CMP(v1, v2));
 }
 
+/*
+ * Allocate a new lwb.  We may already have a block pointer for it, in which
+ * case we get size and version from there.  Or we may not yet, in which case
+ * we choose them here and later make the block allocation match.
+ */
 static lwb_t *
-zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg)
+zil_alloc_lwb(zilog_t *zilog, int sz, blkptr_t *bp, boolean_t slog,
+    uint64_t txg, lwb_state_t state)
 {
 	lwb_t *lwb;
 
 	lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP);
 	lwb->lwb_zilog = zilog;
-	lwb->lwb_blk = *bp;
+	if (bp) {
+		lwb->lwb_blk = *bp;
+		lwb->lwb_slim = (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2);
+		sz = BP_GET_LSIZE(bp);
+	} else {
+		BP_ZERO(&lwb->lwb_blk);
+		lwb->lwb_slim = (spa_version(zilog->zl_spa) >=
+		    SPA_VERSION_SLIM_ZIL);
+	}
 	lwb->lwb_slog = slog;
-	lwb->lwb_indirect = B_FALSE;
-	if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
+	lwb->lwb_error = 0;
+	if (lwb->lwb_slim) {
+		lwb->lwb_nmax = sz;
 		lwb->lwb_nused = lwb->lwb_nfilled = sizeof (zil_chain_t);
-		lwb->lwb_sz = BP_GET_LSIZE(bp);
 	} else {
+		lwb->lwb_nmax = sz - sizeof (zil_chain_t);
 		lwb->lwb_nused = lwb->lwb_nfilled = 0;
-		lwb->lwb_sz = BP_GET_LSIZE(bp) - sizeof (zil_chain_t);
 	}
-	lwb->lwb_state = LWB_STATE_CLOSED;
-	lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp));
+	lwb->lwb_sz = sz;
+	lwb->lwb_state = state;
+	lwb->lwb_buf = zio_buf_alloc(sz);
+	lwb->lwb_child_zio = NULL;
 	lwb->lwb_write_zio = NULL;
 	lwb->lwb_root_zio = NULL;
 	lwb->lwb_issued_timestamp = 0;
 	lwb->lwb_issued_txg = 0;
-	lwb->lwb_max_txg = txg;
+	lwb->lwb_alloc_txg = txg;
+	lwb->lwb_max_txg = 0;
 
 	mutex_enter(&zilog->zl_lock);
 	list_insert_tail(&zilog->zl_lwb_list, lwb);
+	if (state != LWB_STATE_NEW)
+		zilog->zl_last_lwb_opened = lwb;
 	mutex_exit(&zilog->zl_lock);
 
 	return (lwb);
@@ -800,10 +818,12 @@ zil_free_lwb(zilog_t *zilog, lwb_t *lwb)
 	VERIFY(list_is_empty(&lwb->lwb_waiters));
 	VERIFY(list_is_empty(&lwb->lwb_itxs));
 	ASSERT(avl_is_empty(&lwb->lwb_vdev_tree));
+	ASSERT3P(lwb->lwb_child_zio, ==, NULL);
 	ASSERT3P(lwb->lwb_write_zio, ==, NULL);
 	ASSERT3P(lwb->lwb_root_zio, ==, NULL);
+	ASSERT3U(lwb->lwb_alloc_txg, <=, spa_syncing_txg(zilog->zl_spa));
 	ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa));
-	ASSERT(lwb->lwb_state == LWB_STATE_CLOSED ||
+	ASSERT(lwb->lwb_state == LWB_STATE_NEW ||
 	    lwb->lwb_state == LWB_STATE_FLUSH_DONE);
 
 	/*
@@ -954,7 +974,7 @@ zil_create(zilog_t *zilog)
 	 * Allocate a log write block (lwb) for the first log block.
 	 */
 	if (error == 0)
-		lwb = zil_alloc_lwb(zilog, &blk, slog, txg);
+		lwb = zil_alloc_lwb(zilog, 0, &blk, slog, txg, LWB_STATE_NEW);
 
 	/*
 	 * If we just allocated the first log block, commit our transaction
@@ -1041,7 +1061,8 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
 		while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) {
 			if (lwb->lwb_buf != NULL)
 				zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
-			zio_free(zilog->zl_spa, txg, &lwb->lwb_blk);
+			if (!BP_IS_HOLE(&lwb->lwb_blk))
+				zio_free(zilog->zl_spa, txg, &lwb->lwb_blk);
 			zil_free_lwb(zilog, lwb);
 		}
 	} else if (!keep_first) {
@@ -1269,21 +1290,21 @@ zil_commit_waiter_link_lwb(zil_commit_waiter_t *zcw, lwb_t *lwb)
 {
 	/*
 	 * The lwb_waiters field of the lwb is protected by the zilog's
-	 * zl_lock, thus it must be held when calling this function.
+	 * zl_issuer_lock while the lwb is open and zl_lock otherwise.
+	 * zl_issuer_lock also protects leaving the open state.
+	 * zcw_lwb setting is protected by zl_issuer_lock and state !=
+	 * flush_done, which transition is protected by zl_lock.
 	 */
-	ASSERT(MUTEX_HELD(&lwb->lwb_zilog->zl_lock));
+	ASSERT(MUTEX_HELD(&lwb->lwb_zilog->zl_issuer_lock));
+	IMPLY(lwb->lwb_state != LWB_STATE_OPENED,
+	    MUTEX_HELD(&lwb->lwb_zilog->zl_lock));
+	ASSERT3S(lwb->lwb_state, !=, LWB_STATE_NEW);
+	ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE);
 
-	mutex_enter(&zcw->zcw_lock);
 	ASSERT(!list_link_active(&zcw->zcw_node));
-	ASSERT3P(zcw->zcw_lwb, ==, NULL);
-	ASSERT3P(lwb, !=, NULL);
-	ASSERT(lwb->lwb_state == LWB_STATE_OPENED ||
-	    lwb->lwb_state == LWB_STATE_ISSUED ||
-	    lwb->lwb_state == LWB_STATE_WRITE_DONE);
-
 	list_insert_tail(&lwb->lwb_waiters, zcw);
+	ASSERT3P(zcw->zcw_lwb, ==, NULL);
 	zcw->zcw_lwb = lwb;
-	mutex_exit(&zcw->zcw_lock);
 }
 
 /*
@@ -1294,11 +1315,9 @@ zil_commit_waiter_link_lwb(zil_commit_waiter_t *zcw, lwb_t *lwb)
 static void
 zil_commit_waiter_link_nolwb(zil_commit_waiter_t *zcw, list_t *nolwb)
 {
-	mutex_enter(&zcw->zcw_lock);
 	ASSERT(!list_link_active(&zcw->zcw_node));
-	ASSERT3P(zcw->zcw_lwb, ==, NULL);
 	list_insert_tail(nolwb, zcw);
-	mutex_exit(&zcw->zcw_lock);
+	ASSERT3P(zcw->zcw_lwb, ==, NULL);
 }
 
 void
@@ -1484,7 +1503,7 @@ zil_lwb_flush_wait_all(zilog_t *zilog, uint64_t txg)
 	mutex_enter(&zilog->zl_lock);
 	mutex_enter(&zilog->zl_lwb_io_lock);
 	lwb_t *lwb = list_head(&zilog->zl_lwb_list);
-	while (lwb != NULL && lwb->lwb_max_txg <= txg) {
+	while (lwb != NULL) {
 		if (lwb->lwb_issued_txg <= txg) {
 			ASSERT(lwb->lwb_state != LWB_STATE_ISSUED);
 			ASSERT(lwb->lwb_state != LWB_STATE_WRITE_DONE);
@@ -1527,14 +1546,6 @@ zil_lwb_write_done(zio_t *zio)
 
 	ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), !=, 0);
 
-	ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF);
-	ASSERT(BP_GET_TYPE(zio->io_bp) == DMU_OT_INTENT_LOG);
-	ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
-	ASSERT(BP_GET_BYTEORDER(zio->io_bp) == ZFS_HOST_BYTEORDER);
-	ASSERT(!BP_IS_GANG(zio->io_bp));
-	ASSERT(!BP_IS_HOLE(zio->io_bp));
-	ASSERT(BP_GET_FILL(zio->io_bp) == 0);
-
 	abd_free(zio->io_abd);
 	zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
 	lwb->lwb_buf = NULL;
@@ -1542,6 +1553,7 @@ zil_lwb_write_done(zio_t *zio)
 	mutex_enter(&zilog->zl_lock);
 	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED);
 	lwb->lwb_state = LWB_STATE_WRITE_DONE;
+	lwb->lwb_child_zio = NULL;
 	lwb->lwb_write_zio = NULL;
 	nlwb = list_next(&zilog->zl_lwb_list, lwb);
 	mutex_exit(&zilog->zl_lock);
@@ -1606,116 +1618,75 @@ zil_lwb_write_done(zio_t *zio)
 	}
 }
 
+/*
+ * Build the zio dependency chain, which is used to preserve the ordering of
+ * lwb completions that is required by the semantics of the ZIL. Each new lwb
+ * zio becomes a parent of the previous lwb zio, such that the new lwb's zio
+ * cannot complete until the previous lwb's zio completes.
+ *
+ * This is required by the semantics of zil_commit(): the commit waiters
+ * attached to the lwbs will be woken in the lwb zio's completion callback,
+ * so this zio dependency graph ensures the waiters are woken in the correct
+ * order (the same order the lwbs were created).
+ */
 static void
 zil_lwb_set_zio_dependency(zilog_t *zilog, lwb_t *lwb)
 {
-	lwb_t *last_lwb_opened = zilog->zl_last_lwb_opened;
-
-	ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
 	ASSERT(MUTEX_HELD(&zilog->zl_lock));
 
+	lwb_t *prev_lwb = list_prev(&zilog->zl_lwb_list, lwb);
+	if (prev_lwb == NULL ||
+	    prev_lwb->lwb_state == LWB_STATE_FLUSH_DONE)
+		return;
+
 	/*
-	 * The zilog's "zl_last_lwb_opened" field is used to build the
-	 * lwb/zio dependency chain, which is used to preserve the
-	 * ordering of lwb completions that is required by the semantics
-	 * of the ZIL. Each new lwb zio becomes a parent of the
-	 * "previous" lwb zio, such that the new lwb's zio cannot
-	 * complete until the "previous" lwb's zio completes.
+	 * If the previous lwb's write hasn't already completed, we also want
+	 * to order the completion of the lwb write zios (above, we only order
+	 * the completion of the lwb root zios). This is required because of
+	 * how we can defer the DKIOCFLUSHWRITECACHE commands for each lwb.
+	 *
+	 * When the DKIOCFLUSHWRITECACHE commands are deferred, the previous
+	 * lwb will rely on this lwb to flush the vdevs written to by that
+	 * previous lwb. Thus, we need to ensure this lwb doesn't issue the
+	 * flush until after the previous lwb's write completes. We ensure
+	 * this ordering by setting the zio parent/child relationship here.
 	 *
-	 * This is required by the semantics of zil_commit(); the commit
-	 * waiters attached to the lwbs will be woken in the lwb zio's
-	 * completion callback, so this zio dependency graph ensures the
-	 * waiters are woken in the correct order (the same order the
-	 * lwbs were created).
+	 * Without this relationship on the lwb's write zio, it's possible
+	 * for this lwb's write to complete prior to the previous lwb's write
+	 * completing; and thus, the vdevs for the previous lwb would be
+	 * flushed prior to that lwb's data being written to those vdevs (the
+	 * vdevs are flushed in the lwb write zio's completion handler,
+	 * zil_lwb_write_done()).
 	 */
-	if (last_lwb_opened != NULL &&
-	    last_lwb_opened->lwb_state != LWB_STATE_FLUSH_DONE) {
-		ASSERT(last_lwb_opened->lwb_state == LWB_STATE_OPENED ||
-		    last_lwb_opened->lwb_state == LWB_STATE_ISSUED ||
-		    last_lwb_opened->lwb_state == LWB_STATE_WRITE_DONE);
-
-		ASSERT3P(last_lwb_opened->lwb_root_zio, !=, NULL);
-		zio_add_child(lwb->lwb_root_zio,
-		    last_lwb_opened->lwb_root_zio);
-
-		/*
-		 * If the previous lwb's write hasn't already completed,
-		 * we also want to order the completion of the lwb write
-		 * zios (above, we only order the completion of the lwb
-		 * root zios). This is required because of how we can
-		 * defer the DKIOCFLUSHWRITECACHE commands for each lwb.
-		 *
-		 * When the DKIOCFLUSHWRITECACHE commands are deferred,
-		 * the previous lwb will rely on this lwb to flush the
-		 * vdevs written to by that previous lwb. Thus, we need
-		 * to ensure this lwb doesn't issue the flush until
-		 * after the previous lwb's write completes. We ensure
-		 * this ordering by setting the zio parent/child
-		 * relationship here.
-		 *
-		 * Without this relationship on the lwb's write zio,
-		 * it's possible for this lwb's write to complete prior
-		 * to the previous lwb's write completing; and thus, the
-		 * vdevs for the previous lwb would be flushed prior to
-		 * that lwb's data being written to those vdevs (the
-		 * vdevs are flushed in the lwb write zio's completion
-		 * handler, zil_lwb_write_done()).
-		 */
-		if (last_lwb_opened->lwb_state != LWB_STATE_WRITE_DONE) {
-			ASSERT(last_lwb_opened->lwb_state == LWB_STATE_OPENED ||
-			    last_lwb_opened->lwb_state == LWB_STATE_ISSUED);
-
-			ASSERT3P(last_lwb_opened->lwb_write_zio, !=, NULL);
-			zio_add_child(lwb->lwb_write_zio,
-			    last_lwb_opened->lwb_write_zio);
-		}
+	if (prev_lwb->lwb_state == LWB_STATE_ISSUED) {
+		ASSERT3P(prev_lwb->lwb_write_zio, !=, NULL);
+		zio_add_child(lwb->lwb_write_zio, prev_lwb->lwb_write_zio);
+	} else {
+		ASSERT3S(prev_lwb->lwb_state, ==, LWB_STATE_WRITE_DONE);
 	}
+
+	ASSERT3P(prev_lwb->lwb_root_zio, !=, NULL);
+	zio_add_child(lwb->lwb_root_zio, prev_lwb->lwb_root_zio);
 }
 
 
 /*
  * This function's purpose is to "open" an lwb such that it is ready to
- * accept new itxs being committed to it. To do this, the lwb's zio
- * structures are created, and linked to the lwb. This function is
- * idempotent; if the passed in lwb has already been opened, this
- * function is essentially a no-op.
+ * accept new itxs being committed to it. This function is idempotent; if
+ * the passed in lwb has already been opened, it is essentially a no-op.
  */
 static void
 zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb)
 {
-	zbookmark_phys_t zb;
-	zio_priority_t prio;
-
 	ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
-	ASSERT3P(lwb, !=, NULL);
-	EQUIV(lwb->lwb_root_zio == NULL, lwb->lwb_state == LWB_STATE_CLOSED);
-	EQUIV(lwb->lwb_root_zio != NULL, lwb->lwb_state == LWB_STATE_OPENED);
 
-	if (lwb->lwb_root_zio != NULL)
+	if (lwb->lwb_state != LWB_STATE_NEW) {
+		ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
 		return;
-
-	lwb->lwb_root_zio = zio_root(zilog->zl_spa,
-	    zil_lwb_flush_vdevs_done, lwb, ZIO_FLAG_CANFAIL);
-
-	abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf,
-	    BP_GET_LSIZE(&lwb->lwb_blk));
-
-	if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
-		prio = ZIO_PRIORITY_SYNC_WRITE;
-	else
-		prio = ZIO_PRIORITY_ASYNC_WRITE;
-
-	SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
-	    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
-	    lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
-
-	lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, zilog->zl_spa, 0,
-	    &lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
-	    zil_lwb_write_done, lwb, prio, ZIO_FLAG_CANFAIL, &zb);
+	}
 
 	mutex_enter(&zilog->zl_lock);
 	lwb->lwb_state = LWB_STATE_OPENED;
-	zil_lwb_set_zio_dependency(zilog, lwb);
 	zilog->zl_last_lwb_opened = lwb;
 	mutex_exit(&zilog->zl_lock);
 }
@@ -1752,57 +1723,21 @@ static uint_t zil_maxblocksize = SPA_OLD_MAXBLOCKSIZE;
  * Has to be called under zl_issuer_lock to chain more lwbs.
  */
 static lwb_t *
-zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
+zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, lwb_state_t state)
 {
-	lwb_t *nlwb = NULL;
-	zil_chain_t *zilc;
-	spa_t *spa = zilog->zl_spa;
-	blkptr_t *bp;
-	dmu_tx_t *tx;
-	uint64_t txg;
-	uint64_t zil_blksz;
-	int i, error;
-	boolean_t slog;
+	int i;
 
 	ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
-	ASSERT3P(lwb->lwb_root_zio, !=, NULL);
-	ASSERT3P(lwb->lwb_write_zio, !=, NULL);
 	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
+	lwb->lwb_state = LWB_STATE_CLOSED;
 
 	/*
-	 * If this lwb includes indirect writes, we have to commit before
-	 * creating the transaction, otherwise we may end up in dead lock.
-	 */
-	if (lwb->lwb_indirect) {
-		for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
-		    itx = list_next(&lwb->lwb_itxs, itx))
-			zil_lwb_commit(zilog, lwb, itx);
-		lwb->lwb_nused = lwb->lwb_nfilled;
-	}
-
-	/*
-	 * Allocate the next block and save its address in this block
-	 * before writing it in order to establish the log chain.
+	 * If there was an allocation failure then returned NULL will trigger
+	 * zil_commit_writer_stall() at the caller.  This is inherently racy,
+	 * since allocation may not have happened yet.
 	 */
-
-	tx = dmu_tx_create(zilog->zl_os);
-
-	/*
-	 * Since we are not going to create any new dirty data, and we
-	 * can even help with clearing the existing dirty data, we
-	 * should not be subject to the dirty data based delays. We
-	 * use TXG_NOTHROTTLE to bypass the delay mechanism.
-	 */
-	VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE));
-
-	dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
-	txg = dmu_tx_get_txg(tx);
-
-	mutex_enter(&zilog->zl_lwb_io_lock);
-	lwb->lwb_issued_txg = txg;
-	zilog->zl_lwb_inflight[txg & TXG_MASK]++;
-	zilog->zl_lwb_max_issued_txg = MAX(txg, zilog->zl_lwb_max_issued_txg);
-	mutex_exit(&zilog->zl_lwb_io_lock);
+	if (lwb->lwb_error != 0)
+		return (NULL);
 
 	/*
 	 * Log blocks are pre-allocated. Here we select the size of the next
@@ -1820,7 +1755,7 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
 	 * the maximum block size because we can exhaust the available
 	 * pool log space.
 	 */
-	zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t);
+	uint64_t zil_blksz = zilog->zl_cur_used + sizeof (zil_chain_t);
 	for (i = 0; zil_blksz > zil_block_buckets[i].limit; i++)
 		continue;
 	zil_blksz = MIN(zil_block_buckets[i].blksz, zilog->zl_max_block_size);
@@ -1832,94 +1767,149 @@ zil_lwb_write_close(zilog_t *zilog, lwb_t *lwb, list_t *ilwbs)
 	    uint64_t, zilog->zl_prev_blks[zilog->zl_prev_rotor]);
 	zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
 
-	if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2)
-		zilc = (zil_chain_t *)lwb->lwb_buf;
-	else
-		zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
-	bp = &zilc->zc_next_blk;
-	BP_ZERO(bp);
-	error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, zil_blksz, &slog);
-	if (error == 0) {
-		ASSERT3U(bp->blk_birth, ==, txg);
-		bp->blk_cksum = lwb->lwb_blk.blk_cksum;
-		bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++;
+	return (zil_alloc_lwb(zilog, zil_blksz, NULL, 0, 0, state));
+}
 
-		/*
-		 * Allocate a new log write block (lwb).
-		 */
-		nlwb = zil_alloc_lwb(zilog, bp, slog, txg);
-	}
+/*
+ * Finalize previously closed block and issue the write zio.
+ */
+static void
+zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
+{
+	spa_t *spa = zilog->zl_spa;
+	zil_chain_t *zilc;
+	boolean_t slog;
+	zbookmark_phys_t zb;
+	zio_priority_t prio;
+	int error;
 
-	lwb->lwb_state = LWB_STATE_ISSUED;
+	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_CLOSED);
 
-	dmu_tx_commit(tx);
+	/* Actually fill the lwb with the data. */
+	for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
+	    itx = list_next(&lwb->lwb_itxs, itx))
+		zil_lwb_commit(zilog, lwb, itx);
+	lwb->lwb_nused = lwb->lwb_nfilled;
+
+	lwb->lwb_root_zio = zio_root(spa, zil_lwb_flush_vdevs_done, lwb,
+	    ZIO_FLAG_CANFAIL);
 
 	/*
-	 * We need to acquire the config lock for the lwb to issue it later.
-	 * However, if we already have a queue of closed parent lwbs already
-	 * holding the config lock (but not yet issued), we can't block here
-	 * waiting on the lock or we will deadlock.  In that case we must
-	 * first issue to parent IOs before waiting on the lock.
+	 * The lwb is now ready to be issued, but it can be only if it already
+	 * got its block pointer allocated or the allocation has failed.
+	 * Otherwise leave it as-is, relying on some other thread to issue it
+	 * after allocating its block pointer via calling zil_lwb_write_issue()
+	 * for the previous lwb(s) in the chain.
 	 */
-	if (ilwbs && !list_is_empty(ilwbs)) {
-		if (!spa_config_tryenter(spa, SCL_STATE, lwb, RW_READER)) {
-			lwb_t *tlwb;
-			while ((tlwb = list_remove_head(ilwbs)) != NULL)
-				zil_lwb_write_issue(zilog, tlwb);
-			spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
+	mutex_enter(&zilog->zl_lock);
+	lwb->lwb_state = LWB_STATE_READY;
+	if (BP_IS_HOLE(&lwb->lwb_blk) && lwb->lwb_error == 0) {
+		mutex_exit(&zilog->zl_lock);
+		return;
+	}
+	mutex_exit(&zilog->zl_lock);
+
+next_lwb:
+	if (lwb->lwb_slim)
+		zilc = (zil_chain_t *)lwb->lwb_buf;
+	else
+		zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_nmax);
+	int wsz = lwb->lwb_sz;
+	if (lwb->lwb_error == 0) {
+		abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf, lwb->lwb_sz);
+		if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
+			prio = ZIO_PRIORITY_SYNC_WRITE;
+		else
+			prio = ZIO_PRIORITY_ASYNC_WRITE;
+		SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
+		    ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
+		    lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
+		lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, spa, 0,
+		    &lwb->lwb_blk, lwb_abd, lwb->lwb_sz, zil_lwb_write_done,
+		    lwb, prio, ZIO_FLAG_CANFAIL, &zb);
+		zil_lwb_add_block(lwb, &lwb->lwb_blk);
+
+		if (lwb->lwb_slim) {
+			/* For Slim ZIL only write what is used. */
+			wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ,
+			    int);
+			ASSERT3S(wsz, <=, lwb->lwb_sz);
+			zio_shrink(lwb->lwb_write_zio, wsz);
+			wsz = lwb->lwb_write_zio->io_size;
 		}
+		memset(lwb->lwb_buf + lwb->lwb_nused, 0, wsz - lwb->lwb_nused);
+		zilc->zc_pad = 0;
+		zilc->zc_nused = lwb->lwb_nused;
+		zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum;
 	} else {
-		spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
+		/*
+		 * We can't write the lwb if there was an allocation failure,
+		 * so create a null zio instead just to maintain dependencies.
+		 */
+		lwb->lwb_write_zio = zio_null(lwb->lwb_root_zio, spa, NULL,
+		    zil_lwb_write_done, lwb, ZIO_FLAG_CANFAIL);
+		lwb->lwb_write_zio->io_error = lwb->lwb_error;
 	}
-
-	if (ilwbs)
-		list_insert_tail(ilwbs, lwb);
+	if (lwb->lwb_child_zio)
+		zio_add_child(lwb->lwb_write_zio, lwb->lwb_child_zio);
 
 	/*
-	 * If there was an allocation failure then nlwb will be null which
-	 * forces a txg_wait_synced().
+	 * Open transaction to allocate the next block pointer.
 	 */
-	return (nlwb);
-}
+	dmu_tx_t *tx = dmu_tx_create(zilog->zl_os);
+	VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE));
+	dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
+	uint64_t txg = dmu_tx_get_txg(tx);
 
-/*
- * Finalize previously closed block and issue the write zio.
- * Does not require locking.
- */
-static void
-zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
-{
-	zil_chain_t *zilc;
-	int wsz;
-
-	/* Actually fill the lwb with the data if not yet. */
-	if (!lwb->lwb_indirect) {
-		for (itx_t *itx = list_head(&lwb->lwb_itxs); itx;
-		    itx = list_next(&lwb->lwb_itxs, itx))
-			zil_lwb_commit(zilog, lwb, itx);
-		lwb->lwb_nused = lwb->lwb_nfilled;
+	/*
+	 * Allocate next the block pointer unless we are already in error.
+	 */
+	lwb_t *nlwb = list_next(&zilog->zl_lwb_list, lwb);
+	blkptr_t *bp = &zilc->zc_next_blk;
+	BP_ZERO(bp);
+	error = lwb->lwb_error;
+	if (error == 0) {
+		error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, nlwb->lwb_sz,
+		    &slog);
+	}
+	if (error == 0) {
+		ASSERT3U(bp->blk_birth, ==, txg);
+		BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 :
+		    ZIO_CHECKSUM_ZILOG);
+		bp->blk_cksum = lwb->lwb_blk.blk_cksum;
+		bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++;
 	}
 
-	if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
-		/* For Slim ZIL only write what is used. */
-		wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, int);
-		ASSERT3S(wsz, <=, lwb->lwb_sz);
-		zio_shrink(lwb->lwb_write_zio, wsz);
-		wsz = lwb->lwb_write_zio->io_size;
+	/*
+	 * Reduce TXG open time by incrementing inflight counter and committing
+	 * the transaciton.  zil_sync() will wait for it to return to zero.
+	 */
+	mutex_enter(&zilog->zl_lwb_io_lock);
+	lwb->lwb_issued_txg = txg;
+	zilog->zl_lwb_inflight[txg & TXG_MASK]++;
+	zilog->zl_lwb_max_issued_txg = MAX(txg, zilog->zl_lwb_max_issued_txg);
+	mutex_exit(&zilog->zl_lwb_io_lock);
+	dmu_tx_commit(tx);
 
-		zilc = (zil_chain_t *)lwb->lwb_buf;
-	} else {
-		wsz = lwb->lwb_sz;
-		zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_sz);
-	}
-	zilc->zc_pad = 0;
-	zilc->zc_nused = lwb->lwb_nused;
-	zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum;
+	spa_config_enter(spa, SCL_STATE, lwb, RW_READER);
 
 	/*
-	 * clear unused data for security
+	 * We've completed all potentially blocking operations.  Update the
+	 * nlwb and allow it proceed without possible lock order reversals.
 	 */
-	memset(lwb->lwb_buf + lwb->lwb_nused, 0, wsz - lwb->lwb_nused);
+	mutex_enter(&zilog->zl_lock);
+	zil_lwb_set_zio_dependency(zilog, lwb);
+	lwb->lwb_state = LWB_STATE_ISSUED;
+
+	if (nlwb) {
+		nlwb->lwb_blk = *bp;
+		nlwb->lwb_error = error;
+		nlwb->lwb_slog = slog;
+		nlwb->lwb_alloc_txg = txg;
+		if (nlwb->lwb_state != LWB_STATE_READY)
+			nlwb = NULL;
+	}
+	mutex_exit(&zilog->zl_lock);
 
 	if (lwb->lwb_slog) {
 		ZIL_STAT_BUMP(zilog, zil_itx_metaslab_slog_count);
@@ -1938,11 +1928,19 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
 		ZIL_STAT_INCR(zilog, zil_itx_metaslab_normal_alloc,
 		    BP_GET_LSIZE(&lwb->lwb_blk));
 	}
-	ASSERT(spa_config_held(zilog->zl_spa, SCL_STATE, RW_READER));
-	zil_lwb_add_block(lwb, &lwb->lwb_blk);
 	lwb->lwb_issued_timestamp = gethrtime();
 	zio_nowait(lwb->lwb_root_zio);
 	zio_nowait(lwb->lwb_write_zio);
+	if (lwb->lwb_child_zio)
+		zio_nowait(lwb->lwb_child_zio);
+
+	/*
+	 * If nlwb was ready when we gave it the block pointer,
+	 * it is on us to issue it and possibly following ones.
+	 */
+	lwb = nlwb;
+	if (lwb)
+		goto next_lwb;
 }
 
 /*
@@ -2014,10 +2012,7 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
 	 * For more details, see the comment above zil_commit().
 	 */
 	if (lr->lrc_txtype == TX_COMMIT) {
-		mutex_enter(&zilog->zl_lock);
 		zil_commit_waiter_link_lwb(itx->itx_private, lwb);
-		itx->itx_private = NULL;
-		mutex_exit(&zilog->zl_lock);
 		list_insert_tail(&lwb->lwb_itxs, itx);
 		return (lwb);
 	}
@@ -2036,17 +2031,17 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
 	 * If this record won't fit in the current log block, start a new one.
 	 * For WR_NEED_COPY optimize layout for minimal number of chunks.
 	 */
-	lwb_sp = lwb->lwb_sz - lwb->lwb_nused;
+	lwb_sp = lwb->lwb_nmax - lwb->lwb_nused;
 	max_log_data = zil_max_log_data(zilog, sizeof (lr_write_t));
 	if (reclen > lwb_sp || (reclen + dlen > lwb_sp &&
 	    lwb_sp < zil_max_waste_space(zilog) &&
 	    (dlen % max_log_data == 0 ||
 	    lwb_sp < reclen + dlen % max_log_data))) {
-		lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
+		list_insert_tail(ilwbs, lwb);
+		lwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_OPENED);
 		if (lwb == NULL)
 			return (NULL);
-		zil_lwb_write_open(zilog, lwb);
-		lwb_sp = lwb->lwb_sz - lwb->lwb_nused;
+		lwb_sp = lwb->lwb_nmax - lwb->lwb_nused;
 
 		/*
 		 * There must be enough space in the new, empty log block to
@@ -2084,7 +2079,7 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
 	clr->lrc_seq = ++zilog->zl_lr_seq;
 
 	lwb->lwb_nused += reclen + dnow;
-	ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
+	ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax);
 	ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
 
 	zil_lwb_add_txg(lwb, lr->lrc_txg);
@@ -2096,22 +2091,9 @@ zil_lwb_assign(zilog_t *zilog, lwb_t *lwb, itx_t *itx, list_t *ilwbs)
 		goto cont;
 	}
 
-	/*
-	 * We have to really issue all queued LWBs before we may have to
-	 * wait for a txg sync.  Otherwise we may end up in a dead lock.
-	 */
-	if (lr->lrc_txtype == TX_WRITE) {
-		boolean_t frozen = lr->lrc_txg > spa_freeze_txg(zilog->zl_spa);
-		if (frozen || itx->itx_wr_state == WR_INDIRECT) {
-			lwb_t *tlwb;
-			while ((tlwb = list_remove_head(ilwbs)) != NULL)
-				zil_lwb_write_issue(zilog, tlwb);
-		}
-		if (itx->itx_wr_state == WR_INDIRECT)
-			lwb->lwb_indirect = B_TRUE;
-		if (frozen)
-			txg_wait_synced(zilog->zl_dmu_pool, lr->lrc_txg);
-	}
+	if (lr->lrc_txtype == TX_WRITE &&
+	    lr->lrc_txg > spa_freeze_txg(zilog->zl_spa))
+		txg_wait_synced(zilog->zl_dmu_pool, lr->lrc_txg);
 
 	return (lwb);
 }
@@ -2174,26 +2156,24 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
 				ZIL_STAT_BUMP(zilog, zil_itx_indirect_count);
 				ZIL_STAT_INCR(zilog, zil_itx_indirect_bytes,
 				    lrw->lr_length);
+				if (lwb->lwb_child_zio == NULL) {
+					lwb->lwb_child_zio = zio_root(
+					    zilog->zl_spa, NULL, NULL,
+					    ZIO_FLAG_CANFAIL);
+				}
 			}
 
 			/*
-			 * We pass in the "lwb_write_zio" rather than
-			 * "lwb_root_zio" so that the "lwb_write_zio"
-			 * becomes the parent of any zio's created by
-			 * the "zl_get_data" callback. The vdevs are
-			 * flushed after the "lwb_write_zio" completes,
-			 * so we want to make sure that completion
-			 * callback waits for these additional zio's,
-			 * such that the vdevs used by those zio's will
-			 * be included in the lwb's vdev tree, and those
-			 * vdevs will be properly flushed. If we passed
-			 * in "lwb_root_zio" here, then these additional
-			 * vdevs may not be flushed; e.g. if these zio's
-			 * completed after "lwb_write_zio" completed.
+			 * The "lwb_child_zio" we pass in will become a child of
+			 * "lwb_write_zio", when one is created, so one will be
+			 * a parent of any zio's created by the "zl_get_data".
+			 * This way "lwb_write_zio" will first wait for children
+			 * block pointers before own writing, and then for their
+			 * writing completion before the vdev cache flushing.
 			 */
 			error = zilog->zl_get_data(itx->itx_private,
 			    itx->itx_gen, lrwb, dbuf, lwb,
-			    lwb->lwb_write_zio);
+			    lwb->lwb_child_zio);
 			if (dbuf != NULL && error == 0) {
 				/* Zero any padding bytes in the last block. */
 				memset((char *)dbuf + lrwb->lr_length, 0,
@@ -2226,12 +2206,8 @@ zil_lwb_commit(zilog_t *zilog, lwb_t *lwb, itx_t *itx)
 				    error);
 				zfs_fallthrough;
 			case EIO:
-				if (lwb->lwb_indirect) {
-					txg_wait_synced(zilog->zl_dmu_pool,
-					    lr->lrc_txg);
-				} else {
-					lwb->lwb_write_zio->io_error = error;
-				}
+				txg_wait_synced(zilog->zl_dmu_pool,
+				    lr->lrc_txg);
 				zfs_fallthrough;
 			case ENOENT:
 				zfs_fallthrough;
@@ -2675,7 +2651,6 @@ zil_prune_commit_list(zilog_t *zilog)
 			zil_commit_waiter_skip(itx->itx_private);
 		} else {
 			zil_commit_waiter_link_lwb(itx->itx_private, last_lwb);
-			itx->itx_private = NULL;
 		}
 
 		mutex_exit(&zilog->zl_lock);
@@ -2753,10 +2728,9 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
 		 * have already been created (zl_lwb_list not empty).
 		 */
 		zil_commit_activate_saxattr_feature(zilog);
-		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);
-		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE);
-		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE);
-		first = (lwb->lwb_state != LWB_STATE_OPENED) &&
+		ASSERT(lwb->lwb_state == LWB_STATE_NEW ||
+		    lwb->lwb_state == LWB_STATE_OPENED);
+		first = (lwb->lwb_state == LWB_STATE_NEW) &&
 		    ((plwb = list_prev(&zilog->zl_lwb_list, lwb)) == NULL ||
 		    plwb->lwb_state == LWB_STATE_FLUSH_DONE);
 	}
@@ -2880,37 +2854,32 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
 	} else {
 		ASSERT(list_is_empty(&nolwb_waiters));
 		ASSERT3P(lwb, !=, NULL);
-		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);
-		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE);
-		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE);
+		ASSERT(lwb->lwb_state == LWB_STATE_NEW ||
+		    lwb->lwb_state == LWB_STATE_OPENED);
 
 		/*
 		 * At this point, the ZIL block pointed at by the "lwb"
-		 * variable is in one of the following states: "closed"
-		 * or "open".
+		 * variable is in "new" or "opened" state.
 		 *
-		 * If it's "closed", then no itxs have been committed to
-		 * it, so there's no point in issuing its zio (i.e. it's
-		 * "empty").
+		 * If it's "new", then no itxs have been committed to it, so
+		 * there's no point in issuing its zio (i.e. it's "empty").
 		 *
-		 * If it's "open", then it contains one or more itxs that
+		 * If it's "opened", then it contains one or more itxs that
 		 * eventually need to be committed to stable storage. In
 		 * this case we intentionally do not issue the lwb's zio
 		 * to disk yet, and instead rely on one of the following
 		 * two mechanisms for issuing the zio:
 		 *
-		 * 1. Ideally, there will be more ZIL activity occurring
-		 * on the system, such that this function will be
-		 * immediately called again (not necessarily by the same
-		 * thread) and this lwb's zio will be issued via
-		 * zil_lwb_assign(). This way, the lwb is guaranteed to
-		 * be "full" when it is issued to disk, and we'll make
-		 * use of the lwb's size the best we can.
+		 * 1. Ideally, there will be more ZIL activity occurring on
+		 * the system, such that this function will be immediately
+		 * called again by different thread and this lwb will be
+		 * closed by zil_lwb_assign().  This way, the lwb will be
+		 * "full" when it is issued to disk, and we'll make use of
+		 * the lwb's size the best we can.
 		 *
 		 * 2. If there isn't sufficient ZIL activity occurring on
-		 * the system, such that this lwb's zio isn't issued via
-		 * zil_lwb_assign(), zil_commit_waiter() will issue the
-		 * lwb's zio. If this occurs, the lwb is not guaranteed
+		 * the system, zil_commit_waiter() will close it and issue
+		 * the zio.  If this occurs, the lwb is not guaranteed
 		 * to be "full" by the time its zio is issued, and means
 		 * the size of the lwb was "too large" given the amount
 		 * of ZIL activity occurring on the system at that time.
@@ -2940,8 +2909,11 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
 			hrtime_t sleep = zilog->zl_last_lwb_latency *
 			    zfs_commit_timeout_pct / 100;
 			if (sleep < zil_min_commit_timeout ||
-			    lwb->lwb_sz - lwb->lwb_nused < lwb->lwb_sz / 8) {
-				lwb = zil_lwb_write_close(zilog, lwb, ilwbs);
+			    lwb->lwb_nmax - lwb->lwb_nused <
+			    lwb->lwb_nmax / 8) {
+				list_insert_tail(ilwbs, lwb);
+				lwb = zil_lwb_write_close(zilog, lwb,
+				    LWB_STATE_NEW);
 				zilog->zl_cur_used = 0;
 				if (lwb == NULL) {
 					while ((lwb = list_remove_head(ilwbs))
@@ -3024,7 +2996,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
 
 	lwb_t *lwb = zcw->zcw_lwb;
 	ASSERT3P(lwb, !=, NULL);
-	ASSERT3S(lwb->lwb_state, !=, LWB_STATE_CLOSED);
+	ASSERT3S(lwb->lwb_state, !=, LWB_STATE_NEW);
 
 	/*
 	 * If the lwb has already been issued by another thread, we can
@@ -3033,9 +3005,7 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
 	 * do this prior to acquiring the zl_issuer_lock, to avoid
 	 * acquiring it when it's not necessary to do so.
 	 */
-	if (lwb->lwb_state == LWB_STATE_ISSUED ||
-	    lwb->lwb_state == LWB_STATE_WRITE_DONE ||
-	    lwb->lwb_state == LWB_STATE_FLUSH_DONE)
+	if (lwb->lwb_state != LWB_STATE_OPENED)
 		return;
 
 	/*
@@ -3058,8 +3028,8 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
 	 * wind up with a use-after-free error below.
 	 */
 	if (zcw->zcw_done) {
-		lwb = NULL;
-		goto out;
+		mutex_exit(&zilog->zl_issuer_lock);
+		return;
 	}
 
 	ASSERT3P(lwb, ==, zcw->zcw_lwb);
@@ -3070,28 +3040,33 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
 	 * second time while holding the lock.
 	 *
 	 * We don't need to hold the zl_lock since the lwb cannot transition
-	 * from OPENED to ISSUED while we hold the zl_issuer_lock. The lwb
-	 * _can_ transition from ISSUED to DONE, but it's OK to race with
+	 * from OPENED to CLOSED while we hold the zl_issuer_lock. The lwb
+	 * _can_ transition from CLOSED to DONE, but it's OK to race with
 	 * that transition since we treat the lwb the same, whether it's in
-	 * the ISSUED or DONE states.
+	 * the CLOSED, ISSUED or DONE states.
 	 *
 	 * The important thing, is we treat the lwb differently depending on
-	 * if it's ISSUED or OPENED, and block any other threads that might
-	 * attempt to issue this lwb. For that reason we hold the
+	 * if it's OPENED or CLOSED, and block any other threads that might
+	 * attempt to close/issue this lwb. For that reason we hold the
 	 * zl_issuer_lock when checking the lwb_state; we must not call
-	 * zil_lwb_write_close() if the lwb had already been issued.
+	 * zil_lwb_write_close() if the lwb had already been closed/issued.
 	 *
 	 * See the comment above the lwb_state_t structure definition for
 	 * more details on the lwb states, and locking requirements.
 	 */
-	if (lwb->lwb_state == LWB_STATE_ISSUED ||
-	    lwb->lwb_state == LWB_STATE_WRITE_DONE ||
-	    lwb->lwb_state == LWB_STATE_FLUSH_DONE) {
-		lwb = NULL;
-		goto out;
+	if (lwb->lwb_state != LWB_STATE_OPENED) {
+		mutex_exit(&zilog->zl_issuer_lock);
+		return;
 	}
 
-	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED);
+	/*
+	 * We do not need zcw_lock once we hold zl_issuer_lock and know lwb
+	 * is still open.  But we have to drop it to avoid a deadlock in case
+	 * callback of zio issued by zil_lwb_write_issue() try to get it,
+	 * while zil_lwb_write_issue() is blocked on attempt to issue next
+	 * lwb it found in LWB_STATE_READY state.
+	 */
+	mutex_exit(&zcw->zcw_lock);
 
 	/*
 	 * As described in the comments above zil_commit_waiter() and
@@ -3099,9 +3074,9 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
 	 * since we've reached the commit waiter's timeout and it still
 	 * hasn't been issued.
 	 */
-	lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, NULL);
+	lwb_t *nlwb = zil_lwb_write_close(zilog, lwb, LWB_STATE_NEW);
 
-	ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
+	ASSERT3S(lwb->lwb_state, ==, LWB_STATE_CLOSED);
 
 	/*
 	 * Since the lwb's zio hadn't been issued by the time this thread
@@ -3124,34 +3099,15 @@ zil_commit_waiter_timeout(zilog_t *zilog, zil_commit_waiter_t *zcw)
 		 * "next" lwb on-disk. When this occurs, the ZIL write
 		 * pipeline must be stalled; see the comment within the
 		 * zil_commit_writer_stall() function for more details.
-		 *
-		 * We must drop the commit waiter's lock prior to
-		 * calling zil_commit_writer_stall() or else we can wind
-		 * up with the following deadlock:
-		 *
-		 * - This thread is waiting for the txg to sync while
-		 *   holding the waiter's lock; txg_wait_synced() is
-		 *   used within txg_commit_writer_stall().
-		 *
-		 * - The txg can't sync because it is waiting for this
-		 *   lwb's zio callback to call dmu_tx_commit().
-		 *
-		 * - The lwb's zio callback can't call dmu_tx_commit()
-		 *   because it's blocked trying to acquire the waiter's
-		 *   lock, which occurs prior to calling dmu_tx_commit()
 		 */
-		mutex_exit(&zcw->zcw_lock);
 		zil_lwb_write_issue(zilog, lwb);
-		lwb = NULL;
 		zil_commit_writer_stall(zilog);
-		mutex_enter(&zcw->zcw_lock);
-	}
-
-out:
-	mutex_exit(&zilog->zl_issuer_lock);
-	if (lwb)
+		mutex_exit(&zilog->zl_issuer_lock);
+	} else {
+		mutex_exit(&zilog->zl_issuer_lock);
 		zil_lwb_write_issue(zilog, lwb);
-	ASSERT(MUTEX_HELD(&zcw->zcw_lock));
+	}
+	mutex_enter(&zcw->zcw_lock);
 }
 
 /*
@@ -3216,7 +3172,7 @@ zil_commit_waiter(zilog_t *zilog, zil_commit_waiter_t *zcw)
 		 * where it's "zcw_lwb" field is NULL, and it hasn't yet
 		 * been skipped, so it's "zcw_done" field is still B_FALSE.
 		 */
-		IMPLY(lwb != NULL, lwb->lwb_state != LWB_STATE_CLOSED);
+		IMPLY(lwb != NULL, lwb->lwb_state != LWB_STATE_NEW);
 
 		if (lwb != NULL && lwb->lwb_state == LWB_STATE_OPENED) {
 			ASSERT3B(timedout, ==, B_FALSE);
@@ -3264,6 +3220,8 @@ zil_commit_waiter(zilog_t *zilog, zil_commit_waiter_t *zcw)
 			 */
 
 			IMPLY(lwb != NULL,
+			    lwb->lwb_state == LWB_STATE_CLOSED ||
+			    lwb->lwb_state == LWB_STATE_READY ||
 			    lwb->lwb_state == LWB_STATE_ISSUED ||
 			    lwb->lwb_state == LWB_STATE_WRITE_DONE ||
 			    lwb->lwb_state == LWB_STATE_FLUSH_DONE);
@@ -3618,10 +3576,11 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
 	while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
 		zh->zh_log = lwb->lwb_blk;
 		if (lwb->lwb_state != LWB_STATE_FLUSH_DONE ||
-		    lwb->lwb_max_txg > txg)
+		    lwb->lwb_alloc_txg > txg || lwb->lwb_max_txg > txg)
 			break;
 		list_remove(&zilog->zl_lwb_list, lwb);
-		zio_free(spa, txg, &lwb->lwb_blk);
+		if (!BP_IS_HOLE(&lwb->lwb_blk))
+			zio_free(spa, txg, &lwb->lwb_blk);
 		zil_free_lwb(zilog, lwb);
 
 		/*
@@ -3825,17 +3784,18 @@ zil_close(zilog_t *zilog)
 	}
 
 	mutex_enter(&zilog->zl_lock);
+	txg = zilog->zl_dirty_max_txg;
 	lwb = list_tail(&zilog->zl_lwb_list);
-	if (lwb == NULL)
-		txg = zilog->zl_dirty_max_txg;
-	else
-		txg = MAX(zilog->zl_dirty_max_txg, lwb->lwb_max_txg);
+	if (lwb != NULL) {
+		txg = MAX(txg, lwb->lwb_alloc_txg);
+		txg = MAX(txg, lwb->lwb_max_txg);
+	}
 	mutex_exit(&zilog->zl_lock);
 
 	/*
 	 * zl_lwb_max_issued_txg may be larger than lwb_max_txg. It depends
 	 * on the time when the dmu_tx transaction is assigned in
-	 * zil_lwb_write_close().
+	 * zil_lwb_write_issue().
 	 */
 	mutex_enter(&zilog->zl_lwb_io_lock);
 	txg = MAX(zilog->zl_lwb_max_issued_txg, txg);
@@ -3864,8 +3824,7 @@ zil_close(zilog_t *zilog)
 	lwb = list_remove_head(&zilog->zl_lwb_list);
 	if (lwb != NULL) {
 		ASSERT(list_is_empty(&zilog->zl_lwb_list));
-		ASSERT3S(lwb->lwb_state, !=, LWB_STATE_ISSUED);
-
+		ASSERT3S(lwb->lwb_state, ==, LWB_STATE_NEW);
 		zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
 		zil_free_lwb(zilog, lwb);
 	}
@@ -3986,7 +3945,7 @@ zil_suspend(const char *osname, void **cookiep)
 
 	/*
 	 * We need to use zil_commit_impl to ensure we wait for all
-	 * LWB_STATE_OPENED and LWB_STATE_ISSUED lwbs to be committed
+	 * LWB_STATE_OPENED, _CLOSED and _READY lwbs to be committed
 	 * to disk before proceeding. If we used zil_commit instead, it
 	 * would just call txg_wait_synced(), because zl_suspend is set.
 	 * txg_wait_synced() doesn't wait for these lwb's to be
diff --git a/module/zfs/zio.c b/module/zfs/zio.c
index b5627109900c..7458b416c292 100644
--- a/module/zfs/zio.c
+++ b/module/zfs/zio.c
@@ -4466,8 +4466,8 @@ zio_ready(zio_t *zio)
 	zio_t *pio, *pio_next;
 	zio_link_t *zl = NULL;
 
-	if (zio_wait_for_children(zio, ZIO_CHILD_GANG_BIT | ZIO_CHILD_DDT_BIT,
-	    ZIO_WAIT_READY)) {
+	if (zio_wait_for_children(zio, ZIO_CHILD_LOGICAL_BIT |
+	    ZIO_CHILD_GANG_BIT | ZIO_CHILD_DDT_BIT, ZIO_WAIT_READY)) {
 		return (NULL);
 	}
 
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index cd4e6f0c7558..f3bb35447dd6 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -698,7 +698,6 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
 	int error;
 
 	ASSERT3P(lwb, !=, NULL);
-	ASSERT3P(zio, !=, NULL);
 	ASSERT3U(size, !=, 0);
 
 	zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
@@ -717,6 +716,7 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
 		error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
 		    DMU_READ_NO_PREFETCH);
 	} else { /* indirect write */
+		ASSERT3P(zio, !=, NULL);
 		/*
 		 * Have to lock the whole block to ensure when it's written out
 		 * and its checksum is being calculated that no one can change