Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/ericb/tags/pull-nbd-2020-07-28'…
Browse files Browse the repository at this point in the history
… into staging

nbd patches for 2020-07-28

- fix NBD handling of trim/zero requests larger than 2G
- allow no-op resizes on NBD (in turn fixing qemu-img convert -c into NBD)
- several deadlock fixes when using NBD reconnect

# gpg: Signature made Tue 28 Jul 2020 15:59:42 BST
# gpg:                using RSA key 71C2CC22B1C4602927D2F3AAA7A16B4A2527436A
# gpg: Good signature from "Eric Blake <[email protected]>" [full]
# gpg:                 aka "Eric Blake (Free Software Programmer) <[email protected]>" [full]
# gpg:                 aka "[jpeg image of size 6874]" [full]
# Primary key fingerprint: 71C2 CC22 B1C4 6029 27D2  F3AA A7A1 6B4A 2527 436A

* remotes/ericb/tags/pull-nbd-2020-07-28:
  block/nbd: nbd_co_reconnect_loop(): don't sleep if drained
  block/nbd: on shutdown terminate connection attempt
  block/nbd: allow drain during reconnect attempt
  block/nbd: split nbd_establish_connection out of nbd_client_connect
  iotests: Test convert to qcow2 compressed to NBD
  iotests: Add more qemu_img helpers
  iotests: Make qemu_nbd_popen() a contextmanager
  block: nbd: Fix convert qcow2 compressed to nbd
  nbd: Fix large trim/zero requests

Signed-off-by: Peter Maydell <[email protected]>
  • Loading branch information
pm215 committed Jul 28, 2020
2 parents b175383 + 12c75e2 commit 5045be8
Show file tree
Hide file tree
Showing 10 changed files with 342 additions and 89 deletions.
126 changes: 95 additions & 31 deletions block/nbd.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,10 @@ typedef struct BDRVNBDState {
char *x_dirty_bitmap;
} BDRVNBDState;

static int nbd_client_connect(BlockDriverState *bs, Error **errp);
static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
Error **errp);
static int nbd_client_handshake(BlockDriverState *bs, QIOChannelSocket *sioc,
Error **errp);

static void nbd_clear_bdrvstate(BDRVNBDState *s)
{
Expand Down Expand Up @@ -206,11 +209,15 @@ static void nbd_teardown_connection(BlockDriverState *bs)
{
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;

if (s->state == NBD_CLIENT_CONNECTED) {
if (s->ioc) {
/* finish any pending coroutines */
assert(s->ioc);
qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
} else if (s->sioc) {
/* abort negotiation */
qio_channel_shutdown(QIO_CHANNEL(s->sioc), QIO_CHANNEL_SHUTDOWN_BOTH,
NULL);
}

s->state = NBD_CLIENT_QUIT;
if (s->connection_co) {
if (s->connection_co_sleep_ns_state) {
Expand Down Expand Up @@ -241,7 +248,9 @@ static bool nbd_client_connecting_wait(BDRVNBDState *s)

static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
{
int ret;
Error *local_err = NULL;
QIOChannelSocket *sioc;

if (!nbd_client_connecting(s)) {
return;
Expand Down Expand Up @@ -280,19 +289,39 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
s->ioc = NULL;
}

s->connect_status = nbd_client_connect(s->bs, &local_err);
sioc = nbd_establish_connection(s->saddr, &local_err);
if (!sioc) {
ret = -ECONNREFUSED;
goto out;
}

bdrv_dec_in_flight(s->bs);

ret = nbd_client_handshake(s->bs, sioc, &local_err);

if (s->drained) {
s->wait_drained_end = true;
while (s->drained) {
/*
* We may be entered once from nbd_client_attach_aio_context_bh
* and then from nbd_client_co_drain_end. So here is a loop.
*/
qemu_coroutine_yield();
}
}
bdrv_inc_in_flight(s->bs);

out:
s->connect_status = ret;
error_free(s->connect_err);
s->connect_err = NULL;
error_propagate(&s->connect_err, local_err);

if (s->connect_status < 0) {
/* failed attempt */
return;
if (ret >= 0) {
/* successfully connected */
s->state = NBD_CLIENT_CONNECTED;
qemu_co_queue_restart_all(&s->free_sema);
}

/* successfully connected */
s->state = NBD_CLIENT_CONNECTED;
qemu_co_queue_restart_all(&s->free_sema);
}

static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
Expand All @@ -312,8 +341,6 @@ static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
qemu_co_queue_restart_all(&s->free_sema);
}

qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout,
&s->connection_co_sleep_ns_state);
if (s->drained) {
bdrv_dec_in_flight(s->bs);
s->wait_drained_end = true;
Expand All @@ -325,9 +352,12 @@ static coroutine_fn void nbd_co_reconnect_loop(BDRVNBDState *s)
qemu_coroutine_yield();
}
bdrv_inc_in_flight(s->bs);
}
if (timeout < max_timeout) {
timeout *= 2;
} else {
qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout,
&s->connection_co_sleep_ns_state);
if (timeout < max_timeout) {
timeout *= 2;
}
}

nbd_reconnect_attempt(s);
Expand Down Expand Up @@ -1425,24 +1455,18 @@ static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
return sioc;
}

static int nbd_client_connect(BlockDriverState *bs, Error **errp)
/* nbd_client_handshake takes ownership on sioc. On failure it is unref'ed. */
static int nbd_client_handshake(BlockDriverState *bs, QIOChannelSocket *sioc,
Error **errp)
{
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
AioContext *aio_context = bdrv_get_aio_context(bs);
int ret;

/*
* establish TCP connection, return error if it fails
* TODO: Configurable retry-until-timeout behaviour.
*/
QIOChannelSocket *sioc = nbd_establish_connection(s->saddr, errp);
trace_nbd_client_handshake(s->export);

if (!sioc) {
return -ECONNREFUSED;
}
s->sioc = sioc;

/* NBD handshake */
trace_nbd_client_connect(s->export);
qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
qio_channel_attach_aio_context(QIO_CHANNEL(sioc), aio_context);

Expand All @@ -1457,6 +1481,7 @@ static int nbd_client_connect(BlockDriverState *bs, Error **errp)
g_free(s->info.name);
if (ret < 0) {
object_unref(OBJECT(sioc));
s->sioc = NULL;
return ret;
}
if (s->x_dirty_bitmap && !s->info.base_allocation) {
Expand All @@ -1482,14 +1507,12 @@ static int nbd_client_connect(BlockDriverState *bs, Error **errp)
}
}

s->sioc = sioc;

if (!s->ioc) {
s->ioc = QIO_CHANNEL(sioc);
object_ref(OBJECT(s->ioc));
}

trace_nbd_client_connect_success(s->export);
trace_nbd_client_handshake_success(s->export);

return 0;

Expand All @@ -1504,6 +1527,7 @@ static int nbd_client_connect(BlockDriverState *bs, Error **errp)
nbd_send_request(s->ioc ?: QIO_CHANNEL(sioc), &request);

object_unref(OBJECT(sioc));
s->sioc = NULL;

return ret;
}
Expand Down Expand Up @@ -1894,6 +1918,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
{
int ret;
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
QIOChannelSocket *sioc;

ret = nbd_process_options(bs, options, errp);
if (ret < 0) {
Expand All @@ -1904,7 +1929,16 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
qemu_co_mutex_init(&s->send_mutex);
qemu_co_queue_init(&s->free_sema);

ret = nbd_client_connect(bs, errp);
/*
* establish TCP connection, return error if it fails
* TODO: Configurable retry-until-timeout behaviour.
*/
sioc = nbd_establish_connection(s->saddr, errp);
if (!sioc) {
return -ECONNREFUSED;
}

ret = nbd_client_handshake(bs, sioc, errp);
if (ret < 0) {
nbd_clear_bdrvstate(s);
return ret;
Expand Down Expand Up @@ -1966,6 +2000,33 @@ static void nbd_close(BlockDriverState *bs)
nbd_clear_bdrvstate(s);
}

/*
* NBD cannot truncate, but if the caller asks to truncate to the same size, or
* to a smaller size with exact=false, there is no reason to fail the
* operation.
*
* Preallocation mode is ignored since it does not seems useful to fail when
* we never change anything.
*/
static int coroutine_fn nbd_co_truncate(BlockDriverState *bs, int64_t offset,
bool exact, PreallocMode prealloc,
BdrvRequestFlags flags, Error **errp)
{
BDRVNBDState *s = bs->opaque;

if (offset != s->info.size && exact) {
error_setg(errp, "Cannot resize NBD nodes");
return -ENOTSUP;
}

if (offset > s->info.size) {
error_setg(errp, "Cannot grow NBD nodes");
return -EINVAL;
}

return 0;
}

static int64_t nbd_getlength(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
Expand Down Expand Up @@ -2045,6 +2106,7 @@ static BlockDriver bdrv_nbd = {
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_co_truncate = nbd_co_truncate,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_client_detach_aio_context,
.bdrv_attach_aio_context = nbd_client_attach_aio_context,
Expand Down Expand Up @@ -2072,6 +2134,7 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_co_truncate = nbd_co_truncate,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_client_detach_aio_context,
.bdrv_attach_aio_context = nbd_client_attach_aio_context,
Expand Down Expand Up @@ -2099,6 +2162,7 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_co_truncate = nbd_co_truncate,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_client_detach_aio_context,
.bdrv_attach_aio_context = nbd_client_attach_aio_context,
Expand Down
4 changes: 2 additions & 2 deletions block/trace-events
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ nbd_parse_blockstatus_compliance(const char *err) "ignoring extra data from non-
nbd_structured_read_compliance(const char *type) "server sent non-compliant unaligned read %s chunk"
nbd_read_reply_entry_fail(int ret, const char *err) "ret = %d, err: %s"
nbd_co_request_fail(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, uint16_t type, const char *name, int ret, const char *err) "Request failed { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s"
nbd_client_connect(const char *export_name) "export '%s'"
nbd_client_connect_success(const char *export_name) "export '%s'"
nbd_client_handshake(const char *export_name) "export '%s'"
nbd_client_handshake_success(const char *export_name) "export '%s'"

# ssh.c
ssh_restart_coroutine(void *co) "co=%p"
Expand Down
28 changes: 23 additions & 5 deletions nbd/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -2378,8 +2378,17 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
flags |= BDRV_REQ_NO_FALLBACK;
}
ret = blk_pwrite_zeroes(exp->blk, request->from + exp->dev_offset,
request->len, flags);
ret = 0;
/* FIXME simplify this when blk_pwrite_zeroes switches to 64-bit */
while (ret >= 0 && request->len) {
int align = client->check_align ?: 1;
int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
align));
ret = blk_pwrite_zeroes(exp->blk, request->from + exp->dev_offset,
len, flags);
request->len -= len;
request->from += len;
}
return nbd_send_generic_reply(client, request->handle, ret,
"writing to file failed", errp);

Expand All @@ -2393,9 +2402,18 @@ static coroutine_fn int nbd_handle_request(NBDClient *client,
"flush failed", errp);

case NBD_CMD_TRIM:
ret = blk_co_pdiscard(exp->blk, request->from + exp->dev_offset,
request->len);
if (ret == 0 && request->flags & NBD_CMD_FLAG_FUA) {
ret = 0;
/* FIXME simplify this when blk_co_pdiscard switches to 64-bit */
while (ret >= 0 && request->len) {
int align = client->check_align ?: 1;
int len = MIN(request->len, QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
align));
ret = blk_co_pdiscard(exp->blk, request->from + exp->dev_offset,
len);
request->len -= len;
request->from += len;
}
if (ret >= 0 && request->flags & NBD_CMD_FLAG_FUA) {
ret = blk_co_flush(exp->blk);
}
return nbd_send_generic_reply(client, request->handle, ret,
Expand Down
2 changes: 1 addition & 1 deletion qemu-io-cmds.c
Original file line number Diff line number Diff line change
Expand Up @@ -1715,7 +1715,7 @@ static int truncate_f(BlockBackend *blk, int argc, char **argv)
* exact=true. It is better to err on the "emit more errors" side
* than to be overly permissive.
*/
ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, &local_err);
ret = blk_truncate(blk, offset, false, PREALLOC_MODE_OFF, 0, &local_err);
if (ret < 0) {
error_report_err(local_err);
return ret;
Expand Down
Loading

0 comments on commit 5045be8

Please sign in to comment.