Skip to content

Commit

Permalink
rbd-wnbd: optionally handle wnbd adapter restart events
Browse files Browse the repository at this point in the history
The WNBD adapter may be reset in certain situations (e.g. driver
upgrade, MS WHQL tests, etc).

We're going to monitor the WNBD adapter using WMI[1] events, restarting
the rbd-wnbd disk mappings whenever necessary. Adapter monitoring can be
enabled by passing the --adapter-monitoring-enabled flag to the service.

This feature is optional for the following reasons:

* it's mainly used during development / driver certification
* we had to use a relatively small polling interval, which might imply
  additional resource usage. WMI quotas also have to be considered.

While at it, we're updating two lambdas that are submitted to thread pools,
avoiding default reference capturing and explicitly specifying the variables
that get copied.

[1] https://learn.microsoft.com/en-us/windows/win32/wmisdk/wmi-start-page

Signed-off-by: Lucian Petrut <[email protected]>
  • Loading branch information
petrutlucian94 committed Mar 24, 2023
1 parent a47caa1 commit 0c25ca6
Show file tree
Hide file tree
Showing 7 changed files with 543 additions and 23 deletions.
2 changes: 2 additions & 0 deletions src/dokan/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
*
*/

#pragma once

#include "include/compat.h"

void to_filetime(time_t t, LPFILETIME pft);
Expand Down
2 changes: 2 additions & 0 deletions src/include/win32/fs_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// Those definitions allow handling information coming from Ceph and should
// not be passed to Windows functions.

#pragma once

#define S_IFLNK 0120000

#define S_ISTYPE(m, TYPE) ((m & S_IFMT) == TYPE)
Expand Down
3 changes: 2 additions & 1 deletion src/tools/rbd_wnbd/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
add_executable(rbd-wnbd wnbd_handler.cc rbd_wnbd.cc)
add_executable(rbd-wnbd rbd_wnbd.cc wnbd_handler.cc wnbd_wmi.cc)
set_target_properties(
rbd-wnbd PROPERTIES COMPILE_FLAGS
"-fpermissive -I${WNBD_INCLUDE_DIRS}")
target_link_libraries(
rbd-wnbd setupapi rpcrt4
wbemuuid oleaut32
${WNBD_LIBRARIES}
${Boost_FILESYSTEM_LIBRARY}
librbd librados global)
Expand Down
188 changes: 166 additions & 22 deletions src/tools/rbd_wnbd/rbd_wnbd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,14 @@
*
*/

#include <objidl.h>
// LOCK_WRITE is also defined by objidl.h, we have to avoid
// a collision.
#undef LOCK_WRITE

#include "include/int_types.h"

#include <atomic>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
Expand All @@ -21,6 +27,7 @@
#include <unistd.h>

#include "wnbd_handler.h"
#include "wnbd_wmi.h"
#include "rbd_wnbd.h"

#include <fstream>
Expand Down Expand Up @@ -54,6 +61,17 @@

using namespace std;

// Wait 2s before recreating the wmi subscription in case of errors
#define WMI_SUBSCRIPTION_RETRY_INTERVAL 2
// SCSI adapter modification events aren't received until the entire polling
// interval has elapsed (unlike other WMI classes, such as Msvm_ComputerSystem).
// With longer intervals, it even seems to miss events. For this reason,
// we're using a relatively short interval but have adapter state monitoring
// as an optional feature, mainly used for dev / driver certification purposes.
#define WNBD_ADAPTER_WMI_POLL_INTERVAL 2
// Wait for wmi events up to two seconds
#define WMI_EVENT_TIMEOUT 2

bool is_process_running(DWORD pid)
{
HANDLE process = OpenProcess(SYNCHRONIZE, FALSE, pid);
Expand Down Expand Up @@ -106,8 +124,16 @@ DWORD WNBDActiveDiskIterator::fetch_list(
WNBDActiveDiskIterator::WNBDActiveDiskIterator()
{
DWORD status = WNBDActiveDiskIterator::fetch_list(&conn_list);
if (status) {
switch (status) {
case 0:
// no error
break;
case ERROR_OPEN_FAILED:
error = ENOENT;
break;
default:
error = EINVAL;
break;
}
}

Expand Down Expand Up @@ -559,7 +585,10 @@ int restart_registered_mappings(
{
Config cfg;
WNBDDiskIterator iterator;
int err = 0, r;
int r;
std::atomic<int> err = 0;

dout(0) << "remounting persistent disks" << dendl;

int total_timeout_ms = max(total_timeout, total_timeout * 1000);
int image_map_timeout_ms = max(image_map_timeout, image_map_timeout * 1000);
Expand Down Expand Up @@ -593,7 +622,8 @@ int restart_registered_mappings(
}

boost::asio::post(pool,
[&, cfg]() mutable
[cfg, start_t, counter_freq, total_timeout_ms,
image_map_timeout_ms, &err]()
{
LARGE_INTEGER curr_t, elapsed_ms;
QueryPerformanceCounter(&curr_t);
Expand All @@ -615,7 +645,7 @@ int restart_registered_mappings(

// We'll try to map all devices and return a non-zero value
// if any of them fails.
r = map_device_using_suprocess(cfg.command_line, time_left_ms);
int r = map_device_using_suprocess(cfg.command_line, time_left_ms);
if (r) {
err = r;
derr << "Could not create mapping: "
Expand Down Expand Up @@ -650,15 +680,17 @@ int disconnect_all_mappings(

Config cfg;
WNBDActiveDiskIterator iterator;
int err = 0, r;
int r;
std::atomic<int> err = 0;

boost::asio::thread_pool pool(worker_count);
LARGE_INTEGER start_t, counter_freq;
QueryPerformanceFrequency(&counter_freq);
QueryPerformanceCounter(&start_t);
while (iterator.get(&cfg)) {
boost::asio::post(pool,
[&, cfg]() mutable
[cfg, start_t, counter_freq, timeout_ms,
hard_disconnect, unregister, &err]() mutable
{
LARGE_INTEGER curr_t, elapsed_ms;
QueryPerformanceCounter(&curr_t);
Expand All @@ -677,7 +709,7 @@ int disconnect_all_mappings(
<< "s. Hard disconnect: " << cfg.hard_disconnect
<< dendl;

r = do_unmap(&cfg, unregister);
int r = do_unmap(&cfg, unregister);
if (r) {
err = r;
derr << "Could not remove mapping: " << cfg.devpath
Expand All @@ -690,7 +722,11 @@ int disconnect_all_mappings(
pool.join();

r = iterator.get_error();
if (r) {
if (r == ENOENT) {
dout(0) << __func__ << ": wnbd adapter unavailable, "
<< "assuming that no wnbd mappings exist." << dendl;
err = 0;
} else if (r) {
derr << "Could not fetch all mappings. Error: " << r << dendl;
err = r;
}
Expand All @@ -706,21 +742,31 @@ class RBDService : public ServiceBase {
int service_start_timeout;
int image_map_timeout;
bool remap_failure_fatal;
bool adapter_monitoring_enabled;

std::thread adapter_monitor_thread;

ceph::mutex start_lock = ceph::make_mutex("RBDService::StartLocker");
ceph::mutex shutdown_lock = ceph::make_mutex("RBDService::ShutdownLocker");
bool started = false;
std::atomic<bool> stop_requsted = false;

public:
RBDService(bool _hard_disconnect,
int _soft_disconnect_timeout,
int _thread_count,
int _service_start_timeout,
int _image_map_timeout,
bool _remap_failure_fatal)
bool _remap_failure_fatal,
bool _adapter_monitoring_enabled)
: ServiceBase(g_ceph_context)
, hard_disconnect(_hard_disconnect)
, soft_disconnect_timeout(_soft_disconnect_timeout)
, thread_count(_thread_count)
, service_start_timeout(_service_start_timeout)
, image_map_timeout(_image_map_timeout)
, remap_failure_fatal(_remap_failure_fatal)
, adapter_monitoring_enabled(_adapter_monitoring_enabled)
{
}

Expand Down Expand Up @@ -863,7 +909,79 @@ class RBDService : public ServiceBase {
return err;
}

void monitor_wnbd_adapter()
{
dout(5) << __func__ << ": initializing COM" << dendl;
// Initialize the Windows COM library for this thread.
COMBootstrapper com_bootstrapper;
HRESULT hres = com_bootstrapper.initialize();
if (FAILED(hres)) {
return;
}

WmiSubscription subscription = subscribe_wnbd_adapter_events(
WNBD_ADAPTER_WMI_POLL_INTERVAL);
dout(5) << __func__ << ": initializing wmi subscription" << dendl;
hres = subscription.initialize();

dout(0) << "monitoring wnbd adapter state changes" << dendl;
// The event watcher will wait at most WMI_EVENT_TIMEOUT (2s)
// and exit the loop if the service is being stopped.
while (!stop_requsted) {
IWbemClassObject* object;
ULONG returned = 0;

if (FAILED(hres)) {
derr << "couldn't retrieve wnbd adapter events, wmi hresult: "
<< hres << ". Reestablishing wmi listener in "
<< WMI_SUBSCRIPTION_RETRY_INTERVAL << " seconds." << dendl;
subscription.close();
Sleep(WMI_SUBSCRIPTION_RETRY_INTERVAL * 1000);

dout(20) << "recreating wnbd adapter wmi subscription" << dendl;
subscription = subscribe_wnbd_adapter_events(
WNBD_ADAPTER_WMI_POLL_INTERVAL);
hres = subscription.initialize();
continue;
}

dout(20) << "fetching wnbd adapter events" << dendl;
hres = subscription.next(
WMI_EVENT_TIMEOUT * 1000,
1, // we'll process one event at a time
&object,
&returned);

if (!FAILED(hres) && returned) {
if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceCreationEvent")) {
dout(0) << "wnbd adapter (re)created, remounting disks" << dendl;
restart_registered_mappings(
thread_count, service_start_timeout, image_map_timeout);
} else if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceDeletionEvent")) {
dout(0) << "wnbd adapter removed" << dendl;
// nothing to do here
} else if (WBEM_S_NO_ERROR == object->InheritsFrom(L"__InstanceModificationEvent")) {
dout(0) << "wnbd adapter changed" << dendl;
// TODO: look for state changes and log the availability/status
}

object->Release();
}
}

dout(10) << "service stop requested, wnbd event monitor exited" << dendl;
}

int run_hook() override {
std::unique_lock l{start_lock};
if (started) {
// The run hook is only supposed to be called once per process,
// however we're staying cautious.
derr << "Service already running." << dendl;
return -EALREADY;
}

started = true;
// Restart registered mappings before accepting new ones.
int r = restart_registered_mappings(
thread_count, service_start_timeout, image_map_timeout);
Expand All @@ -876,14 +994,33 @@ class RBDService : public ServiceBase {
}
}

if (adapter_monitoring_enabled) {
adapter_monitor_thread = std::thread(&monitor_wnbd_adapter, this);
} else {
dout(0) << "WNBD adapter monitoring disabled." << dendl;
}

return create_pipe_server();
}

// Invoked when the service is requested to stop.
int stop_hook() override {
return disconnect_all_mappings(
std::unique_lock l{shutdown_lock};

stop_requsted = true;

int r = disconnect_all_mappings(
false, hard_disconnect, soft_disconnect_timeout, thread_count);

if (adapter_monitor_thread.joinable()) {
dout(10) << "waiting for wnbd event monitor thread" << dendl;
adapter_monitor_thread.join();
dout(10) << "wnbd event monitor stopped" << dendl;
}

return r;
}

// Invoked when the system is shutting down.
int shutdown_hook() override {
return stop_hook();
Expand Down Expand Up @@ -954,17 +1091,21 @@ Unmap options:
unflushed caches or open handles. Default: 15
Service options:
--hard-disconnect Skip attempting a soft disconnect
--soft-disconnect-timeout Cummulative soft disconnect timeout in seconds,
used when disconnecting existing mappings. A hard
disconnect will be issued when hitting the timeout
--service-thread-count The number of workers used when mapping or
unmapping images. Default: 8
--start-timeout The service start timeout in seconds. Default: 120
--map-timeout Individual image map timeout in seconds. Default: 20
--remap-failure-fatal If set, the service will stop when failing to remap
an image at start time, unmapping images that have
been mapped so far.
--hard-disconnect Skip attempting a soft disconnect
--soft-disconnect-timeout Cummulative soft disconnect timeout in seconds,
used when disconnecting existing mappings. A hard
disconnect will be issued when hitting the timeout
--service-thread-count The number of workers used when mapping or
unmapping images. Default: 8
--start-timeout The service start timeout in seconds. Default: 120
--map-timeout Individual image map timeout in seconds. Default: 20
--remap-failure-fatal If set, the service will stop when failing to remap
an image at start time, unmapping images that have
been mapped so far.
--adapter-monitoring-enabled If set, the service will monitor WNBD adapter WMI
events and remount the images when the adapter gets
recreated. Mainly used for development and driver
certification purposes.
Show|List options:
--format plain|json|xml Output format (default: plain)
Expand Down Expand Up @@ -1478,6 +1619,8 @@ static int parse_args(std::vector<const char*>& args,
cfg->pretty_format = true;
} else if (ceph_argparse_flag(args, i, "--remap-failure-fatal", (char *)NULL)) {
cfg->remap_failure_fatal = true;
} else if (ceph_argparse_flag(args, i, "--adapter-monitoring-enabled", (char *)NULL)) {
cfg->adapter_monitoring_enabled = true;
} else if (ceph_argparse_witharg(args, i, &cfg->parent_pipe, err,
"--pipe-name", (char *)NULL)) {
if (!err.str().empty()) {
Expand Down Expand Up @@ -1691,7 +1834,8 @@ static int rbd_wnbd(int argc, const char *argv[])
cfg.service_thread_count,
cfg.service_start_timeout,
cfg.image_map_timeout,
cfg.remap_failure_fatal);
cfg.remap_failure_fatal,
cfg.adapter_monitoring_enabled);
// This call will block until the service stops.
r = RBDService::initialize(&service);
if (r < 0)
Expand Down
1 change: 1 addition & 0 deletions src/tools/rbd_wnbd/rbd_wnbd.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ struct Config {
int service_start_timeout = DEFAULT_SERVICE_START_TIMEOUT;
int image_map_timeout = DEFAULT_IMAGE_MAP_TIMEOUT;
bool remap_failure_fatal = false;
bool adapter_monitoring_enabled = false;

// TODO: consider moving those fields to a separate structure. Those
// provide connection information without actually being configurable.
Expand Down
Loading

0 comments on commit 0c25ca6

Please sign in to comment.