From 0692c7fb6c70315ef3d90f4c18fd28f2d2c888eb Mon Sep 17 00:00:00 2001 From: Adrian Muzyka Date: Wed, 8 May 2024 15:43:09 +0200 Subject: [PATCH] Try to recover plugin by wakeup on Hibernation error In particular, the socket read timeout was not covered, which left the PID hibernated by memcr while the plugin stayed in the ACTIVATED state. Now a wakeup is attempted to recover, which may at least unblock the PID and let it crash. --- Source/WPEFramework/PluginServer.cpp | 20 +++++++++---------- .../checkpointserver/CheckpointServer.c | 11 +++++++--- Source/extensions/hibernate/hibernate.h | 1 + 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/Source/WPEFramework/PluginServer.cpp b/Source/WPEFramework/PluginServer.cpp index 5c954417e..95b1f0597 100644 --- a/Source/WPEFramework/PluginServer.cpp +++ b/Source/WPEFramework/PluginServer.cpp @@ -731,11 +731,12 @@ namespace PluginHost { if (result == HIBERNATE_ERROR_NONE) { result = HibernateChildren(parentPID, timeout); - if(result != Core::ERROR_NONE && result != Core::ERROR_ABORTED) { - //wakeup Parent process to revert Hibernation - TRACE(Activity, (_T("Wakeup plugin [%s] process [%u] on Hibernate error [%d]"), Callsign().c_str(), parentPID, result)); - WakeupProcess(timeout, parentPID, _administrator.Configuration().HibernateLocator().c_str(), _T(""), &_hibernateStorage); - } + } + + if (result != Core::ERROR_NONE && result != Core::ERROR_ABORTED) { + // try to wakeup Parent process to revert Hibernation and recover + TRACE(Activity, (_T("Wakeup plugin [%s] process [%u] on Hibernate error [%d]"), Callsign().c_str(), parentPID, result)); + WakeupProcess(timeout, parentPID, _administrator.Configuration().HibernateLocator().c_str(), _T(""), &_hibernateStorage); } Lock(); @@ -834,15 +835,12 @@ namespace PluginHost { if (result == Core::ERROR_ABORTED) { break; } - - if (result != HIBERNATE_ERROR_NONE) { - // revert Hibernation of parent - TRACE(Activity, (_T("Wakeup plugin [%s] process [%u] on Hibernate error [%d]"), 
Callsign().c_str(), *iter, result)); - WakeupProcess(timeout, *iter, _administrator.Configuration().HibernateLocator().c_str(), _T(""), &_hibernateStorage); - } } if (result != HIBERNATE_ERROR_NONE) { + // try to recover by reverting current Hibernations + TRACE(Activity, (_T("Wakeup plugin [%s] process [%u] on Hibernate error [%d]"), Callsign().c_str(), *iter, result)); + WakeupProcess(timeout, *iter, _administrator.Configuration().HibernateLocator().c_str(), _T(""), &_hibernateStorage); // revert previous Hibernations and break while (iter != childrenPIDs.begin()) { --iter; diff --git a/Source/extensions/hibernate/checkpointserver/CheckpointServer.c b/Source/extensions/hibernate/checkpointserver/CheckpointServer.c index e33c5eb2e..867ba2281 100644 --- a/Source/extensions/hibernate/checkpointserver/CheckpointServer.c +++ b/Source/extensions/hibernate/checkpointserver/CheckpointServer.c @@ -37,7 +37,8 @@ typedef enum { typedef enum { MEMCR_OK = 0, MEMCR_ERROR = -1, - MEMCR_INVALID_PID = -2 + MEMCR_INVALID_PID = -2, + MEMCR_SOCKET_READ_ERROR = -3 } ServerResponseCode; typedef struct { @@ -135,14 +136,15 @@ static bool SendRcvCmd(const ServerRequest* cmd, ServerResponse* resp, uint32_t ret = write(cd, cmd, sizeof(ServerRequest)); if (ret != sizeof(ServerRequest)) { - LOGERR("Socket write failed: ret %d", ret); + LOGERR("Socket write failed: ret %d, %m", ret); close(cd); return false; } ret = read(cd, resp, sizeof(ServerResponse)); if (ret != sizeof(ServerResponse)) { - LOGERR("Socket read failed: ret %d", ret); + LOGERR("Socket read failed: ret %d, %m", ret); + resp->respCode = MEMCR_SOCKET_READ_ERROR; close(cd); return false; } @@ -163,6 +165,9 @@ uint32_t HibernateProcess(const uint32_t timeout, const pid_t pid, const char da if (SendRcvCmd(&req, &resp, timeout, data_dir)) { LOGINFO("Hibernate process PID %d success", pid); return HIBERNATE_ERROR_NONE; + } else if (resp.respCode == MEMCR_SOCKET_READ_ERROR) { + LOGERR("Error Hibernate timeout process PID %d ret %d", 
pid, resp.respCode); + return HIBERNATE_ERROR_TIMEOUT; } else { LOGERR("Error Hibernate process PID %d ret %d", pid, resp.respCode); return HIBERNATE_ERROR_GENERAL; diff --git a/Source/extensions/hibernate/hibernate.h b/Source/extensions/hibernate/hibernate.h index 1f9666574..888371416 100644 --- a/Source/extensions/hibernate/hibernate.h +++ b/Source/extensions/hibernate/hibernate.h @@ -28,6 +28,7 @@ extern "C" { #define HIBERNATE_ERROR_NONE 0 #define HIBERNATE_ERROR_GENERAL 1 +#define HIBERNATE_ERROR_TIMEOUT 2 uint32_t HibernateProcess(const uint32_t timeout, const pid_t pid, const char data_dir[], const char volatile_dir[], void** storage); uint32_t WakeupProcess(const uint32_t timeout, const pid_t pid, const char data_dir[], const char volatile_dir[], void** storage);