From 794fd184a5d0ed8007c23a21ccedce34ea959ae6 Mon Sep 17 00:00:00 2001 From: Jake Smith Date: Thu, 7 Dec 2023 19:12:53 +0000 Subject: [PATCH] HPCC-30994 Ensure jobId's removed from log manager in worker Also set jobId earlier in containerized, meaning that some early logging is tagged with the correct jobId instead of "UNK". Signed-off-by: Jake Smith --- thorlcr/slave/slavmain.cpp | 1 + thorlcr/slave/thslavemain.cpp | 55 ++++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/thorlcr/slave/slavmain.cpp b/thorlcr/slave/slavmain.cpp index 928ad5e1a86..cb7cd0b8954 100644 --- a/thorlcr/slave/slavmain.cpp +++ b/thorlcr/slave/slavmain.cpp @@ -1885,6 +1885,7 @@ class CJobListener : public CSimpleInterface ILogMsgFilter *existingLogHandler = queryLogMsgManager()->queryMonitorFilter(logHandler); dbgassertex(existingLogHandler); verifyex(queryLogMsgManager()->changeMonitorFilterOwn(logHandler, getCategoryLogMsgFilter(existingLogHandler->queryAudienceMask(), existingLogHandler->queryClassMask(), maxLogDetail))); + queryLogMsgManager()->removeJobId(thorJob.queryJobID()); LogMsgJobId thorJobId = queryLogMsgManager()->addJobId(wuid); thorJob.setJobID(thorJobId); setDefaultJobId(thorJobId); diff --git a/thorlcr/slave/thslavemain.cpp b/thorlcr/slave/thslavemain.cpp index 2d055c94719..45916bf96f2 100644 --- a/thorlcr/slave/thslavemain.cpp +++ b/thorlcr/slave/thslavemain.cpp @@ -292,24 +292,35 @@ class CReleaseMutex : public CSimpleInterface, public Mutex ILogMsgHandler *startSlaveLog() { ILogMsgHandler *logHandler = nullptr; -#ifndef _CONTAINERIZED - StringBuffer fileName("thorslave"); - Owned lf = createComponentLogFileCreator(globals->queryProp("@logDir"), "thor"); - StringBuffer slaveNumStr; - lf->setPostfix(slaveNumStr.append(mySlaveNum).str()); - lf->setCreateAliasFile(false); - lf->setName(fileName.str());//override default filename - logHandler = lf->beginLogging(); + if (!isContainerized()) + { + StringBuffer fileName("thorslave"); + Owned lf = createComponentLogFileCreator(globals->queryProp("@logDir"), "thor"); + StringBuffer slaveNumStr; + lf->setPostfix(slaveNumStr.append(mySlaveNum).str()); + lf->setCreateAliasFile(false); + lf->setName(fileName.str());//override default filename + logHandler = lf->beginLogging(); #ifndef _DEBUG - // keep duplicate logging output to stderr to aide debugging - queryLogMsgManager()->removeMonitor(queryStderrLogMsgHandler()); + // keep duplicate logging output to stderr to aide debugging + queryLogMsgManager()->removeMonitor(queryStderrLogMsgHandler()); #endif - LOG(MCdebugProgress, thorJob, "Opened log file %s", lf->queryLogFileSpec()); -#else - setupContainerizedLogMsgHandler(); - logHandler = queryStderrLogMsgHandler(); -#endif + LOG(MCdebugProgress, thorJob, "Opened log file %s", lf->queryLogFileSpec()); + } + else + { + setupContainerizedLogMsgHandler(); + logHandler = queryStderrLogMsgHandler(); + StringBuffer wuid; + if (getComponentConfigSP()->getProp("@workunit", wuid)) + { + LogMsgJobId thorJobId = queryLogMsgManager()->addJobId(wuid); + thorJob.setJobID(thorJobId); + setDefaultJobId(thorJobId); + } + } + //setupContainerizedStorageLocations(); LOG(MCdebugProgress, thorJob, "Build %s", hpccBuildInfo.buildTag); return logHandler; @@ -385,12 +396,9 @@ int main( int argc, const char *argv[] ) usage(); mySlaveNum = globals->getPropInt("@slavenum", NotFound); - /* NB: in cloud/non-local storage mode, slave number is not known until after registration with the master - * For the time being log file names are based on their slave number, so can only start when known. - */ - ILogMsgHandler *slaveLogHandler = nullptr; - if (NotFound != mySlaveNum) - slaveLogHandler = startSlaveLog(); + if (!isContainerized() && (NotFound == mySlaveNum)) + throw makeStringException(0, "Slave number not specified (@slavenum)"); + ILogMsgHandler *slaveLogHandler = startSlaveLog(); // In container world, SLAVE= will not be used const char *slave = globals->queryProp("@slave"); @@ -427,9 +435,8 @@ int main( int argc, const char *argv[] ) if (RegisterSelf(masterEp)) { - if (!slaveLogHandler) - slaveLogHandler = startSlaveLog(); - + if (globals->getPropBool("@MPChannelReconnect")) + getMPServer()->setOpt(mpsopt_channelreopen, "true"); if (getExpertOptBool("slaveDaliClient")) enableThorSlaveAsDaliClient();