From 503dc92f9106a41e987ede398899d893f4cd0a81 Mon Sep 17 00:00:00 2001 From: Gavin Halliday Date: Fri, 24 Nov 2023 15:06:25 +0000 Subject: [PATCH] HPCC-30917 Avoid core when eclagent terminates due to span destruction Signed-off-by: Gavin Halliday --- common/thorhelper/thorcommon.hpp | 4 ++ ecl/eclagent/eclagent.cpp | 2 +- esp/bindings/http/platform/httpservice.cpp | 24 ++++++---- esp/bindings/http/platform/httptransport.cpp | 5 +- esp/bindings/http/platform/httptransport.ipp | 2 +- esp/platform/esp.hpp | 4 +- roxie/ccd/ccd.hpp | 4 ++ roxie/ccd/ccdcontext.cpp | 4 ++ roxie/ccd/ccdlistener.cpp | 3 +- roxie/ccd/ccdserver.cpp | 8 ++++ system/jlib/jlog.cpp | 4 ++ system/jlib/jlog.hpp | 6 ++- system/jlib/jtrace.hpp | 48 ++++++++++++++++++++ thorlcr/graph/thgraphmaster.cpp | 2 +- thorlcr/graph/thgraphslave.cpp | 2 +- 15 files changed, 103 insertions(+), 19 deletions(-) diff --git a/common/thorhelper/thorcommon.hpp b/common/thorhelper/thorcommon.hpp index 9edd2ea3741..b13fc1ddd9b 100644 --- a/common/thorhelper/thorcommon.hpp +++ b/common/thorhelper/thorcommon.hpp @@ -717,6 +717,10 @@ class CStatsContextLogger : public CSimpleInterfaceOf { return traceLevel; } + virtual ISpan * queryActiveSpan() const override + { + return activeSpan; + } virtual void setActiveSpan(ISpan * span) override { activeSpan.set(span); diff --git a/ecl/eclagent/eclagent.cpp b/ecl/eclagent/eclagent.cpp index dc22e1f69dd..d8ea4e78835 100644 --- a/ecl/eclagent/eclagent.cpp +++ b/ecl/eclagent/eclagent.cpp @@ -2132,7 +2132,7 @@ void EclAgent::runProcess(IEclProcess *process) Owned traceHeaders = extractTraceDebugOptions(queryWorkUnit()); Owned requestSpan = queryTraceManager().createServerSpan(queryWorkUnit()->queryWuid(), traceHeaders); - updateDummyContextLogger().setActiveSpan(requestSpan); + ContextSpanScope spanScope(updateDummyContextLogger(), requestSpan); // a component may specify an alternate name for the agent/workflow memory area, // e.g. Thor specifies in "eclAgentMemory" diff --git a/esp/bindings/http/platform/httpservice.cpp b/esp/bindings/http/platform/httpservice.cpp index e6182d9bd6a..29adbe5c11c 100644 --- a/esp/bindings/http/platform/httpservice.cpp +++ b/esp/bindings/http/platform/httpservice.cpp @@ -377,18 +377,24 @@ int CEspHttpServer::processRequest() checkSetCORSAllowOrigin(thebinding, m_request, m_response); - if (thebinding!=NULL) + if (thebinding && thebinding->isUnrestrictedSSType(stype)) { - if (thebinding->isUnrestrictedSSType(stype)) - { - thebinding->onGetUnrestricted(m_request.get(), m_response.get(), serviceName.str(), methodName.str(), stype); - ctx->addTraceSummaryTimeStamp(LogMin, "handleHttp"); - return 0; - } + //Avoid creating a span for unrestrictedSSType requests + thebinding->onGetUnrestricted(m_request.get(), m_response.get(), serviceName.str(), methodName.str(), stype); + ctx->addTraceSummaryTimeStamp(LogMin, "handleHttp"); + return 0; + } - //Avoids unrestrictedSSType requests - m_request->startSpan(); + //The context will be destroyed when this request is destroyed. So rather than using + //EspContextSpanScope spanContext(*ctx, serverSpan); + //which would remove the activeSpan when this function exits, use + //setActiveSpan() + //It is possible that using EspContextSpanScope would be better + Owned serverSpan = m_request->createServerSpan(); + ctx->setActiveSpan(serverSpan); + if (thebinding!=NULL) + { if(stricmp(method.str(), POST_METHOD)==0) thebinding->handleHttpPost(m_request.get(), m_response.get()); else if(!stricmp(method.str(), GET_METHOD)) diff --git a/esp/bindings/http/platform/httptransport.cpp b/esp/bindings/http/platform/httptransport.cpp index 91c2af7adf0..b73f1722759 100644 --- a/esp/bindings/http/platform/httptransport.cpp +++ b/esp/bindings/http/platform/httptransport.cpp @@ -1915,12 +1915,11 @@ int CHttpRequest::receive(IMultiException *me) return 0; } -void CHttpRequest::startSpan() +ISpan * CHttpRequest::createServerSpan() { //MORE: The previous code would be better off querying httpHeaders... Owned httpHeaders = getHeadersAsProperties(m_headers); - Owned requestSpan = queryTraceManager().createServerSpan("HTTPRequest", httpHeaders, SpanFlags::EnsureGlobalId); - m_context->setActiveSpan(requestSpan); + return queryTraceManager().createServerSpan("HTTPRequest", httpHeaders, SpanFlags::EnsureGlobalId); } void CHttpRequest::annotateSpan(const char * key, const char * value) diff --git a/esp/bindings/http/platform/httptransport.ipp b/esp/bindings/http/platform/httptransport.ipp index 7bdea9ee0f3..55f0214ca19 100644 --- a/esp/bindings/http/platform/httptransport.ipp +++ b/esp/bindings/http/platform/httptransport.ipp @@ -369,7 +369,7 @@ public: virtual int receive(IMultiException *me); - void startSpan(); + ISpan * createServerSpan(); void updateContext(); void annotateSpan(const char * key, const char * value); diff --git a/esp/platform/esp.hpp b/esp/platform/esp.hpp index 54173681888..75dd142b14b 100644 --- a/esp/platform/esp.hpp +++ b/esp/platform/esp.hpp @@ -235,13 +235,15 @@ interface IEspContext : extends IInterface virtual void setRequest(IHttpMessage* req) = 0; virtual IHttpMessage* queryRequest() = 0; - virtual void setActiveSpan(ISpan * span)=0; + virtual void setActiveSpan(ISpan * span)=0; // Only call this function directly if this object's lifetime matches the lifetime of the span. If there is any doubt use EspContextSpanScope(ctx, span) virtual ISpan * queryActiveSpan() const = 0; virtual IProperties * getClientSpanHeaders() const = 0; virtual const char* getGlobalId() const = 0; virtual const char* getCallerId() const = 0; virtual const char* getLocalId() const = 0; }; +//Class for managing the active span in the context +using EspContextSpanScope = ContextSpanScopeImp; typedef unsigned LogLevel; diff --git a/roxie/ccd/ccd.hpp b/roxie/ccd/ccd.hpp index 0537b0f0b06..fea95771eb4 100644 --- a/roxie/ccd/ccd.hpp +++ b/roxie/ccd/ccd.hpp @@ -742,6 +742,10 @@ class ContextLogger : implements IRoxieContextLogger, public CInterface { stats.reset(); } + virtual ISpan * queryActiveSpan() const override + { + return activeSpan; + } virtual void setActiveSpan(ISpan * span) override { activeSpan.set(span); diff --git a/roxie/ccd/ccdcontext.cpp b/roxie/ccd/ccdcontext.cpp index 307e134f6d7..2daf8e0b853 100644 --- a/roxie/ccd/ccdcontext.cpp +++ b/roxie/ccd/ccdcontext.cpp @@ -1382,6 +1382,10 @@ class CRoxieContextBase : implements IRoxieAgentContext, implements ICodeContext { return logctx.queryTraceLevel(); } + virtual ISpan * queryActiveSpan() const override + { + return logctx.queryActiveSpan(); + } virtual void setActiveSpan(ISpan * span) override { const_cast(logctx).setActiveSpan(span); diff --git a/roxie/ccd/ccdlistener.cpp b/roxie/ccd/ccdlistener.cpp index 99a48198fe8..32ca4d076a3 100644 --- a/roxie/ccd/ccdlistener.cpp +++ b/roxie/ccd/ccdlistener.cpp @@ -1180,7 +1180,7 @@ class RoxieWorkUnitWorker : public RoxieQueryWorker Owned traceHeaders = extractTraceDebugOptions(wu); Owned requestSpan = queryTraceManager().createServerSpan(wu->queryWuid(), traceHeaders); - logctx->setActiveSpan(requestSpan); + ContextSpanScope spanScope(*logctx, requestSpan); Owned queryFactory; try @@ -1456,6 +1456,7 @@ class RoxieProtocolMsgContext : implements IHpccProtocolMsgContext, public CInte ensureContextLogger(); Owned requestSpan = queryTraceManager().createServerSpan("request", allHeaders, flags); + //The span has a lifetime the same length as the logctx, so no need to restore it at the end of the query logctx->setActiveSpan(requestSpan); const char * globalId = requestSpan->queryGlobalId(); diff --git a/roxie/ccd/ccdserver.cpp b/roxie/ccd/ccdserver.cpp index 3eeee9656d7..0aeebe0643d 100644 --- a/roxie/ccd/ccdserver.cpp +++ b/roxie/ccd/ccdserver.cpp @@ -276,6 +276,10 @@ class IndirectAgentContext : implements IRoxieAgentContext, public CInterface { return ctx->isBlind(); } + virtual ISpan * queryActiveSpan() const override + { + return ctx->queryActiveSpan(); + } virtual void setActiveSpan(ISpan * span) override { ctx->setActiveSpan(span); @@ -1351,6 +1355,10 @@ class CRoxieServerActivity : implements CInterfaceOf, impl return traceLevel; } + virtual ISpan * queryActiveSpan() const override + { + return ctx->queryActiveSpan(); + } virtual void setActiveSpan(ISpan * span) override { if (ctx) diff --git a/system/jlib/jlog.cpp b/system/jlib/jlog.cpp index 2ac17b56f35..5085d94906e 100644 --- a/system/jlib/jlog.cpp +++ b/system/jlib/jlog.cpp @@ -2868,6 +2868,10 @@ class DummyLogCtx : implements IContextLogger { return 0; } + virtual ISpan * queryActiveSpan() const override + { + return activeSpan; + } virtual void setActiveSpan(ISpan * span) override { activeSpan.set(span); diff --git a/system/jlib/jlog.hpp b/system/jlib/jlog.hpp index 185f7d128a9..7b7d30f8c7a 100644 --- a/system/jlib/jlog.hpp +++ b/system/jlib/jlog.hpp @@ -1271,12 +1271,16 @@ interface jlib_decl IContextLogger : extends IInterface virtual const char *queryLocalId() const = 0; virtual const char *queryCallerId() const = 0; virtual const CRuntimeStatisticCollection & queryStats() const = 0; - virtual void setActiveSpan(ISpan * span) = 0; + virtual ISpan * queryActiveSpan() const = 0; + virtual void setActiveSpan(ISpan * span) = 0; // Only call this function directly if this object's lifetime matches the lifetime of the span. If there is any doubt use ContextSpanScope(ctx, span) virtual IProperties * getClientHeaders() const = 0; virtual void recordStatistics(IStatisticGatherer &progress) const = 0; virtual const LogMsgJobInfo & queryJob() const { return unknownJob; } }; +using ContextSpanScope = ContextSpanScopeImp; + + extern jlib_decl StringBuffer &appendGloballyUniqueId(StringBuffer &s); extern jlib_decl const IContextLogger &queryDummyContextLogger(); diff --git a/system/jlib/jtrace.hpp b/system/jlib/jtrace.hpp index 2c1234b0622..5f823cb005d 100644 --- a/system/jlib/jtrace.hpp +++ b/system/jlib/jtrace.hpp @@ -75,6 +75,54 @@ extern jlib_decl ISpan * getNullSpan(); extern jlib_decl void initTraceManager(const char * componentName, const IPropertyTree * componentConfig, const IPropertyTree * globalConfig); extern jlib_decl ITraceManager & queryTraceManager(); +//The following class is responsible for ensuring that the active span is restored in a context when the scope is exited +//Use a template class so it can be reused for IContextLogger and IEspContext +template +class ContextSpanScopeImp +{ +public: + ContextSpanScopeImp(CONTEXT & _ctx, ISpan * span) + : ctx(_ctx) + { + prev.set(ctx.queryActiveSpan()); + ctx.setActiveSpan(span); + } + ~ContextSpanScopeImp() + { + ctx.setActiveSpan(prev); + } + +protected: + CONTEXT & ctx; + Owned prev; +}; + +// A variant of the class above that allows startSpan to be called after construction +template +class DynamicContextSpanScopeImp +{ +public: + ~DynamicContextSpanScopeImp() + { + finishSpan(); + } + void startSpan(CONTEXT & _ctx, ISpan * span) + { + ctx = &_ctx; + prev.set(ctx->queryActiveSpan()); + ctx->setActiveSpan(span); + } + void finishSpan() + { + if (ctx) + ctx->setActiveSpan(prev); + } + +protected: + CONTEXT * ctx{nullptr}; + Owned prev; +}; + /* To use feature-level tracing flags, protect the tracing with a test such as: diff --git a/thorlcr/graph/thgraphmaster.cpp b/thorlcr/graph/thgraphmaster.cpp index dae32d807ca..678ea25a8a6 100644 --- a/thorlcr/graph/thgraphmaster.cpp +++ b/thorlcr/graph/thgraphmaster.cpp @@ -1486,7 +1486,7 @@ CJobMaster::CJobMaster(IConstWorkUnit &_workunit, const char *graphName, ILoaded Owned traceHeaders = extractTraceDebugOptions(workunit); Owned requestSpan = queryTraceManager().createServerSpan(workunit->queryWuid(), traceHeaders); - logctx->setActiveSpan(requestSpan); + ContextSpanScope spanScope(*logctx, requestSpan); resumed = WUActionResume == workunit->getAction(); fatalHandler.setown(new CFatalHandler(globals->getPropInt("@fatal_timeout", FATAL_TIMEOUT))); diff --git a/thorlcr/graph/thgraphslave.cpp b/thorlcr/graph/thgraphslave.cpp index 66e49f4e1fa..76b1acfcd94 100644 --- a/thorlcr/graph/thgraphslave.cpp +++ b/thorlcr/graph/thgraphslave.cpp @@ -1681,7 +1681,7 @@ CJobSlave::CJobSlave(ISlaveWatchdog *_watchdog, IPropertyTree *_workUnitInfo, co Owned traceHeaders = deserializeTraceDebugOptions(workUnitInfo->queryPropTree("Debug")); Owned requestSpan = queryTraceManager().createServerSpan(wuid, traceHeaders); - logctx->setActiveSpan(requestSpan); + ContextSpanScope spanScope(*logctx, requestSpan); oldNodeCacheMem = 0; slavemptag = _slavemptag;