From ba205f161842333608e0f7c268654d0b3be41586 Mon Sep 17 00:00:00 2001 From: Jake Smith Date: Thu, 19 Oct 2023 17:57:37 +0100 Subject: [PATCH 1/9] HPCC-29215 Add DFU Copy Ensure functionality Add an 'ensure' option to dfu copy request, which copies the file if necessary, but published the meta information only if the physical files already exist. Signed-off-by: Jake Smith --- dali/dfu/dfurun.cpp | 43 +++++++++++++++++++++++++---- dali/dfu/dfuwu.cpp | 12 +++++++- dali/dfu/dfuwu.hpp | 2 ++ esp/scm/ws_fs.ecm | 1 + esp/services/ws_fs/ws_fsService.cpp | 1 + 5 files changed, 53 insertions(+), 6 deletions(-) diff --git a/dali/dfu/dfurun.cpp b/dali/dfu/dfurun.cpp index 72989b5ed34..36de1b7a9eb 100644 --- a/dali/dfu/dfurun.cpp +++ b/dali/dfu/dfurun.cpp @@ -1442,6 +1442,8 @@ class CDFUengine: public CInterface, implements IDFUengine } break; } + + bool ensureLfnAlreadyPublished = false; // fill dstfile for commands that need it switch (cmd) { case DFUcmd_copymerge: @@ -1542,6 +1544,14 @@ class CDFUengine: public CInterface, implements IDFUengine Owned oldfile = wsdfs::lookup(tmp.str(),userdesc,AccessMode::tbdWrite,false,false,nullptr,defaultPrivilegedUser,INFINITE); if (oldfile) { + if (options->getEnsure()) + { + // logical file already exists. + ensureLfnAlreadyPublished = true; + dstFile.setown(oldfile.getClear()); + dstName.set(tmp); + break; + } StringBuffer reason; bool canRemove = oldfile->canRemove(reason); oldfile.clear(); @@ -1700,12 +1710,35 @@ class CDFUengine: public CInterface, implements IDFUengine } } else { - fsys.copy(srcFile,dstFile,recovery, recoveryconn, filter, opttree, &feedback, &abortnotify, dfuwuid); - if (!abortnotify.abortRequested()) { - if (needrep) - replicating = true; + bool performCopy = true; + if (options->getEnsure()) + { + if (ensureLfnAlreadyPublished) + performCopy = false; else - dstFile->attach(dstName.get(),userdesc); + { + if (dstFile->existsPhysicalPartFiles(0)) + { + dstFile->attach(dstName.get(), userdesc); + performCopy = false; + } + } + if (!performCopy) + { + feedback.repmode=cProgressReporter::REPnone; + feedback.displaySummary(nullptr, 0); + Audit("COPYENSURE", userdesc, srcFile?srcName.str():nullptr, dstName.get()); + } + } + if (performCopy) + { + fsys.copy(srcFile,dstFile,recovery, recoveryconn, filter, opttree, &feedback, &abortnotify, dfuwuid); + if (!abortnotify.abortRequested()) { + if (needrep) + replicating = true; + else + dstFile->attach(dstName.get(),userdesc); + } Audit("COPY",userdesc,srcFile?srcName.str():NULL,dstName.get()); } } diff --git a/dali/dfu/dfuwu.cpp b/dali/dfu/dfuwu.cpp index 2444d37fd41..26306e8db90 100644 --- a/dali/dfu/dfuwu.cpp +++ b/dali/dfu/dfuwu.cpp @@ -2002,6 +2002,11 @@ class CDFUoptions: public CLinkedDFUWUchild, implements IDFUoptions return queryRoot()->getPropInt("@overwrite")!=0; } + bool getEnsure() const + { + return queryRoot()->getPropInt("@ensure")!=0; + } + DFUreplicateMode getReplicateMode(StringBuffer &cluster, bool &repeatlast,bool &onlyrepeated) const { repeatlast = false; @@ -2146,7 +2151,7 @@ class CDFUoptions: public CLinkedDFUWUchild, implements IDFUoptions queryRoot()->setPropInt("@throttle",val); } - void setTransferBufferSize(unsigned val) + void setTransferBufferSize(size32_t val) { queryRoot()->setPropInt("@transferBufferSize",val); } @@ -2161,6 +2166,11 @@ class CDFUoptions: public CLinkedDFUWUchild, implements IDFUoptions queryRoot()->setPropInt("@overwrite",val?1:0); } + void setEnsure(bool val=true) + { + queryRoot()->setPropInt("@ensure",val?1:0); + } + void setReplicateMode(DFUreplicateMode val,const char *cluster=NULL,bool repeatlast=false,bool onlyrepeated=false) { queryRoot()->setPropInt("@replicatemode",(int)val); diff --git a/dali/dfu/dfuwu.hpp b/dali/dfu/dfuwu.hpp index 59c15a49e9d..923630a719c 100644 --- a/dali/dfu/dfuwu.hpp +++ b/dali/dfu/dfuwu.hpp @@ -153,6 +153,7 @@ interface IConstDFUoptions : extends IInterface virtual size32_t getTransferBufferSize() const = 0; virtual bool getVerify() const = 0; virtual bool getOverwrite() const = 0; + virtual bool getEnsure() const = 0; virtual DFUreplicateMode getReplicateMode(StringBuffer &cluster, bool &repeatlast,bool &onlyrepeated) const = 0; virtual const char *queryPartFilter() const = 0; virtual bool getKeepHeader() const = 0; @@ -195,6 +196,7 @@ interface IDFUoptions : extends IConstDFUoptions virtual void setTransferBufferSize(size32_t val) = 0; virtual void setVerify(bool val=true) = 0; virtual void setOverwrite(bool val=true) = 0; + virtual void setEnsure(bool val=true) = 0; virtual void setReplicateMode(DFUreplicateMode val,const char *cluster=NULL,bool repeatlast=false,bool onlyrepeated=false) = 0; virtual void setPartFilter(const char *filter) = 0; // format n,n-n,n etc virtual void setKeepHeader(bool val=true) = 0; diff --git a/esp/scm/ws_fs.ecm b/esp/scm/ws_fs.ecm index 6aab351fb1c..064039e1e39 100644 --- a/esp/scm/ws_fs.ecm +++ b/esp/scm/ws_fs.ecm @@ -481,6 +481,7 @@ ESPrequest [nil_remove] Copy string srcusername; string srcpassword; bool overwrite; + bool ensure; bool replicate; int ReplicateOffset(1); diff --git a/esp/services/ws_fs/ws_fsService.cpp b/esp/services/ws_fs/ws_fsService.cpp index dd41506ce37..10bab1b8edf 100644 --- a/esp/services/ws_fs/ws_fsService.cpp +++ b/esp/services/ws_fs/ws_fsService.cpp @@ -2790,6 +2790,7 @@ bool CFileSprayEx::onCopy(IEspContext &context, IEspCopy &req, IEspCopyResponse wuFSpecDest->setLogicalName(dstname); wuFSpecDest->setFileMask(fileMask.str()); wuOptions->setOverwrite(req.getOverwrite()); + wuOptions->setEnsure(req.getEnsure()); wuOptions->setPreserveCompression(req.getPreserveCompression()); if (!req.getExpireDays_isNull()) wuOptions->setExpireDays(req.getExpireDays()); From 87d20d891f8edad4f0260ba1304effb4ddf5c6b7 Mon Sep 17 00:00:00 2001 From: Tim Klemm Date: Thu, 9 Nov 2023 15:14:47 -0500 Subject: [PATCH 2/9] HPCC-30768 Ignore hpcc.gitpatch Add second path to hpcc.gitpatch sans leading "./" for improved matching. Retain existing path in case in functions in some other environment. Signed-off-by: Tim Klemm --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7f646324d87..75596d3e688 100644 --- a/.gitignore +++ b/.gitignore @@ -16,5 +16,6 @@ eclcc.log *.pyc ./helm/examples/azure/sa/env-sa ./dockerfiles/platform-build-incremental/hpcc.gitpatch +dockerfiles/platform-build-incremental/hpcc.gitpatch .env /vcpkg.json From dccec153967f8c74ceb7cf1d09155af03c300fbe Mon Sep 17 00:00:00 2001 From: Jake Smith Date: Fri, 10 Nov 2023 15:04:38 +0000 Subject: [PATCH 3/9] HPCC-30807 Fix foreign access to striped storage Pass plane info back to client, which is used to decide whether to map. NB: also used by plane aliasing. Signed-off-by: Jake Smith --- dali/base/dadfs.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/dali/base/dadfs.cpp b/dali/base/dadfs.cpp index 6ad138b9e3e..c6c1fdb2442 100644 --- a/dali/base/dadfs.cpp +++ b/dali/base/dadfs.cpp @@ -11173,12 +11173,21 @@ class CDaliDFSServer: public Thread, public CTransactionLogTracker, implements I Owned tree = getNamedPropTree(sroot,queryDfsXmlBranchName(DXB_File),"@name",tail.str(),false); if (tree) { -#ifdef _CONTAINERIZED - // This is for bare-metal clients using ~foreign pointing at a containerized/k8s setup, - // asking for the returned meta data to be remapped to point to the dafilesrv service. - if (hasMask(opts, GetFileTreeOpts::remapToService)) - remapGroupsToDafilesrv(tree, &queryNamedGroupStore()); -#endif + if (isContainerized()) + { + // This is for bare-metal clients using ~foreign pointing at a containerized/k8s setup, + // asking for the returned meta data to be remapped to point to the dafilesrv service. + if (hasMask(opts, GetFileTreeOpts::remapToService)) + { + remapGroupsToDafilesrv(tree, &queryNamedGroupStore()); + + const char *remotePlaneName = tree->queryProp("@group"); + Owned filePlane = getStoragePlane(remotePlaneName); + assertex(filePlane); + // Used by DFS clients to determine if stripe and/or alias translation needed + tree->setPropTree("Attr/_remoteStoragePlane", createPTreeFromIPT(filePlane)); + } + } Owned fdesc = deserializeFileDescriptorTree(tree,&queryNamedGroupStore(),IFDSF_EXCLUDE_CLUSTERNAMES); mb.append((int)1); // 1 == standard file From 5534d92d76e55fc3c3464e23f9114a4a927c0db4 Mon Sep 17 00:00:00 2001 From: Gordon Smith Date: Tue, 14 Nov 2023 08:27:02 +0100 Subject: [PATCH 4/9] HPCC-30772 Add "Download DOT" to metrics Signed-off-by: Gordon Smith --- esp/src/src-react/components/Metrics.tsx | 130 +++++++++++++---------- esp/src/src-react/util/metricGraph.ts | 2 +- esp/src/src/nls/hpcc.ts | 1 + 3 files changed, 75 insertions(+), 58 deletions(-) diff --git a/esp/src/src-react/components/Metrics.tsx b/esp/src/src-react/components/Metrics.tsx index 1292c779860..717533a92ca 100644 --- a/esp/src/src-react/components/Metrics.tsx +++ b/esp/src/src-react/components/Metrics.tsx @@ -154,63 +154,6 @@ export const Metrics: React.FunctionComponent = ({ pushUrl(`/workunits/${wuid}/metrics/${selection}`); }, [wuid, selection]); - // Command Bar --- - const buttons = React.useMemo((): ICommandBarItemProps[] => [ - { - key: "refresh", text: nlsHPCC.Refresh, iconProps: { iconName: "Refresh" }, - onClick: () => refresh() - }, - { - key: "hotspot", text: nlsHPCC.Hotspots, iconProps: { iconName: "SpeedHigh" }, - disabled: !hotspots, onClick: () => onHotspot() - }, - { key: "divider_1", itemType: ContextualMenuItemType.Divider, onRender: () => }, - { - key: "timeline", text: nlsHPCC.Timeline, canCheck: true, checked: showTimeline, iconProps: { iconName: "TimelineProgress" }, - onClick: () => { - setShowTimeline(!showTimeline); - } - }, - { - key: "options", text: nlsHPCC.Options, iconProps: { iconName: "Settings" }, - onClick: () => { - setOptions({ ...options, layout: dockpanel.layout() }); - setShowMetricOptions(true); - } - } - ], [dockpanel, hotspots, onHotspot, options, refresh, setOptions, showTimeline]); - - const formatColumns = React.useMemo((): Utility.ColumnMap => { - const copyColumns: Utility.ColumnMap = {}; - for (const key in columns) { - copyColumns[key] = { - field: key, - label: key - }; - } - return copyColumns; - }, [columns]); - - const rightButtons = React.useMemo((): ICommandBarItemProps[] => [ - { - key: "copy", text: nlsHPCC.CopyToClipboard, disabled: !metrics.length || !navigator?.clipboard?.writeText, iconOnly: true, iconProps: { iconName: "Copy" }, - onClick: () => { - const tsv = Utility.formatAsDelim(formatColumns, metrics, "\t"); - navigator?.clipboard?.writeText(tsv); - } - }, - { - key: "download", text: nlsHPCC.DownloadToCSV, disabled: !metrics.length, iconOnly: true, iconProps: { iconName: "Download" }, - onClick: () => { - const csv = Utility.formatAsDelim(formatColumns, metrics, ","); - Utility.downloadText(csv, `metrics-${wuid}.csv`); - } - }, { - key: "fullscreen", title: nlsHPCC.MaximizeRestore, iconProps: { iconName: fullscreen ? "ChromeRestore" : "FullScreen" }, - onClick: () => setFullscreen(!fullscreen) - } - ], [formatColumns, fullscreen, metrics, wuid]); - // Timeline --- const timeline = useConst(() => new WUTimelinePatched() .maxZoom(Number.MAX_SAFE_INTEGER) @@ -548,6 +491,79 @@ export const Metrics: React.FunctionComponent = ({ saveOptions(); }, [options, saveOptions, setOptions]); + // Command Bar --- + const buttons = React.useMemo((): ICommandBarItemProps[] => [ + { + key: "refresh", text: nlsHPCC.Refresh, iconProps: { iconName: "Refresh" }, + onClick: () => refresh() + }, + { + key: "hotspot", text: nlsHPCC.Hotspots, iconProps: { iconName: "SpeedHigh" }, + disabled: !hotspots, onClick: () => onHotspot() + }, + { key: "divider_1", itemType: ContextualMenuItemType.Divider, onRender: () => }, + { + key: "timeline", text: nlsHPCC.Timeline, canCheck: true, checked: showTimeline, iconProps: { iconName: "TimelineProgress" }, + onClick: () => { + setShowTimeline(!showTimeline); + } + }, + { + key: "options", text: nlsHPCC.Options, iconProps: { iconName: "Settings" }, + onClick: () => { + setOptions({ ...options, layout: dockpanel.layout() }); + setShowMetricOptions(true); + } + } + ], [dockpanel, hotspots, onHotspot, options, refresh, setOptions, showTimeline]); + + const formatColumns = React.useMemo((): Utility.ColumnMap => { + const copyColumns: Utility.ColumnMap = {}; + for (const key in columns) { + copyColumns[key] = { + field: key, + label: key + }; + } + return copyColumns; + }, [columns]); + + const rightButtons = React.useMemo((): ICommandBarItemProps[] => [ + { + key: "copy", text: nlsHPCC.CopyToClipboard, disabled: !metrics.length || !navigator?.clipboard?.writeText, iconOnly: true, iconProps: { iconName: "Copy" }, + onClick: () => { + const tsv = Utility.formatAsDelim(formatColumns, metrics, "\t"); + navigator?.clipboard?.writeText(tsv); + } + }, + { + key: "download", text: nlsHPCC.DownloadToCSV, disabled: !metrics.length, iconOnly: true, iconProps: { iconName: "Download" }, + subMenuProps: { + items: [{ + key: "downloadCSV", + text: nlsHPCC.DownloadToCSV, + iconProps: { iconName: "Table" }, + onClick: () => { + const csv = Utility.formatAsDelim(formatColumns, metrics, ","); + Utility.downloadText(csv, `metrics-${wuid}.csv`); + } + }, + { + key: "downloadDOT", + text: nlsHPCC.DownloadToDOT, + iconProps: { iconName: "Relationship" }, + onClick: () => { + const dot = metricGraph.graphTpl(selectedMetrics, options); + Utility.downloadText(dot, `metrics-${wuid}.dot`); + } + }] + } + }, { + key: "fullscreen", title: nlsHPCC.MaximizeRestore, iconProps: { iconName: fullscreen ? "ChromeRestore" : "FullScreen" }, + onClick: () => setFullscreen(!fullscreen) + } + ], [formatColumns, fullscreen, metricGraph, metrics, options, selectedMetrics, wuid]); + return diff --git a/esp/src/src-react/util/metricGraph.ts b/esp/src/src-react/util/metricGraph.ts index fb6bbae9cc6..de0cd30df87 100644 --- a/esp/src/src-react/util/metricGraph.ts +++ b/esp/src/src-react/util/metricGraph.ts @@ -177,7 +177,7 @@ export class MetricGraph extends Graph2 { }); data.forEach((scope: IScope) => { - if (scope.type === "edge") { + if (scope.type === "edge" && scope.IdSource !== undefined && scope.IdTarget !== undefined) { if (!this.vertexExists(this._activityIndex[(scope as IScopeEdge).IdSource])) logger.warning(`Missing vertex: ${(scope as IScopeEdge).IdSource}`); else if (!this.vertexExists(this._activityIndex[(scope as IScopeEdge).IdTarget])) { diff --git a/esp/src/src/nls/hpcc.ts b/esp/src/src/nls/hpcc.ts index d244520eb63..1fe2ef88fde 100644 --- a/esp/src/src/nls/hpcc.ts +++ b/esp/src/src/nls/hpcc.ts @@ -254,6 +254,7 @@ export = { Downloads: "Downloads", DownloadToCSV: "Download to CSV", DownloadToCSVNonFlatWarning: "Please note: downloading files containing nested datasets as comma-separated data may not be formatted as expected", + DownloadToDOT: "Download to DOT", DownloadSelectionAsCSV: "Download selection as CSV", DropZone: "Drop Zone", DueToInctivity: "You will be logged out of all ECL Watch sessions in 3 minutes due to inactivity.", From e0da31b46cf3df192912e6514d7aa67d6545d010 Mon Sep 17 00:00:00 2001 From: wangkx Date: Mon, 6 Nov 2023 13:40:27 -0500 Subject: [PATCH 5/9] HPCC-30184 Expose WU "Process" meta info in WsWorkunits.WUInfo Revise based on review: 1. rename the getAllProcesses() to getProcessTypes(). 2. revise the code for querying process data from IPropertyTree. 3. set default values from ecm file. Signed-off-by: wangkx --- common/workunit/workunit.cpp | 9 ++++ common/workunit/workunit.hpp | 1 + common/workunit/workunit.ipp | 1 + esp/scm/ws_workunits.ecm | 2 +- esp/scm/ws_workunits_req_resp.ecm | 1 + esp/scm/ws_workunits_struct.ecm | 13 ++++++ .../ws_workunits/ws_workunitsHelpers.cpp | 46 +++++++++++++++++++ .../ws_workunits/ws_workunitsHelpers.hpp | 2 + .../ws_workunits/ws_workunitsService.cpp | 2 + 9 files changed, 76 insertions(+), 1 deletion(-) diff --git a/common/workunit/workunit.cpp b/common/workunit/workunit.cpp index 64f569e5284..0675947fb51 100644 --- a/common/workunit/workunit.cpp +++ b/common/workunit/workunit.cpp @@ -4423,6 +4423,8 @@ class CLockedWorkUnit : implements ILocalWorkUnit, implements IExtendedWUInterfa { return c->getHash(); } virtual IStringIterator *getLogs(const char *type, const char *instance) const { return c->getLogs(type, instance); } + virtual IPropertyTreeIterator *getProcessTypes() const + { return c->getProcessTypes(); } virtual IStringIterator *getProcesses(const char *type) const { return c->getProcesses(type); } virtual IPropertyTreeIterator* getProcesses(const char *type, const char *instance) const @@ -8616,6 +8618,13 @@ IStringIterator *CLocalWorkUnit::getLogs(const char *type, const char *instance) return new CStringPTreeAttrIterator(p->getElements(xpath.str()), "@log"); } +IPropertyTreeIterator* CLocalWorkUnit::getProcessTypes() const +{ + VStringBuffer xpath("Process/*"); + CriticalBlock block(crit); + return p->getElements(xpath.str()); +} + IPropertyTreeIterator* CLocalWorkUnit::getProcesses(const char *type, const char *instance) const { VStringBuffer xpath("Process/%s/", type); diff --git a/common/workunit/workunit.hpp b/common/workunit/workunit.hpp index 60c06b5a56d..02543eb5eba 100644 --- a/common/workunit/workunit.hpp +++ b/common/workunit/workunit.hpp @@ -1292,6 +1292,7 @@ interface IConstWorkUnit : extends IConstWorkUnitInfo virtual const IPropertyTree * getXmlParams() const = 0; virtual unsigned __int64 getHash() const = 0; virtual IStringIterator *getLogs(const char *type, const char *instance=NULL) const = 0; + virtual IPropertyTreeIterator *getProcessTypes() const = 0; virtual IStringIterator *getProcesses(const char *type) const = 0; virtual IPropertyTreeIterator* getProcesses(const char *type, const char *instance) const = 0; diff --git a/common/workunit/workunit.ipp b/common/workunit/workunit.ipp index 4f166ea67fb..d2d7156258d 100644 --- a/common/workunit/workunit.ipp +++ b/common/workunit/workunit.ipp @@ -267,6 +267,7 @@ public: virtual const IPropertyTree *getXmlParams() const; virtual unsigned __int64 getHash() const; virtual IStringIterator *getLogs(const char *type, const char *component) const; + virtual IPropertyTreeIterator *getProcessTypes() const; virtual IStringIterator *getProcesses(const char *type) const; virtual IPropertyTreeIterator* getProcesses(const char *type, const char *instance) const; virtual IStringVal & getSnapshot(IStringVal & str) const; diff --git a/esp/scm/ws_workunits.ecm b/esp/scm/ws_workunits.ecm index 03064b548f5..25edec054ed 100644 --- a/esp/scm/ws_workunits.ecm +++ b/esp/scm/ws_workunits.ecm @@ -25,7 +25,7 @@ EspInclude(ws_workunits_queryset_req_resp); ESPservice [ auth_feature("DEFERRED"), //This declares that the method logic handles feature level authorization - version("1.97"), default_client_version("1.97"), cache_group("ESPWsWUs"), + version("1.98"), default_client_version("1.98"), cache_group("ESPWsWUs"), noforms,exceptions_inline("./smc_xslt/exceptions.xslt"),use_method_name] WsWorkunits { ESPmethod [cache_seconds(60), resp_xsl_default("/esp/xslt/workunits.xslt")] WUQuery(WUQueryRequest, WUQueryResponse); diff --git a/esp/scm/ws_workunits_req_resp.ecm b/esp/scm/ws_workunits_req_resp.ecm index 0b8bbf28c8a..522dbc32f4d 100644 --- a/esp/scm/ws_workunits_req_resp.ecm +++ b/esp/scm/ws_workunits_req_resp.ecm @@ -357,6 +357,7 @@ ESPrequest WUInfoRequest [min_ver("1.66")] bool IncludeAllowedClusters(true); [min_ver("1.73")] bool IncludeTotalClusterTime(true); [min_ver("1.78")] bool IncludeServiceNames(false); + [min_ver("1.98")] bool IncludeProcesses(false); [min_ver("1.16")] bool SuppressResultSchemas(false); [min_ver("1.25")] string ThorSlaveIP; }; diff --git a/esp/scm/ws_workunits_struct.ecm b/esp/scm/ws_workunits_struct.ecm index fcad4bf90df..cf628ce31bc 100644 --- a/esp/scm/ws_workunits_struct.ecm +++ b/esp/scm/ws_workunits_struct.ecm @@ -339,6 +339,18 @@ ESPStruct [nil_remove] ThorLogInfo int NumberSlaves; }; +ESPStruct [nil_remove] ECLWUProcess +{ + string Name; + string Type; + string PodName; //containerized only + int InstanceNumber(1); //containerized only + string Log; //bare metal only + string PID; //bare metal only + string Pattern; //bare metal only + int Max(1); //bare metal only +}; + ESPStruct [nil_remove] ECLWorkunitLW { string Wuid; @@ -442,6 +454,7 @@ ESPStruct [nil_remove] ECLWorkunit [min_ver("1.85")] double FileAccessCost; [min_ver("1.87")] double CompileCost; [min_ver("1.91")] bool NoAccess(false); + [min_ver("1.98")] ESParray ECLWUProcessList; }; ESPStruct [nil_remove] WUECLAttribute diff --git a/esp/services/ws_workunits/ws_workunitsHelpers.cpp b/esp/services/ws_workunits/ws_workunitsHelpers.cpp index a4e91a67aa6..c0c3061432e 100644 --- a/esp/services/ws_workunits/ws_workunitsHelpers.cpp +++ b/esp/services/ws_workunits/ws_workunitsHelpers.cpp @@ -1210,6 +1210,50 @@ void WsWuInfo::getServiceNames(IEspECLWorkunit &info, unsigned long flags) info.setServiceNames(serviceNames); } +void WsWuInfo::getECLWUProcesses(IEspECLWorkunit &info, unsigned long flags) +{ + if (!(flags & WUINFO_IncludeProcesses)) + return; + + IArrayOf processList; + Owned processGroupItr = cw->getProcessTypes(); + ForEach(*processGroupItr) + { + IPropertyTree &processGroup = processGroupItr->query(); + const char *type = processGroup.queryName(); + + Owned processItr = processGroup.getElements("*"); + ForEach(*processItr) + { + IPropertyTree &process = processItr->query(); + Owned p = createECLWUProcess(); + p->setName(process.queryName()); + p->setType(type); + const char *podName = process.queryProp("@podName"); + if (!isEmptyString(podName)) + p->setPodName(podName); + unsigned instanceNum = process.getPropInt("@instanceNum", NotFound); + if (NotFound != instanceNum) + p->setInstanceNumber(instanceNum); + const char *pid = process.queryProp("@pid"); + if (!isEmptyString(pid)) + p->setPID(pid); + const char *log = process.queryProp("@log"); + if (!isEmptyString(log)) + p->setLog(log); + unsigned max = process.getPropInt("@max", NotFound); + if (NotFound != max) + p->setMax(max); + const char *pattern = process.queryProp("@pattern"); + if (!isEmptyString(pattern)) + p->setPattern(pattern); + + processList.append(*p.getClear()); + } + } + info.setECLWUProcessList(processList); +} + void WsWuInfo::getEventScheduleFlag(IEspECLWorkunit &info) { info.setEventSchedule(0); @@ -1371,6 +1415,8 @@ void WsWuInfo::getInfo(IEspECLWorkunit &info, unsigned long flags) getApplicationValues(info, flags); getWorkflow(info, flags); getServiceNames(info, flags); + if (version>=1.98) + getECLWUProcesses(info, flags); } #ifndef _CONTAINERIZED diff --git a/esp/services/ws_workunits/ws_workunitsHelpers.hpp b/esp/services/ws_workunits/ws_workunitsHelpers.hpp index 5305b0dec1a..16eca77c5b7 100644 --- a/esp/services/ws_workunits/ws_workunitsHelpers.hpp +++ b/esp/services/ws_workunits/ws_workunitsHelpers.hpp @@ -142,6 +142,7 @@ struct WsWUExceptions #define WUINFO_IncludeAllowedClusters 0x10000 #define WUINFO_IncludeTotalClusterTime 0x20000 #define WUINFO_IncludeServiceNames 0x40000 +#define WUINFO_IncludeProcesses 0x80000 #define WUINFO_All 0xFFFFFFFF static constexpr unsigned defaultMaxLogRecords = 10000; @@ -470,6 +471,7 @@ class WsWuInfo void getResult(IConstWUResult &r, IArrayOf& results, unsigned long flags); void getStats(const WuScopeFilter & filter, const StatisticsFilter& statsFilter, bool createDescriptions, IArrayOf& statistics); void getServiceNames(IEspECLWorkunit &info, unsigned long flags); + void getECLWUProcesses(IEspECLWorkunit &info, unsigned long flags); #ifndef _CONTAINERIZED void getWUProcessLogSpecs(const char* processName, const char* logSpec, const char* logDir, bool eclAgent, StringArray& logSpecs); diff --git a/esp/services/ws_workunits/ws_workunitsService.cpp b/esp/services/ws_workunits/ws_workunitsService.cpp index efd3b32d4e4..b912dc0caa9 100644 --- a/esp/services/ws_workunits/ws_workunitsService.cpp +++ b/esp/services/ws_workunits/ws_workunitsService.cpp @@ -1615,6 +1615,8 @@ bool CWsWorkunitsEx::onWUInfo(IEspContext &context, IEspWUInfoRequest &req, IEsp flags|=WUINFO_IncludeTotalClusterTime; if (req.getIncludeServiceNames()) flags|=WUINFO_IncludeServiceNames; + if (req.getIncludeProcesses()) + flags|=WUINFO_IncludeProcesses; PROGLOG("WUInfo: %s %lx", wuid.str(), flags); From b8d0e497129c5a9dec537af83e08302618ec7758 Mon Sep 17 00:00:00 2001 From: Rodrigo Pastrana Date: Sun, 22 Oct 2023 22:48:31 -0400 Subject: [PATCH 6/9] HPCC-30401 JTrace optionally suppress trace/span IDs - Copies global.tracing to component helm values - Revises logic to create otel span -- Span created if per-span flag set -- or global/component config flag set -- or serverSpan receives valid remote parent - Adds cppunit (cannot test scenario where no span created yet) Signed-off-by: Rodrigo Pastrana --- helm/hpcc/templates/_helpers.tpl | 15 +++- helm/hpcc/templates/dafilesrv.yaml | 3 +- helm/hpcc/templates/dali.yaml | 3 +- helm/hpcc/templates/dfuserver.yaml | 3 +- helm/hpcc/templates/eclagent.yaml | 6 +- helm/hpcc/templates/eclccserver.yaml | 3 +- helm/hpcc/templates/eclscheduler.yaml | 3 +- helm/hpcc/templates/esp.yaml | 3 +- helm/hpcc/templates/localroxie.yaml | 3 +- helm/hpcc/templates/roxie.yaml | 6 +- helm/hpcc/templates/thor.yaml | 6 +- helm/hpcc/values.schema.json | 4 ++ helm/hpcc/values.yaml | 1 + system/jlib/jtrace.cpp | 100 +++++++++++++++----------- system/jlib/jtrace.hpp | 2 + testing/unittests/jlibtests.cpp | 16 +++++ 16 files changed, 121 insertions(+), 56 deletions(-) diff --git a/helm/hpcc/templates/_helpers.tpl b/helm/hpcc/templates/_helpers.tpl index 74ae4379e0a..abbb2f0014a 100644 --- a/helm/hpcc/templates/_helpers.tpl +++ b/helm/hpcc/templates/_helpers.tpl @@ -288,6 +288,18 @@ logging: {{- end -}} {{- end -}} +{{/* +Generate local tracing info, merged with global +Pass in dict with root and me +*/}} +{{- define "hpcc.generateTracingConfig" -}} +{{- $tracing := deepCopy (.me.tracing | default dict) | mergeOverwrite dict (.root.Values.global.tracing | default dict) -}} +{{- if not (empty $tracing) }} +tracing: +{{ toYaml $tracing | indent 2 }} +{{- end -}} +{{- end -}} + {{/* Generate local metrics configuration, merged with global Pass in dict with root and me @@ -1304,8 +1316,9 @@ data: {{ $configMapName }}.yaml: version: 1.0 sasha: -{{ toYaml (omit .me "logging") | indent 6 }} +{{ toYaml (omit .me "logging" "tracing") | indent 6 }} {{- include "hpcc.generateLoggingConfig" . | indent 6 }} +{{- include "hpcc.generateTracingConfig" . | indent 6 }} {{ include "hpcc.generateVaultConfig" . | indent 6 }} {{- if hasKey .me "plane" }} {{- $sashaStoragePlane := .me.plane | default (include "hpcc.getFirstPlaneForCategory" (dict "root" .root "category" "sasha")) }} diff --git a/helm/hpcc/templates/dafilesrv.yaml b/helm/hpcc/templates/dafilesrv.yaml index 6fcf791d2d6..70c0a09ff49 100644 --- a/helm/hpcc/templates/dafilesrv.yaml +++ b/helm/hpcc/templates/dafilesrv.yaml @@ -10,8 +10,9 @@ data: {{ .me.name }}.yaml: version: 1.0 dafilesrv: -{{ toYaml (omit .me "logging" "env") | indent 6 }} +{{ toYaml (omit .me "logging" "tracing" "env") | indent 6 }} {{- include "hpcc.generateLoggingConfig" . | indent 6 }} +{{- include "hpcc.generateTracingConfig" . | indent 6 }} global: {{ include "hpcc.generateGlobalConfigMap" .root | indent 6 }} {{- end -}} diff --git a/helm/hpcc/templates/dali.yaml b/helm/hpcc/templates/dali.yaml index 74861f63b34..a8cdd9ff3ce 100644 --- a/helm/hpcc/templates/dali.yaml +++ b/helm/hpcc/templates/dali.yaml @@ -170,9 +170,10 @@ data: {{ $dali.name }}.yaml: | version: 1.0 dali: -{{ toYaml (omit $dali "logging" "services" "env") | indent 6 }} +{{ toYaml (omit $dali "logging" "tracing" "services" "env") | indent 6 }} dataPath: {{ include "hpcc.getPlanePrefix" (dict "root" $ "planeName" $daliStoragePlane) }} {{- include "hpcc.generateLoggingConfig" $commonCtx | indent 6 }} +{{- include "hpcc.generateTracingConfig" $commonCtx | indent 6 }} {{- include "hpcc.generateMetricsConfig" $commonCtx | indent 6 }} {{ include "hpcc.generateVaultConfig" (dict "root" $ "secretsCategories" $daliSecretsCategories ) | indent 6 }} foreignAccess: {{ include "hpcc.isForeignAccessConfigured" (dict "root" $ "me" .) | default "false" }} diff --git a/helm/hpcc/templates/dfuserver.yaml b/helm/hpcc/templates/dfuserver.yaml index b69c5bb653e..9193b3bdefa 100644 --- a/helm/hpcc/templates/dfuserver.yaml +++ b/helm/hpcc/templates/dfuserver.yaml @@ -29,8 +29,9 @@ data: {{ .me.name }}.yaml: version: 1.0 dfuserver: -{{ toYaml (omit .me "logging" "env") | indent 6 }} +{{ toYaml (omit .me "logging" "tracing" "env") | indent 6 }} {{- include "hpcc.generateLoggingConfig" . | indent 6 }} +{{- include "hpcc.generateTracingConfig" . | indent 6 }} global: {{ include "hpcc.generateGlobalConfigMap" .root | indent 6 }} {{- end -}} diff --git a/helm/hpcc/templates/eclagent.yaml b/helm/hpcc/templates/eclagent.yaml index 4728ce1dce8..69756c7274d 100644 --- a/helm/hpcc/templates/eclagent.yaml +++ b/helm/hpcc/templates/eclagent.yaml @@ -35,11 +35,13 @@ data: {{ .me.name }}.yaml: version: 1.0 eclagent: -{{ toYaml (omit .me "logging" "env") | indent 6 }} +{{ toYaml (omit .me "logging" "tracing" "env") | indent 6 }} {{- include "hpcc.generateLoggingConfig" . | indent 6 }} +{{- include "hpcc.generateTracingConfig" . | indent 6 }} {{ $apptype }}: -{{ toYaml (omit .me "logging" "env") | indent 6 }} +{{ toYaml (omit .me "logging" "tracing" "env") | indent 6 }} {{- include "hpcc.generateLoggingConfig" . | indent 6 }} +{{- include "hpcc.generateTracingConfig" . | indent 6 }} {{ include "hpcc.generateVaultConfig" . | indent 6 }} global: {{ include "hpcc.generateGlobalConfigMap" .root | indent 6 }} diff --git a/helm/hpcc/templates/eclccserver.yaml b/helm/hpcc/templates/eclccserver.yaml index cd8d4bfe04e..1aae0c0b823 100644 --- a/helm/hpcc/templates/eclccserver.yaml +++ b/helm/hpcc/templates/eclccserver.yaml @@ -34,8 +34,9 @@ data: {{ .me.name }}.yaml: version: 1.0 eclccserver: -{{ toYaml (omit .me "logging") | indent 6 }} +{{ toYaml (omit .me "logging" "tracing") | indent 6 }} {{- include "hpcc.generateLoggingConfig" . | indent 6 }} +{{- include "hpcc.generateTracingConfig" . | indent 6 }} queues: {{ include "hpcc.generateConfigMapQueues" .root | indent 6 }} {{ include "hpcc.generateVaultConfig" . | indent 6 }} diff --git a/helm/hpcc/templates/eclscheduler.yaml b/helm/hpcc/templates/eclscheduler.yaml index f1850e1b2d6..646ad76b748 100644 --- a/helm/hpcc/templates/eclscheduler.yaml +++ b/helm/hpcc/templates/eclscheduler.yaml @@ -33,8 +33,9 @@ data: {{ .me.name }}.yaml: version: 1.0 eclscheduler: -{{ toYaml (omit .me "logging" "env") | indent 6 }} +{{ toYaml (omit .me "logging" "tracing" "env") | indent 6 }} {{- include "hpcc.generateLoggingConfig" . | indent 6 }} +{{- include "hpcc.generateTracingConfig" . | indent 6 }} queues: {{ include "hpcc.generateConfigMapQueues" .root | indent 6 }} {{ include "hpcc.generateVaultConfig" . | indent 6 }} diff --git a/helm/hpcc/templates/esp.yaml b/helm/hpcc/templates/esp.yaml index 032e6dc943b..a37ffb29776 100644 --- a/helm/hpcc/templates/esp.yaml +++ b/helm/hpcc/templates/esp.yaml @@ -33,8 +33,9 @@ data: {{ .me.name }}.yaml: version: 1.0 esp: -{{ toYaml (omit .me "logging" "metrics" "env") | indent 6 }} +{{ toYaml (omit .me "logging" "tracing" "metrics" "env") | indent 6 }} {{- include "hpcc.generateLoggingConfig" . | indent 6 }} +{{- include "hpcc.generateTracingConfig" . | indent 6 }} {{- include "hpcc.generateMetricsConfig" . | indent 6 }} {{- if and .root.Values.certificates .root.Values.certificates.enabled }} {{- $externalCert := (ne (include "hpcc.isVisibilityPublic" (dict "root" .root "visibility" .me.service.visibility)) "") -}} diff --git a/helm/hpcc/templates/localroxie.yaml b/helm/hpcc/templates/localroxie.yaml index d1e81b3d325..f512b68f85b 100644 --- a/helm/hpcc/templates/localroxie.yaml +++ b/helm/hpcc/templates/localroxie.yaml @@ -33,8 +33,9 @@ data: {{ .me.name }}.yaml: version: 1.0 roxie: -{{ toYaml (omit .me "logging" "env") | indent 6 }} +{{ toYaml (omit .me "logging" "tracing" "env") | indent 6 }} {{- include "hpcc.generateLoggingConfig" . | indent 6 }} +{{- include "hpcc.generateTracingConfig" . | indent 6 }} {{ include "hpcc.generateVaultConfig" . | indent 6 }} global: {{ include "hpcc.generateGlobalConfigMap" .root | indent 6 }} diff --git a/helm/hpcc/templates/roxie.yaml b/helm/hpcc/templates/roxie.yaml index b89efcc970a..17d1350306b 100644 --- a/helm/hpcc/templates/roxie.yaml +++ b/helm/hpcc/templates/roxie.yaml @@ -44,7 +44,7 @@ data: {{- include "hpcc.addTLSServiceEntries" (dict "root" $root "service" $service "component" $component "visibility" $service.visibility "remoteClients" $service.remoteClients "trustClients" $service.trustClients "includeTrustedPeers" true "incluedRoxieAndEspServices" true) | indent 6 }} {{- end }} {{- end }} -{{ toYaml ( omit .me "logging" "topoServer" "encryptInTransit" "env" "services") | indent 6 }} +{{ toYaml ( omit .me "logging" "tracing" "topoServer" "encryptInTransit" "env" "services") | indent 6 }} numChannels: {{ .numChannels }} topologyServers: "{{ .toponame }}:{{ .topoport }}" heartbeatInterval: {{ .heartbeatInterval }} @@ -60,6 +60,7 @@ data: encryptInTransit: {{ $mtlsEnabled }} {{ end -}} {{- include "hpcc.generateLoggingConfig" (dict "root" .root "me" .me) | indent 6 }} +{{- include "hpcc.generateTracingConfig" (dict "root" .root "me" .me) | indent 6 }} {{ include "hpcc.generateVaultConfig" . | indent 6 }} global: {{ include "hpcc.generateGlobalConfigMap" .root | indent 6 }} @@ -73,8 +74,9 @@ data: {{ .toponame }}.yaml: version: 1.0 toposerver: -{{ toYaml ( omit .toposerver "logging" "env") | indent 6 }} +{{ toYaml ( omit .toposerver "logging" "tracing" "env") | indent 6 }} {{- include "hpcc.generateLoggingConfig" (dict "root" .root "me" .toposerver) | indent 6 }} +{{- include "hpcc.generateTracingConfig" (dict "root" .root "me" .toposerver) | indent 6 }} global: {{ include "hpcc.generateGlobalConfigMap" .root | indent 6 }} {{- end -}} diff --git a/helm/hpcc/templates/thor.yaml b/helm/hpcc/templates/thor.yaml index a87a04a4a1f..f6f01d32891 100644 --- a/helm/hpcc/templates/thor.yaml +++ b/helm/hpcc/templates/thor.yaml @@ -32,7 +32,7 @@ Pass in dict with root and me {{- $thorAgentScope := dict "name" .thorAgentName "replicas" .thorAgentReplicas "maxActive" .me.maxGraphs | merge (pick .me "keepJobs") }} {{- $eclAgentResources := .me.eclAgentResources | default dict -}} {{- $hthorScope := dict "name" $hthorName "jobMemorySectionName" "eclAgentMemory" | merge (pick .me "multiJobLinger" "maxGraphStartupTime") | merge (dict "resources" (deepCopy $eclAgentResources)) }} -{{- $thorScope := omit .me "eclagent" "thoragent" "hthor" "logging" "env" "eclAgentResources" "eclAgentUseChildProcesses" "eclAgentReplicas" "thorAgentReplicas" "eclAgentType" }} +{{- $thorScope := omit .me "eclagent" "thoragent" "hthor" "logging" "tracing" "env" "eclAgentResources" "eclAgentUseChildProcesses" "eclAgentReplicas" "thorAgentReplicas" "eclAgentType" }} {{- $misc := .root.Values.global.misc | default dict }} {{- $postJobCommand := $misc.postJobCommand | default "" }} {{- $eclAgentJobName := printf "%s-job-_HPCC_JOBNAME_" $eclAgentType }} @@ -51,6 +51,7 @@ data: thor: {{ toYaml $thorScope | indent 6 }} {{- include "hpcc.generateLoggingConfig" (dict "root" .root "me" $thorScope) | indent 6 }} +{{- include "hpcc.generateTracingConfig" (dict "root" .root "me" $thorScope) | indent 6 }} {{ include "hpcc.generateVaultConfig" . | indent 6 }} {{ $eclAgentType }}: # hthor or roxie {{ toYaml $hthorScope | indent 6 }} @@ -58,13 +59,16 @@ data: type: "thor" width: {{ mul ($thorScope.numWorkers | default 1) ( $thorScope.channelsPerWorker | default 1) }} {{- include "hpcc.generateLoggingConfig" (dict "root" .root "me" $hthorScope ) | indent 6 }} +{{- include "hpcc.generateTracingConfig" (dict "root" .root "me" $hthorScope ) | indent 6 }} {{ include "hpcc.generateVaultConfig" . | indent 6 }} eclagent: # main agent Q handler {{ toYaml $eclAgentScope | indent 6 }} {{- include "hpcc.generateLoggingConfig" (dict "root" .root "me" $eclAgentScope) | indent 6 }} +{{- include "hpcc.generateTracingConfig" (dict "root" .root "me" $eclAgentScope) | indent 6 }} thoragent: # Thor graph handler {{ toYaml $thorAgentScope | indent 6 }} {{- include "hpcc.generateLoggingConfig" (dict "root" .root "me" $thorAgentScope) | indent 6 }} +{{- include "hpcc.generateTracingConfig" (dict "root" .root "me" $thorAgentScope) | indent 6 }} type: thor global: {{ include "hpcc.generateGlobalConfigMap" .root| indent 6 }} diff --git a/helm/hpcc/values.schema.json b/helm/hpcc/values.schema.json index 3ec37c7dcbd..5691d3b77a2 100644 --- a/helm/hpcc/values.schema.json +++ b/helm/hpcc/values.schema.json @@ -1111,6 +1111,10 @@ "disabled": { "type": "boolean", "description": "If true, disable OTel based trace/span generation" + }, + "alwaysCreateTraceIds": { + "type": "boolean", + "description": "If true, components generate trace/span ids to aid in unit of worktracing" } }, "additionalProperties": { "type": ["integer", "string", "boolean"] } diff --git a/helm/hpcc/values.yaml b/helm/hpcc/values.yaml index 8cf1695b94f..a4bb0a98743 100644 --- a/helm/hpcc/values.yaml +++ b/helm/hpcc/values.yaml @@ -28,6 +28,7 @@ global: # tracing sets the default tracing information for all components. Can be overridden locally tracing: disabled: false + alwaysCreateTraceIds: true ## resource settings for stub components #stubInstanceResources: diff --git a/system/jlib/jtrace.cpp b/system/jlib/jtrace.cpp index ae411aab96f..427714564e7 100644 --- a/system/jlib/jtrace.cpp +++ b/system/jlib/jtrace.cpp @@ -16,7 +16,6 @@ ############################################################################## */ - #include "opentelemetry/trace/semantic_conventions.h" //known span defines #include "opentelemetry/context/propagation/global_propagator.h" // context::propagation::GlobalTextMapPropagator::GetGlobalPropagator #include "opentelemetry/sdk/trace/tracer_provider_factory.h" //opentelemetry::sdk::trace::TracerProviderFactory::Create(context) @@ -171,6 +170,7 @@ class CSpan : public CInterfaceOf return spanID.get(); } + ISpan * createClientSpan(const char * name) override; ISpan * createInternalSpan(const char * name) override; @@ -187,16 +187,15 @@ class CSpan : public CInterfaceOf if (span != nullptr) { - out.append(",\"SpanID\":\"").append(spanID.get()).append("\""); - out.append(",\"TraceID\":\"").append(traceID.get()).append("\""); + out.append(",\"SpanID\":\"").append(spanID.get()).append("\""); + } - if (localParentSpan != nullptr) - { - out.append(",\"ParentSpanID\": \""); - out.append(localParentSpan->getSpanID()); - out.append("\""); - } + if (localParentSpan != nullptr) + { + out.append(",\"ParentSpanID\": \""); + out.append(localParentSpan->getSpanID()); + out.append("\""); } } @@ -219,19 +218,19 @@ class CSpan : public CInterfaceOf if (span != nullptr) { out.append(",\"SpanID\":\"").append(spanID.get()).append("\""); + } - if (isLeaf) - { - out.append(",\"TraceID\":\"").append(traceID.get()).append("\"") - .append(",\"TraceFlags\":\"").append(traceFlags.get()).append("\""); - } - - if (localParentSpan != nullptr) - { - out.append(",\"ParentSpan\":{ "); - localParentSpan->toString(out, false); - out.append(" }"); - } + if (isLeaf) + { + out.append(",\"TraceID\":\"").append(traceID.get()).append("\"") + .append(",\"TraceFlags\":\"").append(traceFlags.get()).append("\""); + } + + if (localParentSpan != nullptr) + { + out.append(",\"ParentSpan\":{ "); + localParentSpan->toString(out, false); + out.append(" }"); } }; @@ -417,7 +416,10 @@ class CSpan : public CInterfaceOf name.set(spanName); localParentSpan = parent; if (localParentSpan != nullptr) + { + injectlocalParentSpan(localParentSpan); tracerName.set(parent->queryTraceName()); + } } CSpan(const char * spanName, const char * nameOfTracer) @@ -427,31 +429,34 @@ class CSpan : public CInterfaceOf tracerName.set(nameOfTracer); } - void init() + void init(SpanFlags flags) { bool createLocalId = !isEmptyString(hpccGlobalId); if (createLocalId) hpccLocalId.set(ln_uid::createUniqueIdString().c_str()); - auto provider = opentelemetry::trace::Provider::GetTracerProvider(); + //we don't always want to create trace/span IDs - //what if tracerName is empty? - auto tracer = provider->GetTracer(tracerName.get()); + if (hasMask(flags, SpanFlags::EnsureTraceId) || //per span flags + queryTraceManager().alwaysCreateTraceIds() || //Global/conponet flags + nostd::get(opts.parent).IsValid()) // valid parent was passed in + { + auto provider = opentelemetry::trace::Provider::GetTracerProvider(); - if (localParentSpan != nullptr) - injectlocalParentSpan(localParentSpan); + //what if tracerName is empty? + auto tracer = provider->GetTracer(tracerName.get()); - span = tracer->StartSpan(name.get(), {}, opts); + span = tracer->StartSpan(name.get(), {}, opts); - if (span != nullptr) - { - storeSpanContext(); + if (span != nullptr) + { + storeSpanContext(); - StringBuffer out; - toLog(out); - LOG(MCmonitorEvent, "Span start: {%s}", out.str()); + StringBuffer out; + toLog(out); + LOG(MCmonitorEvent, "Span start: {%s}", out.str()); + } } - } void storeSpanContext() @@ -471,7 +476,11 @@ class CSpan : public CInterfaceOf auto localParentSpanCtx = localParentSpan->querySpanContext(); if(localParentSpanCtx.IsValid()) + { opts.parent = localParentSpanCtx; + } + + } void storeTraceID() @@ -570,7 +579,7 @@ class CInternalSpan : public CSpan : CSpan(spanName, parent) { opts.kind = opentelemetry::trace::SpanKind::kInternal; - init(); + init(SpanFlags::None); } void toLog(StringBuffer & out) const override @@ -593,7 +602,7 @@ class CClientSpan : public CSpan : CSpan(spanName, parent) { opts.kind = opentelemetry::trace::SpanKind::kClient; - init(); + init(SpanFlags::None); } void toLog(StringBuffer & out) const override @@ -629,11 +638,6 @@ class CServerSpan : public CSpan { if (httpHeaders) { - // perform any key mapping needed... - //Instrumented http client/server Capitalizes the first letter of the header name - //if (key == opentel_trace::propagation::kTraceParent || key == opentel_trace::propagation::kTraceState ) - // theKey[0] = toupper(theKey[0]); - if (httpHeaders->hasProp(kGlobalIdHttpHeaderName)) hpccGlobalId.set(httpHeaders->queryProp(kGlobalIdHttpHeaderName)); else if (httpHeaders->hasProp(kLegacyGlobalIdHttpHeaderName)) @@ -662,6 +666,7 @@ class CServerSpan : public CSpan { remoteParentSpanCtx = remoteParentSpan->GetContext(); opts.parent = remoteParentSpanCtx; + } } } @@ -697,7 +702,7 @@ class CServerSpan : public CSpan { opts.kind = opentelemetry::trace::SpanKind::kServer; setSpanContext(httpHeaders, flags); - init(); + init(flags); setContextAttributes(); } @@ -715,6 +720,7 @@ class CServerSpan : public CSpan .append("\""); } } + void toString(StringBuffer & out, bool isLeaf) const override { out.append("\"Type\":\"Server\""); @@ -747,6 +753,7 @@ class CTraceManager : implements ITraceManager, public CInterface private: bool enabled = true; bool optAlwaysCreateGlobalIds = false; + bool optAlwaysCreateTraceIds = true; StringAttr moduleName; //Initializes the global trace provider which is required for all Otel based tracing operations. @@ -856,6 +863,7 @@ class CTraceManager : implements ITraceManager, public CInterface tracing: #optional - tracing enabled by default disabled: true #optional - disable OTel tracing alwaysCreateGlobalIds : false #optional - should global ids always be created? + alwaysCreateTraceIds #optional - should trace ids always be created? exporter: #optional - Controls how trace data is exported/reported type: OTLP #OS|OTLP|Prometheus|HPCC (default: no export, jlog entry) endpoint: "localhost:4317" #exporter specific key/value pairs @@ -910,6 +918,7 @@ class CTraceManager : implements ITraceManager, public CInterface if (traceConfig) { optAlwaysCreateGlobalIds = traceConfig->getPropBool("@alwaysCreateGlobalIds", optAlwaysCreateGlobalIds); + optAlwaysCreateTraceIds = traceConfig->getPropBool("@alwaysCreateTraceIds", optAlwaysCreateTraceIds); } // The global propagator should be set regardless of whether tracing is enabled or not. @@ -982,6 +991,11 @@ class CTraceManager : implements ITraceManager, public CInterface { return optAlwaysCreateGlobalIds; } + + virtual bool alwaysCreateTraceIds() const + { + return optAlwaysCreateTraceIds; + } }; static Singleton theTraceManager; diff --git a/system/jlib/jtrace.hpp b/system/jlib/jtrace.hpp index 79baf27e6cb..f305fe49829 100644 --- a/system/jlib/jtrace.hpp +++ b/system/jlib/jtrace.hpp @@ -34,6 +34,7 @@ enum class SpanFlags : unsigned { None = 0x000000000, EnsureGlobalId = 0x000000001, + EnsureTraceId = 0x000000010, }; BITMASK_ENUM(SpanFlags); @@ -64,6 +65,7 @@ interface ITraceManager : extends IInterface virtual ISpan * createServerSpan(const char * name, const IProperties * httpHeaders, SpanFlags flags = SpanFlags::None) const = 0; virtual bool isTracingEnabled() const = 0; virtual bool alwaysCreateGlobalIds() const = 0; + virtual bool alwaysCreateTraceIds() const = 0; virtual const char * getTracedComponentName() const = 0; }; diff --git a/testing/unittests/jlibtests.cpp b/testing/unittests/jlibtests.cpp index 6a074989d43..7efdd4611c9 100644 --- a/testing/unittests/jlibtests.cpp +++ b/testing/unittests/jlibtests.cpp @@ -56,6 +56,7 @@ class JlibTraceTest : public CppUnit::TestFixture CPPUNIT_TEST(testMultiNestedSpanTraceOutput); CPPUNIT_TEST(testNullSpan); CPPUNIT_TEST(testClientSpanGlobalID); + CPPUNIT_TEST(testEnsureTraceID); CPPUNIT_TEST_SUITE_END(); const char * simulatedGlobalYaml = R"!!(global: @@ -109,6 +110,21 @@ class JlibTraceTest : public CppUnit::TestFixture initTraceManager("somecomponent", traceConfig, nullptr); } + void testEnsureTraceID() + { + SpanFlags flags = SpanFlags::EnsureTraceId; + Owned emptyMockHTTPHeaders = createProperties(); + Owned serverSpan = queryTraceManager().createServerSpan("noRemoteParentEnsureTraceID", emptyMockHTTPHeaders, flags); + + Owned retrievedSpanCtxAttributes = createProperties(); + bool getSpanCtxSuccess = serverSpan->getSpanContext(retrievedSpanCtxAttributes.get(), false); + + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected getSpanContext failure detected - EnsureTraceID flag was set", true, getSpanCtxSuccess); + + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected empty TraceID detected", false, isEmptyString(retrievedSpanCtxAttributes->queryProp("traceID"))); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Unexpected empty SpanID detected", false, isEmptyString(retrievedSpanCtxAttributes->queryProp("spanID"))); + } + void testIDPropegation() { Owned mockHTTPHeaders = createProperties(); From bbb02b37ea91a7e377d6b838b27e5b65e682af60 Mon Sep 17 00:00:00 2001 From: Gavin Halliday Date: Thu, 16 Nov 2023 11:39:27 +0000 Subject: [PATCH 7/9] HPCC-30866 Remove fork() unsafe code from open-telemetry random number generator Signed-off-by: Gavin Halliday --- .../add-missing-dependencies.patch | 15 ++++ .../add-missing-find-dependency.patch | 13 +++ .../hpcc-remove-unsafe-onfork.patch | 56 ++++++++++++ .../opentelemetry-cpp/portfile.cmake | 75 ++++++++++++++++ .../use-default-cxx-version.patch | 26 ++++++ vcpkg_overlays/opentelemetry-cpp/vcpkg.json | 87 +++++++++++++++++++ 6 files changed, 272 insertions(+) create mode 100644 vcpkg_overlays/opentelemetry-cpp/add-missing-dependencies.patch create mode 100644 vcpkg_overlays/opentelemetry-cpp/add-missing-find-dependency.patch create mode 100644 vcpkg_overlays/opentelemetry-cpp/hpcc-remove-unsafe-onfork.patch create mode 100644 vcpkg_overlays/opentelemetry-cpp/portfile.cmake create mode 100644 vcpkg_overlays/opentelemetry-cpp/use-default-cxx-version.patch create mode 100644 vcpkg_overlays/opentelemetry-cpp/vcpkg.json diff --git a/vcpkg_overlays/opentelemetry-cpp/add-missing-dependencies.patch b/vcpkg_overlays/opentelemetry-cpp/add-missing-dependencies.patch new file mode 100644 index 00000000000..2de8be70c7a --- /dev/null +++ b/vcpkg_overlays/opentelemetry-cpp/add-missing-dependencies.patch @@ -0,0 +1,15 @@ +diff --git a/cmake/opentelemetry-proto.cmake b/cmake/opentelemetry-proto.cmake +index 34b33d3..19e67e9 100644 +--- a/cmake/opentelemetry-proto.cmake ++++ b/cmake/opentelemetry-proto.cmake +@@ -311,6 +311,10 @@ if(WITH_OTLP_GRPC) + endif() + endif() + ++if(TARGET gRPC::grpc++) ++ target_link_libraries(opentelemetry_proto PUBLIC gRPC::grpc++) ++endif() ++ + if(BUILD_SHARED_LIBS) + foreach(proto_target ${OPENTELEMETRY_PROTO_TARGETS}) + set_property(TARGET ${proto_target} PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/vcpkg_overlays/opentelemetry-cpp/add-missing-find-dependency.patch b/vcpkg_overlays/opentelemetry-cpp/add-missing-find-dependency.patch new file mode 100644 index 00000000000..1f9c12d1636 --- /dev/null +++ b/vcpkg_overlays/opentelemetry-cpp/add-missing-find-dependency.patch @@ -0,0 +1,13 @@ +diff --git a/cmake/opentelemetry-cpp-config.cmake.in b/cmake/opentelemetry-cpp-config.cmake.in +index adae58d..2642772 100644 +--- a/cmake/opentelemetry-cpp-config.cmake.in ++++ b/cmake/opentelemetry-cpp-config.cmake.in +@@ -69,6 +69,8 @@ set(OPENTELEMETRY_VERSION + # ############################################################################## + + find_package(Threads) ++include(CMakeFindDependencyMacro) ++find_dependency(absl) + + set_and_check(OPENTELEMETRY_CPP_INCLUDE_DIRS "@PACKAGE_INCLUDE_INSTALL_DIR@") + set_and_check(OPENTELEMETRY_CPP_LIBRARY_DIRS "@PACKAGE_CMAKE_INSTALL_LIBDIR@") diff --git a/vcpkg_overlays/opentelemetry-cpp/hpcc-remove-unsafe-onfork.patch b/vcpkg_overlays/opentelemetry-cpp/hpcc-remove-unsafe-onfork.patch new file mode 100644 index 00000000000..b9a0a16725a --- /dev/null +++ b/vcpkg_overlays/opentelemetry-cpp/hpcc-remove-unsafe-onfork.patch @@ -0,0 +1,56 @@ +diff --git a/sdk/src/common/random.cc b/sdk/src/common/random.cc +index 77b88cfa..14e94d0c 100644 +--- a/sdk/src/common/random.cc ++++ b/sdk/src/common/random.cc +@@ -13,11 +13,10 @@ namespace sdk + { + namespace common + { +-// Wraps a thread_local random number generator, but adds a fork handler so that +-// the generator will be correctly seeded after forking. +-// +-// See https://stackoverflow.com/q/51882689/4447365 and +-// https://github.com/opentracing-contrib/nginx-opentracing/issues/52 ++// Wraps a thread_local random number generator. ++// The previous fork handler is removed because it was not safe and was solving a problem that did ++// not need to be solved since there should be no logic in the child() before it calls exec(). ++ + namespace + { + class TlsRandomNumberGenerator +@@ -26,17 +25,14 @@ class TlsRandomNumberGenerator + TlsRandomNumberGenerator() noexcept + { + Seed(); +- platform::AtFork(nullptr, nullptr, OnFork); + } + +- static FastRandomNumberGenerator &engine() noexcept { return engine_; } ++ FastRandomNumberGenerator & engine() noexcept { return engine_; } + + private: +- static thread_local FastRandomNumberGenerator engine_; +- +- static void OnFork() noexcept { Seed(); } ++ FastRandomNumberGenerator engine_; + +- static void Seed() noexcept ++ void Seed() noexcept + { + std::random_device random_device; + std::seed_seq seed_seq{random_device(), random_device(), random_device(), random_device()}; +@@ -44,13 +40,12 @@ class TlsRandomNumberGenerator + } + }; + +-thread_local FastRandomNumberGenerator TlsRandomNumberGenerator::engine_{}; + } // namespace + + FastRandomNumberGenerator &Random::GetRandomNumberGenerator() noexcept + { + static thread_local TlsRandomNumberGenerator random_number_generator{}; +- return TlsRandomNumberGenerator::engine(); ++ return random_number_generator.engine(); + } + + uint64_t Random::GenerateRandom64() noexcept diff --git a/vcpkg_overlays/opentelemetry-cpp/portfile.cmake b/vcpkg_overlays/opentelemetry-cpp/portfile.cmake new file mode 100644 index 00000000000..facb6883226 --- /dev/null +++ b/vcpkg_overlays/opentelemetry-cpp/portfile.cmake @@ -0,0 +1,75 @@ +if(VCPKG_TARGET_IS_WINDOWS) + vcpkg_check_linkage(ONLY_STATIC_LIBRARY) +endif() + +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO open-telemetry/opentelemetry-cpp + REF "v${VERSION}" + SHA512 86cf0320f9ee50bc1aa2b7a8b254fb0df25d1bd1f5f01ebc3630ab7fe2f6ca5e53ca8e042518b4e7096dbb102c0b880e9a25fcdf5f668d24ff57d9247237bf62 + HEAD_REF main + PATCHES + # Use the compiler's default C++ version. Picking a version with + # CMAKE_CXX_STANDARD is not needed as the Abseil port already picked + # one and propagates that version across all its downstream deps. + use-default-cxx-version.patch + # When compiling code generated by gRPC we need to link the gRPC library + # too. + add-missing-dependencies.patch + # Missing find_dependency for Abseil + add-missing-find-dependency.patch + # HPCC-fix: Remove code that reinitialised the random number generator on fork() + hpcc-remove-unsafe-onfork.patch +) + +vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS + FEATURES + etw WITH_ETW + zipkin WITH_ZIPKIN + prometheus WITH_PROMETHEUS + elasticsearch WITH_ELASTICSEARCH + jaeger WITH_JAEGER + otlp WITH_OTLP + otlp-http WITH_OTLP_HTTP + zpages WITH_ZPAGES + otlp-grpc WITH_OTLP_GRPC +) + +# opentelemetry-proto is a third party submodule and opentelemetry-cpp release did not pack it. +if(WITH_OTLP) + set(OTEL_PROTO_VERSION "0.19.0") + vcpkg_download_distfile(ARCHIVE + URLS "https://github.com/open-telemetry/opentelemetry-proto/archive/v${OTEL_PROTO_VERSION}.tar.gz" + FILENAME "opentelemetry-proto-${OTEL_PROTO_VERSION}.tar.gz" + SHA512 b6d47aaa90ff934eb24047757d5fdb8a5be62963a49b632460511155f09a725937fb7535cf34f738b81cc799600adbbc3809442aba584d760891c0a1f0ce8c03 + ) + + vcpkg_extract_source_archive(src ARCHIVE "${ARCHIVE}") + file(REMOVE_RECURSE "${SOURCE_PATH}/third_party/opentelemetry-proto") + file(COPY "${src}/." DESTINATION "${SOURCE_PATH}/third_party/opentelemetry-proto") + # Create empty .git directory to prevent opentelemetry from cloning it during build time + file(MAKE_DIRECTORY "${SOURCE_PATH}/third_party/opentelemetry-proto/.git") + list(APPEND FEATURE_OPTIONS -DCMAKE_CXX_STANDARD=14) + list(APPEND FEATURE_OPTIONS -DgRPC_CPP_PLUGIN_EXECUTABLE=${CURRENT_HOST_INSTALLED_DIR}/tools/grpc/grpc_cpp_plugin${VCPKG_HOST_EXECUTABLE_SUFFIX}) +endif() + +vcpkg_cmake_configure( + SOURCE_PATH "${SOURCE_PATH}" + OPTIONS + -DBUILD_TESTING=OFF + -DWITH_EXAMPLES=OFF + -DWITH_LOGS_PREVIEW=ON + -DOPENTELEMETRY_INSTALL=ON + -DWITH_ABSEIL=ON + ${FEATURE_OPTIONS} + MAYBE_UNUSED_VARIABLES + WITH_OTLP_GRPC +) + +vcpkg_cmake_install() +vcpkg_cmake_config_fixup(CONFIG_PATH lib/cmake/${PORT}) +vcpkg_copy_pdbs() + +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") +file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/share") +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") diff --git a/vcpkg_overlays/opentelemetry-cpp/use-default-cxx-version.patch b/vcpkg_overlays/opentelemetry-cpp/use-default-cxx-version.patch new file mode 100644 index 00000000000..53a31faf91b --- /dev/null +++ b/vcpkg_overlays/opentelemetry-cpp/use-default-cxx-version.patch @@ -0,0 +1,26 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index f4fa064..a868106 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -126,21 +126,6 @@ endif() + option(OPENTELEMETRY_INSTALL "Whether to install opentelemetry targets" + ${OPENTELEMETRY_INSTALL_default}) + +-if(NOT DEFINED CMAKE_CXX_STANDARD) +- if(WITH_STL) +- # Require at least C++17. C++20 is needed to avoid gsl::span +- if(CMAKE_VERSION VERSION_GREATER 3.11.999) +- # Ask for 20, may get anything below +- set(CMAKE_CXX_STANDARD 20) +- else() +- # Ask for 17, may get anything below +- set(CMAKE_CXX_STANDARD 17) +- endif() +- else() +- set(CMAKE_CXX_STANDARD 11) +- endif() +-endif() +- + if(WITH_STL) + # These definitions are needed for test projects that do not link against + # opentelemetry-api library directly. We ensure that variant implementation diff --git a/vcpkg_overlays/opentelemetry-cpp/vcpkg.json b/vcpkg_overlays/opentelemetry-cpp/vcpkg.json new file mode 100644 index 00000000000..b525fbeb2d5 --- /dev/null +++ b/vcpkg_overlays/opentelemetry-cpp/vcpkg.json @@ -0,0 +1,87 @@ +{ + "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg-tool/main/docs/vcpkg.schema.json", + "name": "opentelemetry-cpp", + "version-semver": "1.9.1", + "description": [ + "OpenTelemetry is a collection of tools, APIs, and SDKs.", + "You use it to instrument, generate, collect, and export telemetry data (metrics, logs, and traces) for analysis in order to understand your software's performance and behavior." + ], + "homepage": "https://github.com/open-telemetry/opentelemetry-cpp", + "license": "Apache-2.0", + "dependencies": [ + "abseil", + "curl", + "nlohmann-json", + { + "name": "vcpkg-cmake", + "host": true + }, + { + "name": "vcpkg-cmake-config", + "host": true + } + ], + "features": { + "elasticsearch": { + "description": "Whether to include the Elasticsearch Client in the SDK" + }, + "etw": { + "description": "Whether to include the ETW Exporter in the SDK", + "supports": "windows" + }, + "jaeger": { + "description": "Whether to include the Jaeger exporter", + "dependencies": [ + "thrift" + ] + }, + "otlp": { + "description": "Whether to include the OpenTelemetry Protocol in the SDK", + "dependencies": [ + "protobuf" + ] + }, + "otlp-grpc": { + "description": "Whether to include the OTLP gRPC exporter in the SDK", + "dependencies": [ + "grpc", + { + "name": "grpc", + "host": true + }, + { + "name": "opentelemetry-cpp", + "default-features": false, + "features": [ + "otlp" + ] + } + ] + }, + "otlp-http": { + "description": "Whether to include the OpenTelemetry Protocol over HTTP in the SDK", + "dependencies": [ + "curl", + { + "name": "opentelemetry-cpp", + "default-features": false, + "features": [ + "otlp" + ] + } + ] + }, + "prometheus": { + "description": "Whether to include the Prometheus Client in the SDK", + "dependencies": [ + "prometheus-cpp" + ] + }, + "zipkin": { + "description": "Whether to include the Zipkin exporter in the SDK" + }, + "zpages": { + "description": "Whether to include the Zpages Server in the SDK" + } + } +} From 9128cfcbb5d1f7a2fcc76d150aec0d1074b72fd9 Mon Sep 17 00:00:00 2001 From: Rodrigo Pastrana Date: Fri, 10 Nov 2023 09:02:34 -0500 Subject: [PATCH 8/9] HPCC-30795 Extends Jtrace exporter and configuration support - Adds Otel dependancy - Exposes otlp-http exporter configuration - Defines jtrace processor and exporter config syntax - Adds support for OTLP HTTP/GRCP support - Adds helm/tracing/readme documentation - Adds sample values block for otlp exporters - Adds ca cert path support Signed-off-by: Rodrigo Pastrana --- helm/examples/tracing/README.md | 96 +++++++++++++++++++ .../tracing/otlp-grcp-collector-default.yaml | 6 ++ .../tracing/otlp-http-collector-default.yaml | 6 ++ helm/hpcc/values.schema.json | 20 ++++ system/jlib/CMakeLists.txt | 2 +- system/jlib/jtrace.cpp | 63 ++++++++---- 6 files changed, 173 insertions(+), 20 deletions(-) create mode 100644 helm/examples/tracing/README.md create mode 100644 helm/examples/tracing/otlp-grcp-collector-default.yaml create mode 100644 helm/examples/tracing/otlp-http-collector-default.yaml diff --git a/helm/examples/tracing/README.md b/helm/examples/tracing/README.md new file mode 100644 index 00000000000..bd1778a4aa5 --- /dev/null +++ b/helm/examples/tracing/README.md @@ -0,0 +1,96 @@ +# HPCC Component Instrumentation and Tracing + +The HPCC Platform is instrumented to optionally track distributed work actions (Traces) and their various sub-task (spans) via Open Telemetry tooling. Traces and spans track information vital information which can be reported to services which specialize in processing and visualization of standardized trace/span information for the purpose or monitoring and observability of the health of individual requests processed by the platform. + +Tracing of HPCC components is enabled by default, and can be configured to fit the needs of the given deployment. + +## Configuration +All configuration options detailed here are part of the HPCC Systems Helm chart, and apply at the global or component level. + +### Tracing cofiguration options +- disabled - (default: false) disables tracking and reporting of internal traces and spans +- alwaysCreateGlobalIds - If true, assign newly created global ID to any requests that do not supply one. +- exporter - Defines The type of exporter in charge of forwarding span data to target back-end + - type - (defalt: NONE) "OTLP-HTTP" | "OTLP-GRCP" | "OS" | "NONE" + - OTLP-HTTP + - endpoint - (default localhost:4318) Specifies the target OTLP-HTTP backend + - timeOutSecs - (default 10secs) + - consoleDebug - (default false) + - OTLP-GRCP + - endpoint: (default localhost:4317) The endpoint to export to. By default the OpenTelemetry Collector's default endpoint. + - useSslCredentials - By default when false, uses grpc::InsecureChannelCredentials; If true uses sslCredentialsCACertPath + - sslCredentialsCACertPath - Path to .pem file to be used for SSL encryption. + - timeOutSeconds - (default 10secs) Timeout for grpc deadline +- processor - Controls span processing style. One by one as available, or in batches. + - type - (default: simple) "simple" | "batch" + +### Sample configuration +Below is a sample helm values block directing the HPCC tracing framework to process span information serially, and export the data over OTLP/HTTP protocol to localhost:4318 and output export debug information to console: + +```console +global: + tracing: + exporter: + type: OTLP-HTTP + consoleDebug: true + processor: + type: simple +``` +### Sample configuration command + +Sample helm command deploying an HPCC chart named myTracedHPCC using the hpcc helm repo and providing a the above tracing configuration. + +```console +helm install myTracedHPCC hpcc/hpcc -f otlp-http-collector-default.yaml +``` +## Tracing information +HPCC tracing information includes data needed to trace requests as they traverse over distributed components, and detailed information pertaining to important request subtasks in the form of span information. Each trace and all its related spans are assigned unique IDs which follow the Open Telemetry standard. + +The start and end of spans are reported to HPCC component logs regardless of any exporter related configuration. + +Sample span reported as log event: +```console +000000A3 MON EVT 2023-10-10 22:12:23.827 24212 25115 Span start: {"Type":"Server","Name":"propagatedServerSpan","GlobalID":"IncomingUGID","CallerID":"IncomingCID","LocalID":"JDbF4xnv7LSWDV4Eug1SpJ","TraceID":"beca49ca8f3138a2842e5cf21402bfff","SpanID":"4b960b3e4647da3f"} + +000000FF MON EVT 2023-10-10 22:12:24.927 24212 25115 Span end: {"Type":"Server","Name":"propagatedServerSpan","GlobalID":"IncomingUGID","CallerID":"IncomingCID","LocalID":"JDbF4xnv7LSWDV4Eug1SpJ","TraceID":"beca49ca8f3138a2842e5cf21402bfff","SpanID":"4b960b3e4647da3f"} +``` + +Each log statement denotes the time of the tracing event (start or stop), the span type, name, trace and span id, and any HPCC specific attribute such as legacy GlobalID (if any), HPCC CallerID (if any), LocalID (if any). + +Spans exported via exporters will contain more detailed information such as explicit start time, duration, and any other attribute assigned to the span by the component instrumentation. + +Sample exported span data: +```json +{ + "Name":"propagatedServerSpan", + "TraceId":"beca49ca8f3138a2842e5cf21402bfff", + "SpanId":"6225221529c24252", + "kind":"Server", + "ParentSpanId":"4b960b3e4647da3f", + "Start":1696983526105561763, + "Duration":1056403, + "Description":"", + "Status":"Unset", + "TraceState":"hpcc=4b960b3e4647da3f", + "Attributes":{ + "hpcc.callerid":"IncomingCID", + "hpcc.globalid":"IncomingUGID" + }, + "Events":{ + }, + "Links":{ + }, + "Resources":{ + "service.name":"unknown_service", + "telemetry.sdk.version":"1.9.1", + "telemetry.sdk.name":"opentelemetry", + "telemetry.sdk.language":"cpp" + }, + "InstrumentedLibrary":"esp" + +} +``` + +## Directory Contents + +- 'otlp-http-collector-default.yaml' - Sample tracing configuration targeting OTLP/HTTP trace collector diff --git a/helm/examples/tracing/otlp-grcp-collector-default.yaml b/helm/examples/tracing/otlp-grcp-collector-default.yaml new file mode 100644 index 00000000000..c6042b206d9 --- /dev/null +++ b/helm/examples/tracing/otlp-grcp-collector-default.yaml @@ -0,0 +1,6 @@ +global: + tracing: + exporter: + type: OTLP-GRCP + endpoint: "localhost:4317" + useSslCredentials: false diff --git a/helm/examples/tracing/otlp-http-collector-default.yaml b/helm/examples/tracing/otlp-http-collector-default.yaml new file mode 100644 index 00000000000..717c0984298 --- /dev/null +++ b/helm/examples/tracing/otlp-http-collector-default.yaml @@ -0,0 +1,6 @@ +global: + tracing: + exporter: + type: OTLP-HTTP + endpoint: "localhost:4318" + consoleDebug: true \ No newline at end of file diff --git a/helm/hpcc/values.schema.json b/helm/hpcc/values.schema.json index 5691d3b77a2..c57bd13ae45 100644 --- a/helm/hpcc/values.schema.json +++ b/helm/hpcc/values.schema.json @@ -1115,6 +1115,26 @@ "alwaysCreateTraceIds": { "type": "boolean", "description": "If true, components generate trace/span ids to aid in unit of worktracing" + }, + "exporter": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["OTLP-HTTP", "OTLP-GRCP", "OS", "NONE"], + "description": "The type of exporter in charge of forwarding span data to target back-end" + } + } + }, + "processor": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["batch", "simple"], + "description": "Defines the manner in which trace data is processed - in batches, or simple as available" + } + } } }, "additionalProperties": { "type": ["integer", "string", "boolean"] } diff --git a/system/jlib/CMakeLists.txt b/system/jlib/CMakeLists.txt index 7d0cf4397c3..0b5a9a0490f 100644 --- a/system/jlib/CMakeLists.txt +++ b/system/jlib/CMakeLists.txt @@ -261,7 +261,7 @@ target_link_libraries ( jlib # opentelemetry-cpp::otlp_grpc_log_record_exporter - Imported target of opentelemetry-cpp::otlp_grpc_log_record_exporter # opentelemetry-cpp::otlp_grpc_metrics_exporter - Imported target of opentelemetry-cpp::otlp_grpc_metrics_exporter # opentelemetry-cpp::otlp_http_client - Imported target of opentelemetry-cpp::otlp_http_client - # opentelemetry-cpp::otlp_http_exporter - Imported target of opentelemetry-cpp::otlp_http_exporter + opentelemetry-cpp::otlp_http_exporter # - Imported target of opentelemetry-cpp::otlp_http_exporter # opentelemetry-cpp::otlp_http_log_record_exporter - Imported target of opentelemetry-cpp::otlp_http_log_record_exporter # opentelemetry-cpp::otlp_http_metric_exporter - Imported target of opentelemetry-cpp::otlp_http_metric_exporter # opentelemetry-cpp::ostream_metrics_exporter - Imported target of opentelemetry-cpp::ostream_metrics_exporter diff --git a/system/jlib/jtrace.cpp b/system/jlib/jtrace.cpp index 427714564e7..3914417ce8e 100644 --- a/system/jlib/jtrace.cpp +++ b/system/jlib/jtrace.cpp @@ -34,6 +34,7 @@ #define ForEach(i) for((i).first();(i).isValid();(i).next()) #include "opentelemetry/exporters/otlp/otlp_grpc_exporter_factory.h" +#include "opentelemetry/exporters/otlp/otlp_http_exporter_factory.h" #include "opentelemetry/exporters/otlp/otlp_http_exporter_options.h" #include "opentelemetry/exporters/memory/in_memory_span_data.h" @@ -782,31 +783,53 @@ class CTraceManager : implements ITraceManager, public CInterface exporter = opentelemetry::exporter::trace::OStreamSpanExporterFactory::Create(); DBGLOG("Tracing to stdout/err..."); } - else if (stricmp(exportType.str(), "OTLP")==0) + else if (stricmp(exportType.str(), "OTLP")==0 || stricmp(exportType.str(), "OTLP-HTTP")==0) + { + opentelemetry::exporter::otlp::OtlpHttpExporterOptions trace_opts; + const char * endPoint = exportConfig->queryProp("@endpoint"); + if (endPoint) + trace_opts.url = endPoint; + + if (exportConfig->hasProp("@timeOutSecs")) //not sure exactly what this value actually affects + trace_opts.timeout = std::chrono::seconds(exportConfig->getPropInt("@timeOutSecs")); + + // Whether to print the status of the exporter in the console + trace_opts.console_debug = exportConfig->getPropBool("@consoleDebug", false); + + exporter = opentelemetry::exporter::otlp::OtlpHttpExporterFactory::Create(trace_opts); + DBGLOG("Exporting traces via OTLP/HTTP to: (%s)", trace_opts.url.c_str()); + } + else if (stricmp(exportType.str(), "OTLP-GRPC")==0) { namespace otlp = opentelemetry::exporter::otlp; otlp::OtlpGrpcExporterOptions opts; - StringBuffer endPoint; - exportConfig->getProp("@endpoint", endPoint); - opts.endpoint = endPoint.str(); + + const char * endPoint = exportConfig->queryProp("@endpoint"); + if (endPoint) + opts.endpoint = endPoint; opts.use_ssl_credentials = exportConfig->getPropBool("@useSslCredentials", false); if (opts.use_ssl_credentials) { - StringBuffer sslCACert; - exportConfig->getProp("@sslCredentialsCACcert", sslCACert); - opts.ssl_credentials_cacert_as_string = sslCACert.str(); + StringBuffer sslCACertPath; + exportConfig->getProp("@sslCredentialsCACertPath", sslCACertPath); + opts.ssl_credentials_cacert_path = sslCACertPath.str(); } + if (exportConfig->hasProp("@timeOutSecs")) //grpc deadline timeout in seconds + opts.timeout = std::chrono::seconds(exportConfig->getPropInt("@timeOutSecs")); + exporter = otlp::OtlpGrpcExporterFactory::Create(opts); - DBGLOG("Tracing to OTLP (%s)", endPoint.str()); + DBGLOG("Exporting traces via OTLP/GRPC to: (%s)", opts.endpoint.c_str()); } else if (stricmp(exportType.str(), "Prometheus")==0) DBGLOG("Tracing to Prometheus currently not supported"); - else if (stricmp(exportType.str(), "HPCC")==0) - DBGLOG("Tracing to HPCC JLog currently not supported"); + else if (stricmp(exportType.str(), "NONE")==0) + DBGLOG("Tracing exporter set to 'NONE', no trace exporting will be performed"); + else + DBGLOG("Tracing exporter type not supported: '%s', no trace exporting will be performed", exportType.str()); } else DBGLOG("Tracing exporter type not specified"); @@ -882,22 +905,24 @@ class CTraceManager : implements ITraceManager, public CInterface { const char * simulatedGlobalYaml = R"!!(global: tracing: - disable: true + disabled: false exporter: - type: OTLP - endpoint: "localhost:4317" - useSslCredentials: true - sslCredentialsCACcert: "ssl-certificate" + type: OTLP-HTTP + timeOutSecs: 15 + consoleDebug: true processor: - type: batch + type: simple )!!"; testTree.setown(createPTreeFromYAMLString(simulatedGlobalYaml, ipt_none, ptr_ignoreWhiteSpace, nullptr)); traceConfig = testTree->queryPropTree("global/tracing"); } + if (traceConfig) + { + StringBuffer xml; + toXML(traceConfig, xml); + DBGLOG("traceConfig tree: %s", xml.str()); + } - StringBuffer xml; - toXML(traceConfig, xml); - DBGLOG("traceConfig tree: %s", xml.str()); #endif bool disableTracing = traceConfig && traceConfig->getPropBool("@disabled", false); From 9a644365a90d3cb9c9a826c6a159141892351a22 Mon Sep 17 00:00:00 2001 From: Rodrigo Pastrana Date: Tue, 14 Nov 2023 23:01:32 -0500 Subject: [PATCH 9/9] HPCC-30399 Do not trace all ESP requests - Bypasses non-transaction http requests - Adds startSpan function - Adds annotateSpan function - Annotates HTTP GET/POST - Initializes m_activespan as nullspan - Removes no longer necessary span != nullptr - Uses standard http method attribute name Signed-off-by: Rodrigo Pastrana --- esp/bindings/http/platform/httpbinding.cpp | 3 +++ esp/bindings/http/platform/httpservice.cpp | 3 +++ esp/bindings/http/platform/httptransport.cpp | 17 ++++++++++++----- esp/bindings/http/platform/httptransport.ipp | 2 ++ esp/platform/espcontext.cpp | 11 ++--------- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/esp/bindings/http/platform/httpbinding.cpp b/esp/bindings/http/platform/httpbinding.cpp index 0cbcaffcc09..cabdfd0630d 100644 --- a/esp/bindings/http/platform/httpbinding.cpp +++ b/esp/bindings/http/platform/httpbinding.cpp @@ -1124,6 +1124,8 @@ void EspHttpBinding::handleHttpPost(CHttpRequest *request, CHttpResponse *respon IEspCache *cacheClient = nullptr; IEspContext &context = *request->queryContext(); + request->annotateSpan("http.request.method", "POST"); + IEspContainer *espContainer = getESPContainer(); if (espContainer->hasCacheClient() && (cacheMethods > 0) && queryCacheSeconds(request->queryServiceMethod(), cacheSeconds)) //ESP cache is needed for this method @@ -1190,6 +1192,7 @@ int EspHttpBinding::onGet(CHttpRequest* request, CHttpResponse* response) { IEspContext& context = *request->queryContext(); + request->annotateSpan("http.request.method", "GET"); // At this time, the request is already received and fully passed, and // the user authenticated LogLevel level = getEspLogLevel(&context); diff --git a/esp/bindings/http/platform/httpservice.cpp b/esp/bindings/http/platform/httpservice.cpp index 904266907fa..e6182d9bd6a 100644 --- a/esp/bindings/http/platform/httpservice.cpp +++ b/esp/bindings/http/platform/httpservice.cpp @@ -386,6 +386,9 @@ int CEspHttpServer::processRequest() return 0; } + //Avoids unrestrictedSSType requests + m_request->startSpan(); + if(stricmp(method.str(), POST_METHOD)==0) thebinding->handleHttpPost(m_request.get(), m_response.get()); else if(!stricmp(method.str(), GET_METHOD)) diff --git a/esp/bindings/http/platform/httptransport.cpp b/esp/bindings/http/platform/httptransport.cpp index 42e112d284c..91c2af7adf0 100644 --- a/esp/bindings/http/platform/httptransport.cpp +++ b/esp/bindings/http/platform/httptransport.cpp @@ -1915,6 +1915,18 @@ int CHttpRequest::receive(IMultiException *me) return 0; } +void CHttpRequest::startSpan() +{ + //MORE: The previous code would be better off querying httpHeaders... + Owned httpHeaders = getHeadersAsProperties(m_headers); + Owned requestSpan = queryTraceManager().createServerSpan("HTTPRequest", httpHeaders, SpanFlags::EnsureGlobalId); + m_context->setActiveSpan(requestSpan); +} + +void CHttpRequest::annotateSpan(const char * key, const char * value) +{ + m_context->queryActiveSpan()->setSpanAttribute(key, value); +} void CHttpRequest::updateContext() { @@ -1982,11 +1994,6 @@ void CHttpRequest::updateContext() m_context->setUseragent(useragent.str()); getHeader("Accept-Language", acceptLanguage); m_context->setAcceptLanguage(acceptLanguage.str()); - - //MORE: The previous code would be better off querying httpHeaders... - Owned httpHeaders = getHeadersAsProperties(m_headers); - Owned requestSpan = queryTraceManager().createServerSpan("request", httpHeaders, SpanFlags::EnsureGlobalId); - m_context->setActiveSpan(requestSpan); } } diff --git a/esp/bindings/http/platform/httptransport.ipp b/esp/bindings/http/platform/httptransport.ipp index 33aa7e64691..7bdea9ee0f3 100644 --- a/esp/bindings/http/platform/httptransport.ipp +++ b/esp/bindings/http/platform/httptransport.ipp @@ -369,7 +369,9 @@ public: virtual int receive(IMultiException *me); + void startSpan(); void updateContext(); + void annotateSpan(const char * key, const char * value); virtual void setMaxRequestEntityLength(int len) {m_MaxRequestEntityLength = len;} virtual int getMaxRequestEntityLength() { return m_MaxRequestEntityLength; } diff --git a/esp/platform/espcontext.cpp b/esp/platform/espcontext.cpp index 7cb4094b855..a2bca7b701f 100755 --- a/esp/platform/espcontext.cpp +++ b/esp/platform/espcontext.cpp @@ -114,6 +114,7 @@ class CEspContext : public CInterface, implements IEspContext updateTraceSummaryHeader(); m_secureContext.setown(secureContext); m_SecurityHandler.setSecureContext(secureContext); + m_activeSpan.set(getNullSpan()); } ~CEspContext() @@ -633,29 +634,21 @@ class CEspContext : public CInterface, implements IEspContext { return m_activeSpan; } - //GH Can these be deleted? + virtual const char* getGlobalId() const override { - if (!m_activeSpan) - return nullptr; return m_activeSpan->queryGlobalId(); } virtual const char* getCallerId() const override { - if (!m_activeSpan) - return nullptr; return m_activeSpan->queryCallerId(); } virtual const char* getLocalId() const override { - if (!m_activeSpan) - return nullptr; return m_activeSpan->queryLocalId(); } virtual IProperties * getClientSpanHeaders() const override { - if (!m_activeSpan) - return nullptr; return ::getClientHeaders(m_activeSpan); } };