-
Notifications
You must be signed in to change notification settings - Fork 304
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HPCC-32248 Add tracing to rowservice #19314
base: candidate-9.8.x
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -366,13 +366,31 @@ version: 1.0 | |
detail: 100 | ||
)!!"; | ||
|
||
IPropertyTree * loadConfigurationWithGlobalDefault(const char * defaultYaml, Owned<IPropertyTree>& globalConfig, const char * * argv, const char * componentTag, const char * envPrefix) | ||
{ | ||
Owned<IPropertyTree> componentDefault; | ||
if (defaultYaml) | ||
{ | ||
Owned<IPropertyTree> defaultConfig = createPTreeFromYAMLString(defaultYaml, 0, ptr_ignoreWhiteSpace, nullptr); | ||
componentDefault.set(defaultConfig->queryPropTree(componentTag)); | ||
if (!componentDefault) | ||
throw makeStringExceptionV(99, "Default configuration does not contain the tag %s", componentTag); | ||
} | ||
else | ||
componentDefault.setown(createPTree(componentTag)); | ||
|
||
mergePTree(componentDefault, globalConfig); | ||
|
||
return loadConfiguration(componentDefault, argv, componentTag, envPrefix, nullptr, nullptr); | ||
} | ||
|
||
int main(int argc, const char* argv[]) | ||
{ | ||
InitModuleObjects(); | ||
|
||
EnableSEHtoExceptionMapping(); | ||
#ifndef __64BIT__ | ||
|
||
// Restrict stack sizes on 32-bit systems | ||
Thread::setDefaultStackSize(0x10000); // 64K stack (also set in windows DSP) | ||
#endif | ||
|
@@ -386,7 +404,17 @@ int main(int argc, const char* argv[]) | |
StringBuffer componentName; | ||
|
||
// NB: bare-metal dafilesrv does not have a component specific xml | ||
Owned<IPropertyTree> config = loadConfiguration(defaultYaml, argv, "dafilesrv", "DAFILESRV", nullptr, nullptr); | ||
Owned<IPropertyTree> extractedGlobalConfig = createPTree("dafilesrv"); | ||
|
||
#ifndef _CONTAINERIZED | ||
Owned<IPropertyTree> env = getHPCCEnvironment(); | ||
IPropertyTree* globalTracing = env->queryPropTree("Software/tracing"); | ||
if (globalTracing != nullptr) | ||
extractedGlobalConfig->addPropTree("tracing", globalTracing); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be LINK(globalTracing) (or env->getPropTree) |
||
#endif | ||
|
||
// NB: bare-metal dafilesrv does not have a component specific xml, extracting relevant global configuration instead | ||
Owned<IPropertyTree> config = loadConfigurationWithGlobalDefault(defaultYaml, extractedGlobalConfig, argv, "dafilesrv", "DAFILESRV"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. instead of new function/adding to global, could you add 'tracing' to the component config instead?
(and combine with #else // __CONTAINERIZED block below) |
||
|
||
Owned<IPropertyTree> keyPairInfo; // NB: not used in containerized mode | ||
// Get SSL Settings | ||
|
@@ -513,7 +541,6 @@ int main(int argc, const char* argv[]) | |
|
||
IPropertyTree *dafileSrvInstance = nullptr; | ||
#ifndef _CONTAINERIZED | ||
Owned<IPropertyTree> env = getHPCCEnvironment(); | ||
Owned<IPropertyTree> _dafileSrvInstance; | ||
if (env) | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -162,6 +162,69 @@ static ISecureSocket *createSecureSocket(ISocket *sock, bool disableClientCertVe | |
} | ||
#endif | ||
|
||
//------------------------------------------------------------------------------ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ActiveSpanScope is very similar to ThreadedSpanScope described by Gavin here: https://hpccsystems.atlassian.net/jira/software/c/projects/HPCC/issues/HPCC-32982. I liked the name ActiveSpanScope because I believe the class has utility outside of multithreaded contexts, IE: time slicing. Would it be worthwhile to move this out of dafilesrv into jtrace? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes. These utility changes should be in jlib, ideally as separate PRs/requests. It would be worth merging your other PR, and having a PR that implements an agreed solution to HPCC-32982, and then rebasing this PR on it. |
||
// ActiveSpanScope Design Notes: | ||
//------------------------------------------------------------------------------ | ||
// ActiveSpanScope updates the threadActiveSpan when it is intstantiated | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. intstantiated -> instantiated |
||
// and restores it to a user configurable previous ISpan when it leaves scope. | ||
// ActiveSpanScope does not control its referenced ISpan's lifetime or ending. | ||
// | ||
// This design allows ActiveSpanScope to be used to update threadActiveSpan | ||
// for long running ISpans that are time sliced, worked on from multiple threads, | ||
// and / or passed between threads. In these cases multiple ActiveSpanScopes | ||
// will be created over the lifetime of the referenced of the ISpan to represent | ||
// a slice of work done towards that ISpan. | ||
// | ||
// Allowing the previous / restored ISpan to be configured allows for | ||
// "disconnected" work on ISpans to be done. Where the previously active ISpan | ||
// may not be the correct ISpan to restore when an ActiveSpanScope leaves scope. | ||
// | ||
// When an ActiveSpanScope is destroyed it will return the prevSpan to active, | ||
// if and only if its span is still the threadActiveSpan. If this is not this | ||
// implies that there is a conflicting ActiveSpanScope and that a code structure | ||
// issue exists that needs to be addressed. A IERRLOG message will be added | ||
// in debug builds for these cases. | ||
//------------------------------------------------------------------------------ | ||
|
||
class ActiveSpanScope | ||
{ | ||
public: | ||
// Captures current threadActiveSpan for prevSpan | ||
ActiveSpanScope(ISpan * _ptr) : ActiveSpanScope(_ptr, queryThreadedActiveSpan()) {} | ||
ActiveSpanScope(ISpan * _ptr, ISpan * _prev) : span(_ptr), prevSpan(_prev) | ||
{ | ||
setThreadedActiveSpan(_ptr); | ||
} | ||
ActiveSpanScope(const ActiveSpanScope& rhs) = delete; | ||
|
||
~ActiveSpanScope() | ||
{ | ||
ISpan* current = queryThreadedActiveSpan(); | ||
if (current != span) | ||
{ | ||
const char* currSpanID = current != nullptr ? current->querySpanId() : "null"; | ||
const char* expectedSpanID = span != nullptr ? span->querySpanId() : "null"; | ||
|
||
IERRLOG("~ActiveSpanScope: threadActiveSpan has changed unexpectedly, expected: %s actual: %s", expectedSpanID, currSpanID); | ||
return; | ||
} | ||
|
||
setThreadedActiveSpan(prevSpan); | ||
} | ||
|
||
inline ISpan * operator -> () const { return span; } | ||
inline operator ISpan *() const { return span; } | ||
|
||
inline ActiveSpanScope& operator=(ISpan * ptr) = delete; | ||
inline ActiveSpanScope& operator=(const ActiveSpanScope& rhs) = delete; | ||
|
||
inline bool operator == (ISpan * _ptr) const { return span == _ptr; } | ||
inline bool operator != (ISpan * _ptr) const { return span != _ptr; } | ||
private: | ||
ISpan * span = nullptr; | ||
ISpan * prevSpan = nullptr; | ||
}; | ||
|
||
static void reportFailedSecureAccepts(const char *context, IException *exception, unsigned &numFailedConn, unsigned &timeLastLog) | ||
{ | ||
numFailedConn++; | ||
|
@@ -839,6 +902,9 @@ class CRemoteRequest : public CSimpleInterfaceOf<IInterface> | |
MemoryBuffer expandMb; | ||
Owned<IXmlWriterExt> responseWriter; // for xml or json response | ||
|
||
Owned<ISpan> requestSpan; | ||
std::string requestTraceParent; | ||
|
||
bool handleFull(MemoryBuffer &inMb, size32_t inPos, MemoryBuffer &compressMb, ICompressor *compressor, size32_t replyLimit, size32_t &totalSz) | ||
{ | ||
size32_t sz = inMb.length()-inPos; | ||
|
@@ -1092,13 +1158,54 @@ class CRemoteRequest : public CSimpleInterfaceOf<IInterface> | |
responseWriter->outputUInt(cursorHandle, sizeof(cursorHandle), "handle"); | ||
} | ||
} | ||
|
||
~CRemoteRequest() | ||
{ | ||
if (requestSpan != nullptr) | ||
{ | ||
requestSpan->setSpanStatusSuccess(true); | ||
requestSpan->endSpan(); | ||
} | ||
} | ||
|
||
OutputFormat queryFormat() const { return format; } | ||
unsigned __int64 queryReplyLimit() const { return replyLimit; } | ||
IRemoteActivity *queryActivity() const { return activity; } | ||
ICompressor *queryCompressor() const { return compressor; } | ||
|
||
void process(IPropertyTree *requestTree, MemoryBuffer &restMb, MemoryBuffer &responseMb, CClientStats &stats) | ||
{ | ||
const char* fullTraceContext = requestTree->queryProp("_trace/traceparent"); | ||
|
||
// We only want to compare the trace-id & span-id, so remove the last "sampling" group | ||
std::string traceParent = fullTraceContext ? fullTraceContext : ""; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor: You can use strrchr on the const char * to avoid cloning the string. Alternatively use std::string_view and assign to a new string. |
||
traceParent = traceParent.substr(0,traceParent.find_last_of("-")); | ||
|
||
if (!traceParent.empty() && requestTraceParent != traceParent) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note: I am checking if the traceParent has changed every time process is called here because the client side may use multiple spans during the lifetime a single CRemoteRequest. See below screenshots for an example. |
||
{ | ||
// Check to see if we have an existing span that needs to be closed out, this can happen | ||
// when the span parent changes on the client side | ||
if (requestSpan != nullptr) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be automatic when requestSpan is cleared. That would come if the changes in HPCC-32982 are implemented. Possibly requires a boolean to indicate success.. |
||
{ | ||
requestSpan->setSpanStatusSuccess(true); | ||
requestSpan->endSpan(); | ||
} | ||
|
||
Owned<IProperties> traceHeaders = createProperties(); | ||
traceHeaders->setProp("traceparent", fullTraceContext); | ||
|
||
std::string requestSpanName; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor efficiency: use a const char * and avoid a string being cloned. |
||
if (activity->queryIsReadActivity()) | ||
requestSpanName = "ReadRequest"; | ||
else | ||
requestSpanName = "WriteRequest"; | ||
|
||
requestSpan.set(queryTraceManager().createServerSpan(requestSpanName.c_str(), traceHeaders)); | ||
requestTraceParent = traceParent; | ||
} | ||
|
||
ActiveSpanScope activeSpan(requestSpan.get()); | ||
|
||
if (requestTree->hasProp("replyLimit")) | ||
replyLimit = requestTree->getPropInt64("replyLimit", defaultDaFSReplyLimitKB) * 1024; | ||
|
||
|
@@ -3027,12 +3134,12 @@ class CRemoteFileServer : implements IRemoteFileServer, public CInterface | |
else | ||
{ | ||
if (gc) | ||
THROWJSOCKEXCEPTION(JSOCKERR_graceful_close); | ||
THROWJSOCKEXCEPTION(JSOCKERR_graceful_close); | ||
break; // wait for rest via subsequent notifySelected's | ||
} | ||
} | ||
else if (gc) | ||
THROWJSOCKEXCEPTION(JSOCKERR_graceful_close); | ||
THROWJSOCKEXCEPTION(JSOCKERR_graceful_close); | ||
// to be here, implies handled full message, loop around to see if more on the wire. | ||
// will break out if nothing/partial. | ||
} | ||
|
@@ -4818,7 +4925,7 @@ class CRemoteFileServer : implements IRemoteFileServer, public CInterface | |
* } | ||
* } | ||
* } | ||
* | ||
* | ||
* fetch continuation: | ||
* { | ||
* "format" : "binary", | ||
|
@@ -4960,8 +5067,23 @@ class CRemoteFileServer : implements IRemoteFileServer, public CInterface | |
} | ||
case StreamCmd::CLOSE: | ||
{ | ||
OwnedSpanScope closeSpan; | ||
const char* traceParent = requestTree->queryProp("_trace/traceparent"); | ||
if (traceParent != nullptr) | ||
{ | ||
Owned<IProperties> traceHeaders = createProperties(); | ||
traceHeaders->setProp("traceparent", traceParent); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this also have the sampling suffix removed? |
||
|
||
closeSpan.set(queryTraceManager().createServerSpan("CloseRequest", traceHeaders)); | ||
} | ||
|
||
if (0 == cursorHandle) | ||
throw createDafsException(DAFSERR_cmdstream_protocol_failure, "cursor handle not supplied to 'close' command"); | ||
{ | ||
IException* exception = createDafsException(DAFSERR_cmdstream_protocol_failure, "cursor handle not supplied to 'close' command"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be changed to : IDAFS_Exception* exception = createDafsException.. otherwise the throw exception (of an IException) will not be caught by catch (IDAFS_Exception *e) handlers. |
||
closeSpan->recordException(exception); | ||
throw exception; | ||
} | ||
|
||
IFileIO *dummy; | ||
checkFileIOHandle(cursorHandle, dummy, true); | ||
break; | ||
|
@@ -4990,6 +5112,16 @@ class CRemoteFileServer : implements IRemoteFileServer, public CInterface | |
{ | ||
case StreamCmd::VERSION: | ||
{ | ||
OwnedSpanScope versionSpan; | ||
const char* traceParent = requestTree->queryProp("_trace/traceparent"); | ||
if (traceParent != nullptr) | ||
{ | ||
Owned<IProperties> traceHeaders = createProperties(); | ||
traceHeaders->setProp("traceparent", traceParent); | ||
|
||
versionSpan = queryTraceManager().createServerSpan("VersionRequest", traceHeaders); | ||
} | ||
|
||
if (outFmt_Binary == outputFormat) | ||
reply.append(DAFILESRV_VERSIONSTRING); | ||
else | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This function is similar to work Jake has done in HPCC-32991, might be worthwhile to retarget to master and call the overloaded doLoadConfiguration instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes. Should reuse the from HPCC-32991. If this change are wanted in to 9.8, we could consider cherry-picking back the changed in HPCC-32991 to 9.8.