Skip to content

Commit

Permalink
Merge pull request #19175 from rpastrana/HPCC-32761-ALA-BlobMode
Browse files Browse the repository at this point in the history
HPCC-32761 ALA LogAccess Blob Mode

Reviewed-by: Gavin Halliday <[email protected]>
Merged-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday authored Oct 3, 2024
2 parents 28a2b3e + 55d0900 commit b8a79e1
Show file tree
Hide file tree
Showing 4 changed files with 248 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Configures HPCC logAccess to target Azure Log Analytics Workspace
global:
logAccess:
name: "Azure LogAnalytics LogAccess"
type: "AzureLogAnalyticsCurl"
blobMode: "true"
#connection:
#All connection attributes are optional.
#It is preferable to provide connection values as secret values: category 'esp', secret name 'azure-logaccess'
# NOTE: secret 'azure-logaccess' must include 'aad-client-secret', which cannot be provided in configuration
#
#workspaceID: "XYZ" #ID of the Azure LogAnalytics workspace to query logs from
# Secret value equivalent: 'ala-workspace-id'
#clientID: "DEF" #ID of Azure Active Directory registered application with api.loganalytics.io access - format: 00000000-0000-0000-0000-000000000000
# Secret value equivalent: 'aad-client-id'
#tenantID: "ABC" #The Azure Active Directory Tenant ID, required for KQL API access
# Secret value equivalent: 'aad-tenant-id'
logMaps:
- type: "global"
storeName: "ContainerLog"
searchColumn: "LogEntry"
columnType: "string"
columnMode: "MIN"
timeStampColumn: "TimeGenerated"
- type: "workunits"
searchColumn: "LogEntry"
columnMode: "DEFAULT"
columnType: "string"
- type: "components"
storeName: "ContainerInventory"
searchColumn: "Name"
keyColumn: "ContainerID"
columnMode: "MIN"
columnType: "string"
timeStampColumn: "TimeGenerated"
disableJoins: false #Potentially expensive join operations needed to fetch a certain column can be disabled
- type: "audience"
searchColumn: "LogEntry"
enumValues:
- code: OPR
- code: USR
- code: PRO
- code: ADT
- code: MON
columnMode: "DEFAULT"
columnType: "enum"
- type: "class"
searchColumn: "LogEntry"
enumValues:
- code: DIS
- code: ERR
- code: WRN
- code: INF
- code: PRO
- code: MET
- code: EVT
columnMode: "DEFAULT"
columnType: "enum"
- type: "instance"
searchColumn: "Computer"
# Exactly one columnMode per entry: duplicate mapping keys are invalid YAML, and
# parsers that tolerate them typically keep only the last value ("ALL").
columnMode: "ALL"
columnType: "string"
- type: "message"
searchColumn: "LogEntry"
columnMode: "MIN"
columnType: "string"
- type: "logid"
searchColumn: "LogEntry"
columnMode: "DEFAULT"
columnType: "string"
- type: "processid"
searchColumn: "LogEntry"
columnMode: "ALL"
columnType: "string"
- type: "threadid"
searchColumn: "LogEntry"
columnMode: "DEFAULT"
columnType: "string"
- type: "timestamp"
searchColumn: "TimeGenerated"
columnMode: "MIN"
columnType: "Timestamp"
#- type: "pod"
# searchColumn: "PodName"
# columnMode: "DEFAULT"
# columnType: "string"
- type: "spanid"
searchColumn: "LogEntry"
columnMode: "DEFAULT"
columnType: "string"
- type: "traceid"
searchColumn: "LogEntry"
columnMode: "DEFAULT"
columnType: "string"
secrets:
esp:
azure-logaccess: "azure-logaccess"
vaults:
esp:
- name: my-azure-logaccess-vault
url: http://${env.VAULT_SERVICE_HOST}:${env.VAULT_SERVICE_PORT}/v1/secret/data/esp/${secret}
kind: kv-v2
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Configures HPCC logAccess to target Azure Log Analytics Workspace
global:
logAccess:
name: "Azure LogAnalytics LogAccess"
type: "AzureLogAnalyticsCurl"
blobMode: "true"
#connection:
#All connection attributes are optional.
#It is preferable to provide connection values as secret values: category 'esp', secret name 'azure-logaccess'
# NOTE: secret 'azure-logaccess' must include 'aad-client-secret', which cannot be provided in configuration
#
#workspaceID: "XYZ" #ID of the Azure LogAnalytics workspace to query logs from
# Secret value equivalent: 'ala-workspace-id'
#clientID: "DEF" #ID of Azure Active Directory registered application with api.loganalytics.io access - format: 00000000-0000-0000-0000-000000000000
# Secret value equivalent: 'aad-client-id'
#tenantID: "ABC" #The Azure Active Directory Tenant ID, required for KQL API access
# Secret value equivalent: 'aad-tenant-id'
logMaps:
- type: "global"
storeName: "ContainerLogV2"
searchColumn: "LogMessage"
columnType: "dynamic"
columnMode: "MIN"
timeStampColumn: "TimeGenerated"
- type: "workunits"
searchColumn: "LogMessage"
columnMode: "DEFAULT"
columnType: "string"
- type: "components"
storeName: "ContainerLogV2"
searchColumn: "ContainerName" # Container name happens to coincide with component name
keyColumn: "ContainerName"
columnMode: "DEFAULT"
columnType: "string"
- type: "audience"
searchColumn: "LogMessage"
enumValues:
- code: OPR
- code: USR
- code: PRO
- code: ADT
- code: MON
columnMode: "DEFAULT"
columnType: "enum"
- type: "class"
searchColumn: "LogMessage"
enumValues:
- code: DIS
- code: ERR
- code: WRN
- code: INF
- code: PRO
- code: MET
- code: EVT
columnMode: "DEFAULT"
columnType: "enum"
- type: "instance"
searchColumn: "Computer"
# Exactly one columnMode per entry: duplicate mapping keys are invalid YAML, and
# parsers that tolerate them typically keep only the last value ("ALL").
columnMode: "ALL"
columnType: "string"
- type: "message"
searchColumn: "LogMessage"
columnMode: "MIN"
columnType: "string"
- type: "logid"
searchColumn: "LogMessage"
columnMode: "DEFAULT"
columnType: "string"
- type: "processid"
searchColumn: "LogMessage"
columnMode: "ALL"
columnType: "string"
- type: "threadid"
searchColumn: "LogMessage"
columnMode: "DEFAULT"
columnType: "string"
- type: "timestamp"
searchColumn: "LogMessage"
columnMode: "MIN"
columnType: "string"
- type: "pod"
searchColumn: "PodName"
columnMode: "DEFAULT"
columnType: "string"
- type: "spanid"
searchColumn: "LogMessage"
columnMode: "DEFAULT"
columnType: "string"
- type: "traceid"
searchColumn: "LogMessage"
columnMode: "DEFAULT"
columnType: "string"
secrets:
esp:
azure-logaccess: "azure-logaccess"
vaults:
esp:
- name: my-azure-logaccess-vault
url: http://${env.VAULT_SERVICE_HOST}:${env.VAULT_SERVICE_PORT}/v1/secret/data/esp/${secret}
kind: kv-v2
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,10 @@ AzureLogAnalyticsCurlClient::AzureLogAnalyticsCurlClient(IPropertyTree & logAcce

m_pluginCfg.set(&logAccessPluginConfig);

//blobMode is a flag to determine if the log entry is to be parsed or not
m_blobMode = logAccessPluginConfig.getPropBool("@blobMode", false);
DBGLOG("%s: Blob Mode: %s", COMPONENT_NAME, m_blobMode ? "Enabled" : "Disabled");

m_globalIndexTimestampField.set(defaultHPCCLogTimeStampCol);
m_globalIndexSearchPattern.set(defaultIndexPattern);
m_globalSearchColName.set(defaultHPCCLogMessageCol);
Expand Down Expand Up @@ -483,7 +487,11 @@ void AzureLogAnalyticsCurlClient::getMinReturnColumns(StringBuffer & columns, co
columns.append(defaultHPCCLogComponentCol);
columns.append(", ");
}
columns.appendf("%s, %s", m_globalIndexTimestampField.str(), defaultHPCCLogMessageCol);

if (m_blobMode)
columns.appendf("%s, %s", m_globalIndexTimestampField.str(), m_globalSearchColName.str());
else
columns.appendf("%s, %s", m_globalIndexTimestampField.str(), defaultHPCCLogMessageCol);
}

void AzureLogAnalyticsCurlClient::getDefaultReturnColumns(StringBuffer & columns, const bool includeComponentName)
Expand Down Expand Up @@ -517,12 +525,15 @@ void AzureLogAnalyticsCurlClient::getDefaultReturnColumns(StringBuffer & columns
if (!isEmptyString(m_podSearchColName))
columns.appendf("%s, ", m_podSearchColName.str());

columns.appendf("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s",
m_globalIndexTimestampField.str(), defaultHPCCLogMessageCol, m_classSearchColName.str(),
m_audienceSearchColName.str(), m_workunitSearchColName.str(), m_traceSearchColName.str(), m_spanSearchColName.str(), defaultHPCCLogSeqCol, defaultHPCCLogThreadIDCol, defaultHPCCLogProcIDCol);
if (m_blobMode)
columns.appendf("%s, %s", m_globalIndexTimestampField.str(), m_globalSearchColName.str());
else
columns.appendf("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s",
m_globalIndexTimestampField.str(), defaultHPCCLogMessageCol, m_classSearchColName.str(),
m_audienceSearchColName.str(), m_workunitSearchColName.str(), m_traceSearchColName.str(), m_spanSearchColName.str(), defaultHPCCLogSeqCol, defaultHPCCLogThreadIDCol, defaultHPCCLogProcIDCol);
}

bool generateHPCCLogColumnstAllColumns(StringBuffer & kql, const char * colName, bool targetsV2)
bool generateHPCCLogColumnstAllColumns(StringBuffer & kql, const char * colName, bool targetsV2, bool blobMode)
{
if (isEmptyString(colName))
{
Expand All @@ -538,25 +549,32 @@ bool generateHPCCLogColumnstAllColumns(StringBuffer & kql, const char * colName,
else
throw makeStringExceptionV(-1, "%s: Invalid Azure Log Analytics log message column name detected: '%s'. Review logAccess configuration.", COMPONENT_NAME, colName);

kql.appendf("\n| extend hpcclogfields = extract_all(@\'^([0-9A-Fa-f]+)\\s+(OPR|USR|PRG|AUD|UNK)\\s+(DIS|ERR|WRN|INF|PRO|MET|UNK)\\s+(\\d{4}-\\d{2}-\\d{2}\\s\\d{2}:\\d{2}:\\d{2}\\.\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(UNK|[A-Z]\\d{8}-\\d{6}(?:-\\d+)?)\\s*([0-9a-fA-F]{32}|UNK)?\\s*([0-9a-fA-F]{16}|UNK)?\\s+)?\\\"(.*)\\\"$', %s)[0]", sourceCol.str());
kql.appendf("\n| extend %s = tostring(hpcclogfields.[0])", defaultHPCCLogSeqCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[1])", defaultHPCCLogAudCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[2])", defaultHPCCLogTypeCol);
kql.appendf("\n| extend %s = todatetime(hpcclogfields.[3])", defaultHPCCLogTimeStampCol);
kql.appendf("\n| extend %s = toint(hpcclogfields.[4])", defaultHPCCLogProcIDCol);
kql.appendf("\n| extend %s = toint(hpcclogfields.[5])", defaultHPCCLogThreadIDCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[6])", defaultHPCCLogJobIDCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[7])", defaultHPCCLogTraceIDCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[8])", defaultHPCCLogSpanIDCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[9])", defaultHPCCLogMessageCol);
kql.appendf("\n| project-away hpcclogfields, Type, TenantId, _ResourceId, %s, ", colName);
if (!blobMode)
{
kql.appendf("\n| extend hpcclogfields = extract_all(@\'^([0-9A-Fa-f]+)\\s+(OPR|USR|PRG|AUD|UNK)\\s+(DIS|ERR|WRN|INF|PRO|MET|UNK)\\s+(\\d{4}-\\d{2}-\\d{2}\\s\\d{2}:\\d{2}:\\d{2}\\.\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(UNK|[A-Z]\\d{8}-\\d{6}(?:-\\d+)?)\\s*([0-9a-fA-F]{32}|UNK)?\\s*([0-9a-fA-F]{16}|UNK)?\\s+)?\\\"(.*)\\\"$', %s)[0]", sourceCol.str());
kql.appendf("\n| extend %s = tostring(hpcclogfields.[0])", defaultHPCCLogSeqCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[1])", defaultHPCCLogAudCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[2])", defaultHPCCLogTypeCol);
kql.appendf("\n| extend %s = todatetime(hpcclogfields.[3])", defaultHPCCLogTimeStampCol);
kql.appendf("\n| extend %s = toint(hpcclogfields.[4])", defaultHPCCLogProcIDCol);
kql.appendf("\n| extend %s = toint(hpcclogfields.[5])", defaultHPCCLogThreadIDCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[6])", defaultHPCCLogJobIDCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[7])", defaultHPCCLogTraceIDCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[8])", defaultHPCCLogSpanIDCol);
kql.appendf("\n| extend %s = tostring(hpcclogfields.[9])", defaultHPCCLogMessageCol);

kql.appendf("\n| project-away hpcclogfields, Type, TenantId, _ResourceId, %s, ", colName);
}
else
{
kql.appendf("\n| project-away Type, TenantId, _ResourceId, ");
}

if (targetsV2)
kql.append("LogSource, SourceSystem");
else
kql.append("LogEntrySource, TimeOfCommand, SourceSystem");


return true;
}

Expand Down Expand Up @@ -659,6 +677,11 @@ void AzureLogAnalyticsCurlClient::populateKQLQueryString(StringBuffer & queryStr
std::string queryField = m_globalSearchColName.str();
std::string queryOperator = " =~ ";

if (m_blobMode)
{
queryOperator = " has ";
}

filter->toString(queryValue);
switch (filter->filterType())
{
Expand Down Expand Up @@ -879,7 +902,8 @@ void AzureLogAnalyticsCurlClient::populateKQLQueryString(StringBuffer & queryStr
declareContainerIndexJoinTable(queryString, options);

queryString.append(queryIndex);
generateHPCCLogColumnstAllColumns(queryString, m_globalSearchColName.str(), targetIsContainerLogV2);
//parses m_globalSearchColName into hpcc.log.* fields unless blob mode is enabled, in which case it only emits the project-away
generateHPCCLogColumnstAllColumns(queryString, m_globalSearchColName.str(), targetIsContainerLogV2, m_blobMode);

if (options.queryFilter() == nullptr || options.queryFilter()->filterType() == LOGACCESS_FILTER_wildcard) // No filter
{
Expand All @@ -895,7 +919,6 @@ void AzureLogAnalyticsCurlClient::populateKQLQueryString(StringBuffer & queryStr
StringBuffer range;
azureLogAnalyticsTimestampQueryRangeString(range, m_globalIndexTimestampField.str(), trange.getStartt().getSimple(),trange.getEndt().isNull() ? -1 : trange.getEndt().getSimple());
queryString.append("\n| where ").append(range.str());
//if (includeComponentName)
if (!m_disableComponentNameJoins && !targetIsContainerLogV2)
queryString.append("\n) on ").append(m_componentsLookupKeyColumn);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class AzureLogAnalyticsCurlClient : public CInterfaceOf<IRemoteLogAccess>
StringBuffer m_instanceLookupKeyColumn;

bool targetIsContainerLogV2 = false;
bool m_blobMode = false;

public:
AzureLogAnalyticsCurlClient(IPropertyTree & logAccessPluginConfig);
Expand Down

0 comments on commit b8a79e1

Please sign in to comment.