Skip to content

Commit

Permalink
[APM] Improve logic to determine when to use the transaction.duration…
Browse files Browse the repository at this point in the history
….summary field (#171315)

fixes #167578

## Summary

This pull request addresses an issue related to the rendering of latency
charts in the UI when using APM Servers with versions both pre-8.7 and
8.7 or higher.

It changes `GET /internal/apm/time_range_metadata` so that it only
returns `hasDurationSummaryField: true` when **all** documents within a given
time range were produced with the `transaction.duration.summary` field.


### Services Inventory

| before  |  after   |
|--------|--------|
| <img width="800" alt="image"
src="https://github.com/elastic/kibana/assets/2767137/eb8b7552-3bd8-4920-b0bf-2f64c6129e16">
| <img width="800" alt="image"
src="https://github.com/elastic/kibana/assets/2767137/46079b0c-f29c-489f-8951-5235eae8ba71">
|



### Service Overview
<img width="1427" alt="image"
src="https://github.com/elastic/kibana/assets/2767137/1511b919-9f86-4e98-94a5-2415bd1d6fc5">

```json
{
    "isUsingServiceDestinationMetrics": false,
    "sources": [
        {
            "documentType": "serviceTransactionMetric",
            "rollupInterval": "1m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "serviceTransactionMetric",
            "rollupInterval": "10m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "serviceTransactionMetric",
            "rollupInterval": "60m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "transactionMetric",
            "rollupInterval": "1m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "transactionMetric",
            "rollupInterval": "10m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "transactionMetric",
            "rollupInterval": "60m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "transactionEvent",
            "rollupInterval": "none",
            "hasDocs": true,
            "hasDurationSummaryField": false
        }
    ]
}
```

**Time range with only APM Server >= 8.7 produced documents**

<img width="1427" alt="image"
src="https://github.com/elastic/kibana/assets/2767137/7a9fbc34-f233-4b89-923d-bc9970e9097b">

```json
{
    "isUsingServiceDestinationMetrics": false,
    "sources": [
        {
            "documentType": "serviceTransactionMetric",
            "rollupInterval": "1m",
            "hasDocs": true,
            "hasDurationSummaryField": true
        },
        {
            "documentType": "serviceTransactionMetric",
            "rollupInterval": "10m",
            "hasDocs": true,
            "hasDurationSummaryField": true
        },
        {
            "documentType": "serviceTransactionMetric",
            "rollupInterval": "60m",
            "hasDocs": true,
            "hasDurationSummaryField": true
        },
        {
            "documentType": "transactionMetric",
            "rollupInterval": "1m",
            "hasDocs": true,
            "hasDurationSummaryField": true
        },
        {
            "documentType": "transactionMetric",
            "rollupInterval": "10m",
            "hasDocs": true,
            "hasDurationSummaryField": true
        },
        {
            "documentType": "transactionMetric",
            "rollupInterval": "60m",
            "hasDocs": true,
            "hasDurationSummaryField": true
        },
        {
            "documentType": "transactionEvent",
            "rollupInterval": "none",
            "hasDocs": true,
            "hasDurationSummaryField": false
        }
    ]
}
```

**Time range with only APM Server < 8.7 produced documents**

<img width="1427" alt="image"
src="https://github.com/elastic/kibana/assets/2767137/4fd1aba5-75df-4f4c-a788-fae18fca38b0">

```json
{
    "isUsingServiceDestinationMetrics": false,
    "sources": [
        {
            "documentType": "serviceTransactionMetric",
            "rollupInterval": "1m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "serviceTransactionMetric",
            "rollupInterval": "10m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "serviceTransactionMetric",
            "rollupInterval": "60m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "transactionMetric",
            "rollupInterval": "1m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "transactionMetric",
            "rollupInterval": "10m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "transactionMetric",
            "rollupInterval": "60m",
            "hasDocs": true,
            "hasDurationSummaryField": false
        },
        {
            "documentType": "transactionEvent",
            "rollupInterval": "none",
            "hasDocs": true,
            "hasDurationSummaryField": false
        }
    ]
}
```



### How to test
- Set up a local Kibana and ES instance
- Run Synthtrace to produce documents simulating APM Server < 8.7
```
node scripts/synthtrace service_summary_field_version_dependent.ts  --versionOverride=8.6.2  --from=now-15m --to=now
```
- Run Synthtrace to produce documents simulating APM Server >= 8.7

```
node scripts/synthtrace service_summary_field_version_dependent.ts  --versionOverride=8.9.2  --from=now-15m --to=now
```
- Navigate to APM > Service and Trace and click through the services

---------

Co-authored-by: Kibana Machine <[email protected]>
  • Loading branch information
crespocarlos and kibanamachine authored Jan 16, 2024
1 parent 9b901b2 commit b82203a
Show file tree
Hide file tree
Showing 6 changed files with 335 additions and 167 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { ApmFields, apm } from '@kbn/apm-synthtrace-client';
import { random } from 'lodash';
import { pipeline, Readable } from 'stream';
import semver from 'semver';
import { Scenario } from '../cli/scenario';
import {
addObserverVersionTransform,
deleteSummaryFieldTransform,
} from '../lib/utils/transform_helpers';
import { withClient } from '../lib/utils/with_client';

/**
 * Synthtrace scenario that emits successful `GET /order/{id}` transactions for
 * a single Java service instance at a rate of 6 per minute.
 *
 * When `versionOverride` is set to a version lower than 8.7.0 the scenario runs
 * in "legacy" mode: the service is named `java-legacy` and the default ES
 * client pipeline is extended with `addObserverVersionTransform` and
 * `deleteSummaryFieldTransform` (presumably stamping the overridden observer
 * version and stripping the duration summary field - confirm against
 * `transform_helpers`). Otherwise the service is named `java` and the default
 * pipeline is left untouched.
 */
const scenario: Scenario<ApmFields> = async ({ logger, versionOverride }) => {
  const version = versionOverride as string;
  // Falsy when no override is given; only evaluated for truthiness below.
  const isLegacy = versionOverride && semver.lt(version, '8.7.0');

  return {
    bootstrap: async ({ apmEsClient }) => {
      // Nothing to customise unless we are simulating a pre-8.7 APM Server.
      if (!isLegacy) {
        return;
      }

      apmEsClient.pipeline((base: Readable) => {
        const buildDefaultPipeline = apmEsClient.getDefaultPipeline();
        const defaultPipeline = buildDefaultPipeline(base) as unknown as NodeJS.ReadableStream;

        // Chain the legacy transforms after the default pipeline; stream
        // failures are logged rather than thrown.
        return pipeline(
          defaultPipeline,
          addObserverVersionTransform(version),
          deleteSummaryFieldTransform(),
          (err) => {
            if (err) {
              logger.error(err);
            }
          }
        );
      });
    },

    generate: ({ range, clients: { apmEsClient } }) => {
      const timestamps = range.ratePerMinute(6);

      const serviceInstance = apm
        .service({
          name: `java${isLegacy ? '-legacy' : ''}`,
          environment: 'production',
          agentName: 'java',
        })
        .instance(`instance`);

      const events = timestamps.generator((timestamp) => {
        // Pick a random duration between a random lower and upper bound.
        const upperBound = random(1000, 4000);
        const lowerBound = random(100, upperBound / 5);
        const duration = random(lowerBound, upperBound);

        const transaction = serviceInstance.transaction({ transactionName: 'GET /order/{id}' });
        return transaction.timestamp(timestamp).duration(duration).success();
      });

      return withClient(apmEsClient, events);
    },
  };
};

export default scenario;
263 changes: 138 additions & 125 deletions x-pack/plugins/apm/server/lib/helpers/get_document_sources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,22 @@ import { ApmDocumentType } from '../../../common/document_type';
import { RollupInterval } from '../../../common/rollup';
import { APMEventClient } from './create_es_client/create_apm_event_client';
import { getConfigForDocumentType } from './create_es_client/document_type';
import { TRANSACTION_DURATION_SUMMARY } from '../../../common/es_fields/apm';
import { TimeRangeMetadata } from '../../../common/time_range_metadata';
import { getDurationLegacyFilter } from './transactions';

/**
 * Position of each query's response inside the group of msearch responses that
 * belongs to one document type / rollup interval pair. The positions mirror the
 * order in which the requests are flattened into the msearch call: before,
 * current, duration summary check.
 */
const QUERY_INDEX = {
// Response of the query over the range preceding the requested one.
BEFORE: 0,
// Response of the query over the requested range.
CURRENT: 1,
// Response of the duration summary check; this query is not issued for every
// document type, so the entry at this position may be absent.
DURATION_SUMMARY: 2,
} as const;

/**
 * Availability information gathered for a single document type / rollup
 * interval pair from the msearch responses.
 */
interface DocumentTypeData {
documentType: ApmDocumentType;
rollupInterval: RollupInterval;
// True when the "before" query returned at least one hit.
hasDocBefore: boolean;
// True when the "current" query returned at least one hit.
hasDocAfter: boolean;
// True when the duration summary check returned zero hits, or when no such
// check was issued for this document type.
allHaveDurationSummary: boolean;
}

const getRequest = ({
documentType,
Expand Down Expand Up @@ -64,170 +78,169 @@ export async function getDocumentSources({
kuery: string;
enableServiceTransactionMetrics: boolean;
enableContinuousRollups: boolean;
}) {
const currentRange = rangeQuery(start, end);
const diff = end - start;
const kql = kqlQuery(kuery);
const beforeRange = rangeQuery(start - diff, end - diff);

const sourcesToCheck = [
}): Promise<TimeRangeMetadata['sources']> {
const documentTypesToCheck = [
...(enableServiceTransactionMetrics
? [ApmDocumentType.ServiceTransactionMetric as const]
: []),
ApmDocumentType.TransactionMetric as const,
].flatMap((documentType) => {
const docTypeConfig = getConfigForDocumentType(documentType);

return (
enableContinuousRollups
? docTypeConfig.rollupIntervals
: [RollupInterval.OneMinute]
).flatMap((rollupInterval) => {
return {
documentType,
rollupInterval,
meta: {
checkSummaryFieldExists: false,
},
before: getRequest({
documentType,
rollupInterval,
filters: [...kql, ...beforeRange],
}),
current: getRequest({
documentType,
rollupInterval,
filters: [...kql, ...currentRange],
}),
};
});
];

const documentTypesInfo = await getDocumentTypesInfo({
apmEventClient,
start,
end,
kuery,
enableContinuousRollups,
documentTypesToCheck,
});

const sourcesToCheckWithSummary = [
ApmDocumentType.TransactionMetric as const,
].flatMap((documentType) => {
const docTypeConfig = getConfigForDocumentType(documentType);

return (
enableContinuousRollups
? docTypeConfig.rollupIntervals
: [RollupInterval.OneMinute]
).flatMap((rollupInterval) => {
const summaryExistsFilter = {
bool: {
filter: [
{
exists: {
field: TRANSACTION_DURATION_SUMMARY,
},
},
],
},
};
const hasAnySourceDocBefore = documentTypesInfo.some(
(source) => source.hasDocBefore
);

return {
documentType,
rollupInterval,
meta: {
checkSummaryFieldExists: true,
},
before: getRequest({
documentType,
rollupInterval,
filters: [...kql, ...beforeRange, summaryExistsFilter],
}),
current: getRequest({
documentType,
rollupInterval,
filters: [...kql, ...currentRange, summaryExistsFilter],
}),
};
});
return [
...mapToSources(documentTypesInfo, hasAnySourceDocBefore),
{
documentType: ApmDocumentType.TransactionEvent,
rollupInterval: RollupInterval.None,
hasDocs: true,
hasDurationSummaryField: false,
},
];
}

const getDocumentTypesInfo = async ({
apmEventClient,
start,
end,
kuery,
enableContinuousRollups,
documentTypesToCheck,
}: {
apmEventClient: APMEventClient;
start: number;
end: number;
kuery: string;
enableContinuousRollups: boolean;
documentTypesToCheck: ApmDocumentType[];
}) => {
const getRequests = getDocumentTypeRequestsFn({
enableContinuousRollups,
start,
end,
kuery,
});

const allSourcesToCheck = [...sourcesToCheck, ...sourcesToCheckWithSummary];
const sourceRequests = documentTypesToCheck.flatMap(getRequests);

const allSearches = allSourcesToCheck.flatMap(({ before, current }) => [
before,
current,
]);
const allSearches = sourceRequests
.flatMap(({ before, current, durationSummaryCheck }) => [
before,
current,
durationSummaryCheck,
])
.filter(
(request): request is ReturnType<typeof getRequest> =>
request !== undefined
);

const allResponses = (
await apmEventClient.msearch('get_document_availability', ...allSearches)
).responses;

const checkedSources = allSourcesToCheck.map((source, index) => {
const { documentType, rollupInterval } = source;
const responseBefore = allResponses[index * 2];
const responseAfter = allResponses[index * 2 + 1];

const hasDocBefore = responseBefore.hits.total.value > 0;
const hasDocAfter = responseAfter.hits.total.value > 0;
return sourceRequests.map(({ documentType, rollupInterval, ...queries }) => {
const numberOfQueries = Object.values(queries).filter(Boolean).length;
// allResponses is sorted by the order of the requests in sourceRequests
const docTypeResponses = allResponses.splice(0, numberOfQueries);

return {
documentType,
rollupInterval,
hasDocBefore,
hasDocAfter,
checkSummaryFieldExists: source.meta.checkSummaryFieldExists,
hasDocBefore: docTypeResponses[QUERY_INDEX.BEFORE].hits.total.value > 0,
hasDocAfter: docTypeResponses[QUERY_INDEX.CURRENT].hits.total.value > 0,
allHaveDurationSummary: docTypeResponses[QUERY_INDEX.DURATION_SUMMARY]
? docTypeResponses[QUERY_INDEX.DURATION_SUMMARY].hits.total.value === 0
: true,
};
});
};

const hasAnySourceDocBefore = checkedSources.some(
(source) => source.hasDocBefore
);
/**
 * Creates a function that, for a given document type, builds the search
 * requests needed to determine document availability for every applicable
 * rollup interval.
 *
 * For each rollup interval the returned entry contains:
 * - `before`: a request over the range of equal length immediately preceding
 *   `[start, end]`
 * - `current`: a request over `[start, end]`
 * - `durationSummaryCheck`: a request over `[start, end]` with the filter from
 *   `getDurationLegacyFilter()` added (presumably matching documents that lack
 *   the duration summary field - confirm against `./transactions`). This key
 *   is omitted entirely for `ServiceTransactionMetric`.
 *
 * When continuous rollups are disabled only the one-minute interval is used.
 */
const getDocumentTypeRequestsFn =
  ({
    enableContinuousRollups,
    start,
    end,
    kuery,
  }: {
    enableContinuousRollups: boolean;
    start: number;
    end: number;
    kuery: string;
  }) =>
  (documentType: ApmDocumentType) => {
    const currentRange = rangeQuery(start, end);
    const rangeLength = end - start;
    const kql = kqlQuery(kuery);
    // Same-length window directly before the requested one.
    const previousRange = rangeQuery(start - rangeLength, end - rangeLength);

    const skipsSummaryCheck = documentType === ApmDocumentType.ServiceTransactionMetric;

    const intervals = enableContinuousRollups
      ? getConfigForDocumentType(documentType).rollupIntervals
      : [RollupInterval.OneMinute];

    return intervals.map((rollupInterval) => {
      const before = getRequest({
        documentType,
        rollupInterval,
        filters: [...kql, ...previousRange],
      });

      const current = getRequest({
        documentType,
        rollupInterval,
        filters: [...kql, ...currentRange],
      });

      // The key must be absent (not undefined) when the check is skipped, so
      // that consumers counting the queries per entry see the right number.
      const summaryCheck = skipsSummaryCheck
        ? {}
        : {
            durationSummaryCheck: getRequest({
              documentType,
              rollupInterval,
              filters: [...kql, ...currentRange, getDurationLegacyFilter()],
            }),
          };

      return {
        documentType,
        rollupInterval,
        before,
        current,
        ...summaryCheck,
      };
    });
  };

const sourcesWithHasDocs = checkedSources.map((checkedSource) => {
const mapToSources = (
sources: DocumentTypeData[],
hasAnySourceDocBefore: boolean
) => {
return sources.map((source) => {
const {
documentType,
hasDocAfter,
hasDocBefore,
rollupInterval,
checkSummaryFieldExists,
} = checkedSource;
allHaveDurationSummary,
} = source;

const hasDocBeforeOrAfter = hasDocBefore || hasDocAfter;

// If there is any data before, we require that data is available before
// this time range to mark this source as available. If we don't do that,
// users that upgrade to a version that starts generating service tx metrics
// will see a mostly empty screen for a while after upgrading.
// If we only check before, users with a new deployment will use raw transaction
// events.
const hasDocs = hasAnySourceDocBefore ? hasDocBefore : hasDocBeforeOrAfter;

return {
documentType,
rollupInterval,
checkSummaryFieldExists,
hasDocs,
hasDurationSummaryField: allHaveDurationSummary,
};
});

const sources: TimeRangeMetadata['sources'] = sourcesWithHasDocs
.filter((source) => !source.checkSummaryFieldExists)
.map((checkedSource) => {
const { documentType, hasDocs, rollupInterval } = checkedSource;
return {
documentType,
rollupInterval,
hasDocs,
hasDurationSummaryField:
documentType === ApmDocumentType.ServiceTransactionMetric ||
Boolean(
sourcesWithHasDocs.find((eSource) => {
return (
eSource.documentType === documentType &&
eSource.rollupInterval === rollupInterval &&
eSource.checkSummaryFieldExists
);
})?.hasDocs
),
};
});

return sources.concat({
documentType: ApmDocumentType.TransactionEvent,
rollupInterval: RollupInterval.None,
hasDocs: true,
hasDurationSummaryField: false,
});
}
};
Loading

0 comments on commit b82203a

Please sign in to comment.