-
Notifications
You must be signed in to change notification settings - Fork 8.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix search telemetry to only update SO periodically #93130
Changes from all commits
17c5d65
9d559bd
6cf229c
709f84b
c61a657
38068d7
4e4cd23
4b06ddd
4a638b1
b30416b
ca6ffb8
358f500
4d82f94
f59338a
dc8ec11
64972b5
47d464b
0be6151
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
--- | ||
id: kibCasesPluginApi | ||
slug: /kibana-dev-docs/casesPluginApi | ||
title: cases | ||
image: https://source.unsplash.com/400x175/?github | ||
summary: API docs for the cases plugin | ||
date: 2020-11-16 | ||
tags: ['contributor', 'dev', 'apidocs', 'kibana', 'cases'] | ||
warning: This document is auto-generated and is meant to be viewed inside our experimental, new docs system. Reach out in #docs-engineering for more info. | ||
--- | ||
|
||
import casesObj from './cases.json'; | ||
|
||
## Server | ||
|
||
### Interfaces | ||
<DocDefinitionList data={casesObj.server.interfaces}/> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,13 +6,13 @@ | |
* Side Public License, v 1. | ||
*/ | ||
|
||
import { once } from 'lodash'; | ||
import { once, debounce } from 'lodash'; | ||
import type { CoreSetup, Logger } from 'kibana/server'; | ||
import { SavedObjectsErrorHelpers } from '../../../../../core/server'; | ||
import type { IEsSearchResponse } from '../../../common'; | ||
import type { IEsSearchResponse, ISearchOptions } from '../../../common'; | ||
import { isCompleteResponse } from '../../../common'; | ||
import { CollectedUsage } from './register'; | ||
|
||
const SAVED_OBJECT_ID = 'search-telemetry'; | ||
const MAX_RETRY_COUNT = 3; | ||
|
||
export interface SearchUsage { | ||
trackError(): Promise<void>; | ||
|
@@ -25,34 +25,52 @@ export function usageProvider(core: CoreSetup): SearchUsage { | |
return coreStart.savedObjects.createInternalRepository(); | ||
}); | ||
|
||
const trackSuccess = async (duration: number, retryCount = 0) => { | ||
const repository = await getRepository(); | ||
try { | ||
await repository.incrementCounter(SAVED_OBJECT_ID, SAVED_OBJECT_ID, [ | ||
{ fieldName: 'successCount' }, | ||
{ | ||
fieldName: 'totalDuration', | ||
incrementBy: duration, | ||
}, | ||
]); | ||
} catch (e) { | ||
if (SavedObjectsErrorHelpers.isConflictError(e) && retryCount < MAX_RETRY_COUNT) { | ||
setTimeout(() => trackSuccess(duration, retryCount + 1), 1000); | ||
} | ||
} | ||
const collectedUsage: CollectedUsage = { | ||
successCount: 0, | ||
errorCount: 0, | ||
totalDuration: 0, | ||
}; | ||
|
||
const trackError = async (retryCount = 0) => { | ||
const repository = await getRepository(); | ||
try { | ||
await repository.incrementCounter(SAVED_OBJECT_ID, SAVED_OBJECT_ID, [ | ||
{ fieldName: 'errorCount' }, | ||
]); | ||
} catch (e) { | ||
if (SavedObjectsErrorHelpers.isConflictError(e) && retryCount < MAX_RETRY_COUNT) { | ||
setTimeout(() => trackError(retryCount + 1), 1000); | ||
// Instead of updating the search count every time a search completes, we update some in-memory | ||
// counts and only update the saved object every ~5 seconds | ||
const updateSearchUsage = debounce( | ||
async () => { | ||
const repository = await getRepository(); | ||
const { successCount, errorCount, totalDuration } = collectedUsage; | ||
const counterFields = Object.entries(collectedUsage) | ||
.map(([fieldName, incrementBy]) => ({ fieldName, incrementBy })) | ||
// Filter out any zero values because `incrementCounter` will still increment them | ||
.filter(({ incrementBy }) => incrementBy > 0); | ||
|
||
try { | ||
await repository.incrementCounter<CollectedUsage>( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Could it be that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I guess that's a possible edge case. I would think Kibana would be pretty much unusable if an |
||
SAVED_OBJECT_ID, | ||
SAVED_OBJECT_ID, | ||
counterFields | ||
); | ||
|
||
// Since search requests may have completed while the saved object was being updated, we subtract | ||
// what was just updated in the saved object rather than resetting the values to 0 | ||
collectedUsage.successCount -= successCount; | ||
collectedUsage.errorCount -= errorCount; | ||
collectedUsage.totalDuration -= totalDuration; | ||
} catch (e) { | ||
// We didn't reset the counters so we'll retry when the next search request completes | ||
} | ||
} | ||
}, | ||
5000, | ||
{ maxWait: 5000 } | ||
); | ||
|
||
const trackSuccess = (duration: number) => { | ||
collectedUsage.successCount++; | ||
collectedUsage.totalDuration += duration; | ||
return updateSearchUsage(); | ||
}; | ||
|
||
const trackError = () => { | ||
collectedUsage.errorCount++; | ||
return updateSearchUsage(); | ||
}; | ||
|
||
return { trackSuccess, trackError }; | ||
|
@@ -61,9 +79,14 @@ export function usageProvider(core: CoreSetup): SearchUsage { | |
/** | ||
* Rxjs observer for easily doing `tap(searchUsageObserver(logger, usage))` in an rxjs chain. | ||
*/ | ||
export function searchUsageObserver(logger: Logger, usage?: SearchUsage) { | ||
export function searchUsageObserver( | ||
logger: Logger, | ||
usage?: SearchUsage, | ||
{ isRestore }: ISearchOptions = {} | ||
) { | ||
return { | ||
next(response: IEsSearchResponse) { | ||
if (isRestore || !isCompleteResponse(response)) return; | ||
logger.debug(`trackSearchStatus:next ${response.rawResponse.took}`); | ||
usage?.trackSuccess(response.rawResponse.took); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably not related to this pr, but I want to point out: When we restore a search session then this so assume:
I wonder if we actually want to track this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, we probably don't want to track at all if |
||
}, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: We are dynamically mapping these fields into telemetry objects. I wonder if this dynamic mapping makes it very easy to change field names without realizing that this breaks our telemetry log structure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if this might have some implications if we work in clustering mode 🤔
#68626