From a2f8a1f1350eaca436a33eef99bd7a35afcca615 Mon Sep 17 00:00:00 2001 From: Maciek Grzybowski Date: Fri, 7 Jan 2022 13:15:54 +0100 Subject: [PATCH 1/2] RUMM-1744 Collect Kronos telemetry if Internal Monitoring is enabled --- Datadog/Datadog.xcodeproj/project.pbxproj | 24 ++ .../Core/System/Time/ServerDateProvider.swift | 11 +- Sources/Datadog/Datadog.swift | 16 +- .../Kronos/KronosMonitor.swift | 294 ++++++++++++++++++ Sources/Datadog/Kronos/KronosClock.swift | 8 +- Sources/Datadog/Kronos/KronosNTPClient.swift | 24 +- .../Kronos/KronosMonitorTests.swift | 156 ++++++++++ 7 files changed, 525 insertions(+), 8 deletions(-) create mode 100644 Sources/Datadog/InternalMonitoring/Kronos/KronosMonitor.swift create mode 100644 Tests/DatadogTests/Datadog/InternalMonitoring/Kronos/KronosMonitorTests.swift diff --git a/Datadog/Datadog.xcodeproj/project.pbxproj b/Datadog/Datadog.xcodeproj/project.pbxproj index f739a44199..e88724313f 100644 --- a/Datadog/Datadog.xcodeproj/project.pbxproj +++ b/Datadog/Datadog.xcodeproj/project.pbxproj @@ -273,6 +273,7 @@ 617B954224BF4E7600E6F443 /* RUMMonitorConfigurationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 617B954124BF4E7600E6F443 /* RUMMonitorConfigurationTests.swift */; }; 617CD0DD24CEDDD300B0B557 /* RUMUserActionScopeTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 617CD0DC24CEDDD300B0B557 /* RUMUserActionScopeTests.swift */; }; 617CEB392456BC3A00AD4669 /* TracingUUID.swift in Sources */ = {isa = PBXBuildFile; fileRef = 617CEB382456BC3A00AD4669 /* TracingUUID.swift */; }; + 61815C06278867D1004A666C /* KronosMonitorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61815C05278867D1004A666C /* KronosMonitorTests.swift */; }; 618236892710560900125326 /* DebugWebviewViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 618236882710560900125326 /* DebugWebviewViewController.swift */; }; 6182374325D3DFD5006A375B /* CrashReportingWithRUMIntegrationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6182374225D3DFD5006A375B /* CrashReportingWithRUMIntegrationTests.swift */; }; 6184751526EFCF1300C7C9C5 /* DatadogTestsObserver.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6184751426EFCF1300C7C9C5 /* DatadogTestsObserver.swift */; }; @@ -493,6 +494,7 @@ 61FF283024BC5E2D000B3D9B /* RUMEventFileOutputTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61FF282F24BC5E2D000B3D9B /* RUMEventFileOutputTests.swift */; }; 61FF416225EE5FF400CE35EC /* CrashReportingWithLoggingIntegrationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61FF416125EE5FF400CE35EC /* CrashReportingWithLoggingIntegrationTests.swift */; }; 61FF9A4525AC5DEA001058CC /* RUMViewIdentity.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61FF9A4425AC5DEA001058CC /* RUMViewIdentity.swift */; }; + 61FFFB89278457D400401A28 /* KronosMonitor.swift in Sources */ = {isa = PBXBuildFile; fileRef = 61FFFB88278457D300401A28 /* KronosMonitor.swift */; }; 9E26E6B924C87693000B3270 /* RUMDataModels.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9E26E6B824C87693000B3270 /* RUMDataModels.swift */; }; 9E2EF44F2694FA14008A7DAE /* VitalInfoSamplerTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9E2EF44E2694FA14008A7DAE /* VitalInfoSamplerTests.swift */; }; 9E307C3224C8846D0039607E /* RUMDataModels.swift in Sources */ = {isa = PBXBuildFile; fileRef = 9E26E6B824C87693000B3270 /* RUMDataModels.swift */; }; @@ -950,6 +952,7 @@ 617B954124BF4E7600E6F443 /* RUMMonitorConfigurationTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RUMMonitorConfigurationTests.swift; sourceTree = ""; }; 617CD0DC24CEDDD300B0B557 /* RUMUserActionScopeTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RUMUserActionScopeTests.swift; sourceTree = ""; }; 617CEB382456BC3A00AD4669 /* TracingUUID.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TracingUUID.swift; sourceTree = ""; }; + 61815C05278867D1004A666C /* KronosMonitorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KronosMonitorTests.swift; sourceTree = ""; }; 618236882710560900125326 /* DebugWebviewViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DebugWebviewViewController.swift; sourceTree = ""; }; 6182374225D3DFD5006A375B /* CrashReportingWithRUMIntegrationTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CrashReportingWithRUMIntegrationTests.swift; sourceTree = ""; }; 6184751426EFCF1300C7C9C5 /* DatadogTestsObserver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DatadogTestsObserver.swift; sourceTree = ""; }; @@ -1175,6 +1178,7 @@ 61FF282F24BC5E2D000B3D9B /* RUMEventFileOutputTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RUMEventFileOutputTests.swift; sourceTree = ""; }; 61FF416125EE5FF400CE35EC /* CrashReportingWithLoggingIntegrationTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CrashReportingWithLoggingIntegrationTests.swift; sourceTree = ""; }; 61FF9A4425AC5DEA001058CC /* RUMViewIdentity.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RUMViewIdentity.swift; sourceTree = ""; }; + 61FFFB88278457D300401A28 /* KronosMonitor.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = KronosMonitor.swift; sourceTree = ""; }; 9E0542CA25F8EBBE007A3D0B /* Kronos.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = Kronos.xcframework; path = ../Carthage/Build/Kronos.xcframework; sourceTree = ""; }; 9E26E6B824C87693000B3270 /* RUMDataModels.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = RUMDataModels.swift; sourceTree = ""; }; 9E2EF44E2694FA14008A7DAE /* VitalInfoSamplerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VitalInfoSamplerTests.swift; sourceTree = ""; }; @@ -2545,6 +2549,14 @@ path = UUIDs; sourceTree = ""; }; + 61815C04278867BB004A666C /* Kronos */ = { + isa = PBXGroup; + children = ( + 61815C05278867D1004A666C /* KronosMonitorTests.swift */, + ); + path = Kronos; + sourceTree = ""; + }; 6182374125D3DFB8006A375B /* CrashReporting */ = { isa = PBXGroup; children = ( @@ -3107,6 +3119,7 @@ children = ( 61F1878325FA121F0022CE9A /* InternalMonitoringFeature.swift */, 61F1878C25FA33A90022CE9A /* InternalMonitor.swift */, + 61FFFB8A2784593B00401A28 /* Kronos */, ); path = InternalMonitoring; sourceTree = ""; @@ -3115,6 +3128,7 @@ isa = PBXGroup; children = ( 61F187FB25FA7DD60022CE9A /* InternalMonitoringFeatureTests.swift */, + 61815C04278867BB004A666C /* Kronos */, ); path = InternalMonitoring; sourceTree = ""; @@ -3288,6 +3302,14 @@ path = RUMEventOutputs; sourceTree = ""; }; + 61FFFB8A2784593B00401A28 /* Kronos */ = { + isa = PBXGroup; + children = ( + 61FFFB88278457D300401A28 /* KronosMonitor.swift */, + ); + path = Kronos; + sourceTree = ""; + }; 9E06058F26EF904200F5F935 /* LongTasks */ = { isa = PBXGroup; children = ( @@ -4111,6 +4133,7 @@ 61D3E0D3277B23F1008BE766 /* KronosDNSResolver.swift in Sources */, 6149FB3A2529D17F00EE387A /* InternalURLsFilter.swift in Sources */, 611529A525E3DD51004F740E /* ValuePublisher.swift in Sources */, + 61FFFB89278457D400401A28 /* KronosMonitor.swift in Sources */, 618DCFD724C7265300589570 /* RUMUUID.swift in Sources */, 61B038662527247800518F3C /* URLSessionInterceptor.swift in Sources */, 61B03898252724DE00518F3C /* TracingHTTPHeaders.swift in Sources */, @@ -4347,6 +4370,7 @@ 61F1A623249B811200075390 /* Encoding.swift in Sources */, 6114FE3B25768AA90084E372 /* ConsentProviderTests.swift in Sources */, 61133C642423990D00786299 /* LoggerTests.swift in Sources */, + 61815C06278867D1004A666C /* KronosMonitorTests.swift in Sources */, 617B953D24BF4D8F00E6F443 /* RUMMonitorTests.swift in Sources */, D244B3A3271EDACD003E1B29 /* SwiftUIExtensionsTests.swift in Sources */, 61F187FC25FA7DD60022CE9A /* InternalMonitoringFeatureTests.swift in Sources */, diff --git a/Sources/Datadog/Core/System/Time/ServerDateProvider.swift b/Sources/Datadog/Core/System/Time/ServerDateProvider.swift index 33b90628f4..fe6957bab7 100644 --- a/Sources/Datadog/Core/System/Time/ServerDateProvider.swift +++ b/Sources/Datadog/Core/System/Time/ServerDateProvider.swift @@ -20,6 +20,13 @@ internal class NTPServerDateProvider: ServerDateProvider { /// Server offset publisher. private let publisher: ValuePublisher = ValuePublisher(initialValue: nil) + /// Monitor collecting Kronos telemetry - only enabled if Internal Monitoring is configured. + private let kronosMonitor: KronosMonitor? + + init(kronosMonitor: KronosMonitor? = nil) { + self.kronosMonitor = kronosMonitor + } + /// Returns the server time offset or `nil` if not yet determined. /// This offset gets more precise while synchronization is pending. var offset: TimeInterval? { @@ -29,6 +36,7 @@ internal class NTPServerDateProvider: ServerDateProvider { func synchronize(with pool: String, completion: @escaping (TimeInterval?) -> Void) { KronosClock.sync( from: pool, + monitor: kronosMonitor, first: { [weak self] _, offset in self?.publisher.publishAsync(offset) }, @@ -36,7 +44,8 @@ internal class NTPServerDateProvider: ServerDateProvider { // Kronos only notifies for the first and last samples. // In case, the last sample does not return an offset, we calculate the offset // from the returned `now` parameter. The `now` parameter in this callback - // is `Clock.now`, so it is possible to have `now` but not `offset`. + // is `Clock.now` and it can be either offset computed from prior samples or persisted + // in user defaults from previous app session. if let offset = offset { self?.publisher.publishAsync(offset) } else if let now = now { diff --git a/Sources/Datadog/Datadog.swift b/Sources/Datadog/Datadog.swift index 0c32a83d37..f58ac1b424 100644 --- a/Sources/Datadog/Datadog.swift +++ b/Sources/Datadog/Datadog.swift @@ -172,11 +172,19 @@ public class Datadog { throw ProgrammerError(description: "SDK is already initialized.") } + let kronosMonitor: KronosMonitor? +#if DD_SDK_ENABLE_INTERNAL_MONITORING + // Collect Kronos telemetry only if internal monitoring is compiled and enabled + kronosMonitor = configuration.internalMonitoring != nil ? KronosInternalMonitor() : nil +#else + kronosMonitor = nil +#endif + let consentProvider = ConsentProvider(initialConsent: initialTrackingConsent) let dateProvider = SystemDateProvider() let dateCorrector = DateCorrector( deviceDateProvider: dateProvider, - serverDateProvider: NTPServerDateProvider() + serverDateProvider: NTPServerDateProvider(kronosMonitor: kronosMonitor) ) let userInfoProvider = UserInfoProvider() let networkConnectionInfoProvider = NetworkConnectionInfoProvider() @@ -305,6 +313,12 @@ public class Datadog { Global.crashReporter = CrashReporter(crashReportingFeature: crashReportingFeature) Global.crashReporter?.sendCrashReportIfFound() } + + // If Internal Monitoring is enabled and Kronos internal monitor is configured, + // export result of NTP sync to IM. + if let internalMonitoringFeature = InternalMonitoringFeature.instance { + kronosMonitor?.export(to: internalMonitoringFeature.monitor) + } } internal init( diff --git a/Sources/Datadog/InternalMonitoring/Kronos/KronosMonitor.swift b/Sources/Datadog/InternalMonitoring/Kronos/KronosMonitor.swift new file mode 100644 index 0000000000..ae927989d8 --- /dev/null +++ b/Sources/Datadog/InternalMonitoring/Kronos/KronosMonitor.swift @@ -0,0 +1,294 @@ +/* + * Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. + * This product includes software developed at Datadog (https://www.datadoghq.com/). + * Copyright 2019-2020 Datadog, Inc. + */ + +import Foundation +import Network + +/// Telemetry monitor for `KronosClock`. +internal protocol KronosMonitor { + // MARK: - Clock sync + + func notifySyncStart(from pool: String) + func notifySyncEnd(serverOffset: TimeInterval?) + + // MARK: - DNS resolution + + func notifyResolveDNS(to addresses: [KronosInternetAddress]) + + // MARK: - IP querying + + func notifyStartQuerying(ip address: KronosInternetAddress, numberOfSamples: Int) + func notifyReceivePacket(from address: KronosInternetAddress, isValidSample: Bool) + func notifyEndQuerying(ip address: KronosInternetAddress) + + // MARK: - Telemetry Export + + func export(to exporter: InternalMonitor) +} + +#if DD_SDK_ENABLE_INTERNAL_MONITORING + +/// `KronosMonitor` for diagnosing `KronosClock` in Internal Monitoring. Not used when Internal Monitoring is disabled (by default). +/// It is only implemented to collect extra telemetry for troubleshooting https://github.com/DataDog/dd-sdk-ios/issues/647 +internal class KronosInternalMonitor: KronosMonitor { + struct SyncResult: Encodable { + /// The pool addres used by Kronos. + let pool: String + /// The server offset reported by Kronos. + let serverOffset: TimeInterval? + /// Stats for each IP resolved by Kronos DNS. + let ips: [String: IP] + + struct IP: Encodable { + let address: String? + let connectionDuration: TimeInterval? + let connectionCheckResult: IPConnectionCheckResult? + let succeededPacketsCount: Int + let failedPacketsCount: Int + } + } + + /// Used to gather stats on each IP during Kronos sync. + private struct IPStats: Encodable { + var connectionStart: Date? = nil + var connectionEnd: Date? = nil + var succeededPacketsCount = 0 + var failedPacketsCount = 0 + var checkResult: IPConnectionCheckResult? = nil + } + + /// Queue for synchronising Kronos and `IPConnectionMonitor` callbacks. + private let queue: DispatchQueue + /// Dispatch group used to synchronize Kronos and `IPConnectionMonitor` tasks. + /// It notifies readiness of all recorded details (collected asynchronously) so the `SyncResult` can be built and sent to Datadog. + private let dispatchGroup = DispatchGroup() + + /// The address of the pool being synchronised by `KronosClock`. + private var pool: String? = nil + /// The final server offset retrieved from `KronosClock` (can be `nil` if anything went wrong). + private var serverOffset: TimeInterval? = nil + /// Stats for each IP resolved from the `pool`. + private var statsByIP: [KronosInternetAddress: IPStats] = [:] + /// Connection monitor checking additional reachability for each IP resolved from the `pool`. Only available from iOS 14.2. + private let connectionMonitor: IPConnectionMonitorType? + + /// Internal Monitor for sending the `result` to Datadog. + private var exporter: InternalMonitor? = nil + + convenience init() { + let queue = DispatchQueue(label: "com.datadoghq.kronos-monitor", qos: .utility) + if #available(iOS 14.2, *) { + self.init( + queue: queue, + connectionMonitor: IPConnectionMonitor(queue: queue) + ) + } else { + self.init( + queue: queue, + connectionMonitor: nil + ) + } + } + + init(queue: DispatchQueue, connectionMonitor: IPConnectionMonitorType?) { + self.queue = queue + self.connectionMonitor = connectionMonitor + + self.dispatchGroup.enter() // await Kronos completion + self.dispatchGroup.enter() // await exporter registration + + self.dispatchGroup.notify(queue: self.queue) { [weak self] in + self?.sendTelemetryToDatadog() + } + } + + // MARK: - Clock sync + + func notifySyncStart(from pool: String) { + queue.async { + self.pool = pool + } + } + + func notifySyncEnd(serverOffset: TimeInterval?) { + queue.async { + self.serverOffset = serverOffset + self.dispatchGroup.leave() // notify Kronos completion + } + } + + // MARK: - Exporting + + func export(to exporter: InternalMonitor) { + queue.async { + self.exporter = exporter + self.dispatchGroup.leave() // notify exporter registration + } + } + + // MARK: - DNS resolution + + func notifyResolveDNS(to addresses: [KronosInternetAddress]) { + queue.async { + addresses.forEach { ip in + self.statsByIP[ip] = IPStats() + + if let connectionMonitor = self.connectionMonitor { + self.dispatchGroup.enter() // await connection check result + connectionMonitor.checkConnection(to: ip) { [weak self] result in // completion is called on `queue` + self?.statsByIP[ip]?.checkResult = result + self?.dispatchGroup.leave() // notify connection check result + } + } + } + } + } + + // MARK: - IP querying + + func notifyStartQuerying(ip address: KronosInternetAddress, numberOfSamples: Int) { + queue.async { + self.statsByIP[address]?.connectionStart = Date() + } + } + + func notifyReceivePacket(from address: KronosInternetAddress, isValidSample: Bool) { + queue.async { + if isValidSample { + self.statsByIP[address]?.succeededPacketsCount += 1 + } else { + self.statsByIP[address]?.failedPacketsCount += 1 + } + } + } + + func notifyEndQuerying(ip address: KronosInternetAddress) { + queue.async { + self.statsByIP[address]?.connectionEnd = Date() + } + } + + // MARK: - Exporting telemetry to Internal Monitoring + + private func sendTelemetryToDatadog() { + guard let sdkLogger = exporter?.sdkLogger else { + return // cannot happen + } + + guard let pool = self.pool else { + sdkLogger.debug("Kronos pool was not registered in `KronosMonitor`") // cannot happen, but log it for sanity + return + } + + var ips: [String: SyncResult.IP] = [:] + statsByIP.forEach { address, stats in + let key = "ip\(ips.count)" + ips[key] = SyncResult.IP( + address: address.host, + connectionDuration: stats.connectionStart.flatMap { stats.connectionEnd?.timeIntervalSince($0) }, + connectionCheckResult: stats.checkResult, + succeededPacketsCount: stats.succeededPacketsCount, + failedPacketsCount: stats.failedPacketsCount + ) + } + + let syncResult = SyncResult( + pool: pool, + serverOffset: serverOffset, + ips: ips + ) + + let ipsFailedDueToLocalNetworkDenied = syncResult.ips.values.filter { $0.connectionCheckResult?.isLocalNetworkDenied ?? false } + + if !ipsFailedDueToLocalNetworkDenied.isEmpty { + // Send error, as this indicates the issue reported in https://github.com/DataDog/dd-sdk-ios/issues/647. + sdkLogger.error("Kronos sync to \(pool) was blocked on trying to connect to local network", attributes: ["sync-result": syncResult]) + } else if syncResult.serverOffset != nil { + // Send info - everything went fine + sdkLogger.info("Kronos resolved \(pool) with receiving server offset", attributes: ["sync-result": syncResult]) + } else { + // Send info - something went wrong, but it could be due to network unreachability or other env factors + sdkLogger.info("Kronos resolved \(pool) but received no server offset", attributes: ["sync-result": syncResult]) + } + } +} + +// MARK: - IPConnectionMonitor + +internal struct IPConnectionCheckResult: Encodable { + let isLocalNetworkDenied: Bool + let details: String +} + +/// Checks connection to certain IP. Not used when Internal Monitoring is disabled (by default). +/// It is only implemented to collect extra telemetry for troubleshooting https://github.com/DataDog/dd-sdk-ios/issues/647 +internal protocol IPConnectionMonitorType { + func checkConnection(to ip: KronosInternetAddress, resultCallback: @escaping (IPConnectionCheckResult) -> Void) +} + +@available(iOS 14.2, *) +internal class IPConnectionMonitor: IPConnectionMonitorType { + /// Timeout for checking each connection. + private let timeout: TimeInterval = 20 + + private let queue: DispatchQueue + private var pendingConnections: [NWConnection] = [] + + init(queue: DispatchQueue) { + self.queue = queue + } + + func checkConnection(to ip: KronosInternetAddress, resultCallback: @escaping (IPConnectionCheckResult) -> Void) { + guard let host = ip.host else { + return + } + + let connection = NWConnection(host: .init(host), port: 123, using: .udp) + queue.async { self.pendingConnections.append(connection) } + queue.asyncAfter(deadline: .now() + timeout) { [weak self] in + guard let self = self else { + return + } + + if self.pendingConnections.contains(where: { $0 === connection }) { + self.cancelCheck(for: connection) + } + } + + connection.pathUpdateHandler = { [weak self] latestPath in + let checkResult: IPConnectionCheckResult + + switch latestPath.status { + case .unsatisfied: + // Here we check if the connection won't lead to reaching host in local network + // Ref.: 'How do I use the unsatisfied reason property?' + // https://developer.apple.com/forums/thread/663769 + switch latestPath.unsatisfiedReason { + case .localNetworkDenied: checkResult = .init(isLocalNetworkDenied: true, details: "unsatisfied: localNetworkDenied") + case .notAvailable: checkResult = .init(isLocalNetworkDenied: false, details: "unsatisfied: notAvailable") + case .cellularDenied: checkResult = .init(isLocalNetworkDenied: false, details: "unsatisfied: cellularDenied") + case .wifiDenied: checkResult = .init(isLocalNetworkDenied: false, details: "unsatisfied: wifiDenied") + @unknown default: checkResult = .init(isLocalNetworkDenied: false, details: "unsatisfied: unknown") + } + case .satisfied: checkResult = .init(isLocalNetworkDenied: false, details: "satisfied") + case .requiresConnection: checkResult = .init(isLocalNetworkDenied: false, details: "requiresConnection") + @unknown default: checkResult = .init(isLocalNetworkDenied: false, details: "unknown") + } + + resultCallback(checkResult) + self?.cancelCheck(for: connection) + } + + connection.start(queue: queue) + } + + private func cancelCheck(for connection: NWConnection) { + connection.cancel() + pendingConnections = pendingConnections.filter { $0 !== connection } + } +} + +#endif diff --git a/Sources/Datadog/Kronos/KronosClock.swift b/Sources/Datadog/Kronos/KronosClock.swift index e56299831e..2fc49ba1f6 100644 --- a/Sources/Datadog/Kronos/KronosClock.swift +++ b/Sources/Datadog/Kronos/KronosClock.swift @@ -73,17 +73,20 @@ internal struct KronosClock { /// - parameter pool: NTP pool that will be resolved into multiple NTP servers that will be used for /// the synchronization. /// - parameter samples: The number of samples to be acquired from each server (default 4). - /// - parameter completion: A closure that will be called after _all_ the NTP calls are finished. + /// - parameter monitor: Monitor collecting Kronos telemetry - only enabled if Internal Monitoring is configured. /// - parameter first: A closure that will be called after the first valid date is calculated. + /// - parameter completion: A closure that will be called after _all_ the NTP calls are finished. static func sync( from pool: String = "time.apple.com", samples: Int = 4, + monitor: KronosMonitor? = nil, first: ((Date, TimeInterval) -> Void)? = nil, completion: ((Date?, TimeInterval?) -> Void)? = nil ) { self.loadFromDefaults() - KronosNTPClient().query(pool: pool, numberOfSamples: samples) { offset, done, total in + monitor?.notifySyncStart(from: pool) + KronosNTPClient().query(pool: pool, numberOfSamples: samples, monitor: monitor) { offset, done, total in if let offset = offset { self.stableTime = KronosTimeFreeze(offset: offset) @@ -93,6 +96,7 @@ internal struct KronosClock { } if done == total { + monitor?.notifySyncEnd(serverOffset: offset) completion?(self.now, offset) } } diff --git a/Sources/Datadog/Kronos/KronosNTPClient.swift b/Sources/Datadog/Kronos/KronosNTPClient.swift index a720bca00d..6c2f3abe67 100644 --- a/Sources/Datadog/Kronos/KronosNTPClient.swift +++ b/Sources/Datadog/Kronos/KronosNTPClient.swift @@ -31,6 +31,7 @@ internal final class KronosNTPClient { /// - parameter numberOfSamples: The number of samples to be acquired from each server (default 4). /// - parameter maximumServers: The maximum number of servers to be queried (default 5). /// - parameter timeout: The individual timeout for each of the NTP operations. + /// - parameter monitor: Monitor collecting Kronos telemetry - only enabled if Internal Monitoring is configured. /// - parameter completion: A closure that will be response PDU on success or nil on error. func query( pool: String = "time.apple.com", @@ -39,13 +40,15 @@ internal final class KronosNTPClient { numberOfSamples: Int = kDefaultSamples, maximumServers: Int = kMaximumNTPServers, timeout: CFTimeInterval = kronosDefaultTimeout, + monitor: KronosMonitor? = nil, progress: @escaping (TimeInterval?, Int, Int) -> Void ) { var servers: [KronosInternetAddress: [KronosNTPPacket]] = [:] var completed: Int = 0 let queryIPAndStoreResult = { (address: KronosInternetAddress, totalQueries: Int) -> Void in - self.query(ip: address, port: port, version: version, timeout: timeout, numberOfSamples: numberOfSamples) { packet in + monitor?.notifyStartQuerying(ip: address, numberOfSamples: numberOfSamples) + self.query(ip: address, port: port, version: version, timeout: timeout, numberOfSamples: numberOfSamples, monitor: monitor) { packet in defer { completed += 1 @@ -71,7 +74,11 @@ internal final class KronosNTPClient { } let totalServers = min(addresses.count, maximumServers) - for address in addresses[0 ..< totalServers] { + let addressesToQuery = Array(addresses[0 ..< totalServers]) + + monitor?.notifyResolveDNS(to: addressesToQuery) + + for address in addressesToQuery { queryIPAndStoreResult(address, totalServers * numberOfSamples) } } @@ -84,6 +91,7 @@ internal final class KronosNTPClient { /// - parameter version: NTP version to use (default 3). /// - parameter timeout: Timeout on socket operations. /// - parameter numberOfSamples: The number of samples to be acquired from the server (default 4). + /// - parameter monitor: Monitor collecting Kronos telemetry - only enabled if Internal Monitoring is configured. /// - parameter completion: A closure that will be response PDU on success or nil on error. func query( ip: KronosInternetAddress, @@ -91,6 +99,7 @@ internal final class KronosNTPClient { version: Int8 = 3, timeout: CFTimeInterval = kronosDefaultTimeout, numberOfSamples: Int = kDefaultSamples, + monitor: KronosMonitor? = nil, completion: @escaping (KronosNTPPacket?) -> Void ) { var timer: Timer? @@ -98,19 +107,26 @@ internal final class KronosNTPClient { defer { // If we still have samples left; we'll keep querying the same server if numberOfSamples > 1 { - self.query(ip: ip, port: port, version: version, timeout: timeout, numberOfSamples: numberOfSamples - 1, completion: completion) + self.query(ip: ip, port: port, version: version, timeout: timeout, numberOfSamples: numberOfSamples - 1, monitor: monitor, completion: completion) } } - timer?.invalidate() guard let data = data, let PDU = try? KronosNTPPacket(data: data, destinationTime: destinationTime), PDU.isValidResponse() else { + monitor?.notifyReceivePacket(from: ip, isValidSample: false) + if numberOfSamples == 1 { + monitor?.notifyEndQuerying(ip: ip) + } completion(nil) return } + monitor?.notifyReceivePacket(from: ip, isValidSample: true) + if numberOfSamples == 1 { + monitor?.notifyEndQuerying(ip: ip) + } completion(PDU) } diff --git a/Tests/DatadogTests/Datadog/InternalMonitoring/Kronos/KronosMonitorTests.swift b/Tests/DatadogTests/Datadog/InternalMonitoring/Kronos/KronosMonitorTests.swift new file mode 100644 index 0000000000..6d27b952f9 --- /dev/null +++ b/Tests/DatadogTests/Datadog/InternalMonitoring/Kronos/KronosMonitorTests.swift @@ -0,0 +1,156 @@ +/* + * Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. + * This product includes software developed at Datadog (https://www.datadoghq.com/). + * Copyright 2019-2020 Datadog, Inc. + */ + +import XCTest +@testable import Datadog + +private struct IPConnectionMonitorMock: IPConnectionMonitorType { + let queue: DispatchQueue + let results: [KronosInternetAddress: IPConnectionCheckResult] + let resultDelay: (KronosInternetAddress) -> TimeInterval + + func checkConnection(to ip: KronosInternetAddress, resultCallback: @escaping (IPConnectionCheckResult) -> Void) { + queue.asyncAfter(deadline: .now() + resultDelay(ip)) { + resultCallback(results[ip]!) + } + } +} + +class KronosMonitorTests: XCTestCase { + private let randomNTPPool: String = .mockRandom(among: .decimalDigits) + ".ntp.org" + private let ip1: KronosInternetAddress = .mockWith(ipString: "10.0.0.1") + private let ip2: KronosInternetAddress = .mockWith(ipString: "10.0.0.2") + private let ip3: KronosInternetAddress = .mockWith(ipString: "10.0.0.3") + + func testWhenNTPPoolIsResolvedToRemoteIPAddresses_andServerOffsetIsRetrieved_itChecksAllConnections_andSendsINFOLog() throws { + let (recordedLog, recordedSyncResult) = try simulateNTPSynchronisation( + to: randomNTPPool, + withDNSResolvingTo: [ + ip1: .mockWith(isLocalNetworkDenied: false), // remote IP + ip2: .mockWith(isLocalNetworkDenied: false), // remote IP + ip3: .mockWith(isLocalNetworkDenied: false), // remote IP + ], + andRetrievingServerOffset: .mockRandom() // server offset retrieved + ) + + XCTAssertEqual(recordedLog.status, .info, "It must send INFO log") + XCTAssertEqual(recordedLog.message, "Kronos resolved \(randomNTPPool) with receiving server offset") + + XCTAssertEqual(recordedSyncResult.ips.count, 3, "It must record connection status for all resolved IPs") + XCTAssertTrue(recordedSyncResult.ips.values.contains { $0.address == ip1.host! }, "It must check connection status for \(ip1.host!)") + XCTAssertTrue(recordedSyncResult.ips.values.contains { $0.address == ip2.host! }, "It must check connection status for \(ip2.host!)") + XCTAssertTrue(recordedSyncResult.ips.values.contains { $0.address == ip3.host! }, "It must check connection status for \(ip3.host!)") + XCTAssertFalse(recordedSyncResult.ips.values.contains { $0.connectionCheckResult!.isLocalNetworkDenied }, "It must report all IPs as remote") + } + + func testWhenNTPPoolIsResolvedToRemoteIPAddresses_butServerOffsetIsNotRetrieved_itChecksAllConnections_andSendsINFOLog() throws { + let (recordedLog, recordedSyncResult) = try simulateNTPSynchronisation( + to: randomNTPPool, + withDNSResolvingTo: [ + ip1: .mockWith(isLocalNetworkDenied: false), // remote IP + ip2: .mockWith(isLocalNetworkDenied: false), // remote IP + ip3: .mockWith(isLocalNetworkDenied: false), // remote IP + ], + andRetrievingServerOffset: nil // no server offset retrieved + ) + + XCTAssertEqual(recordedLog.status, .info, "It must send INFO log") + XCTAssertEqual(recordedLog.message, "Kronos resolved \(randomNTPPool) but received no server offset") + + XCTAssertEqual(recordedSyncResult.ips.count, 3, "It must record connection status for all resolved IPs") + XCTAssertTrue(recordedSyncResult.ips.values.contains { $0.address == ip1.host! }, "It must check connection status for \(ip1.host!)") + XCTAssertTrue(recordedSyncResult.ips.values.contains { $0.address == ip2.host! }, "It must check connection status for \(ip2.host!)") + XCTAssertTrue(recordedSyncResult.ips.values.contains { $0.address == ip3.host! }, "It must check connection status for \(ip3.host!)") + XCTAssertFalse(recordedSyncResult.ips.values.contains { $0.connectionCheckResult!.isLocalNetworkDenied }, "It must report all IPs as remote") + } + + func testWhenNTPPoolIsResolvedToLocalIPAddresses_itChecksAllConnections_andSendsERRORLog() throws { + let (recordedLog, recordedSyncResult) = try simulateNTPSynchronisation( + to: randomNTPPool, + withDNSResolvingTo: [ + ip1: .mockWith(isLocalNetworkDenied: false), // remote IP + ip2: .mockWith(isLocalNetworkDenied: false), // remote IP + ip3: .mockWith(isLocalNetworkDenied: true), // local IP, IRL this will trigger 'Local Network Permission' + ], + andRetrievingServerOffset: Bool.mockRandom() ? .mockRandom() : nil // no matter if retrieving offset + ) + + XCTAssertEqual(recordedLog.status, .error, "It must send ERROR log") + XCTAssertEqual(recordedLog.message, "Kronos sync to \(randomNTPPool) was blocked on trying to connect to local network") + + XCTAssertEqual(recordedSyncResult.ips.count, 3, "It must record connection status for all resolved IPs") + XCTAssertTrue(recordedSyncResult.ips.values.contains { $0.address == ip1.host! }, "It must check connection status for \(ip1.host!)") + XCTAssertTrue(recordedSyncResult.ips.values.contains { $0.address == ip2.host! }, "It must check connection status for \(ip2.host!)") + XCTAssertTrue(recordedSyncResult.ips.values.contains { $0.address == ip3.host! }, "It must check connection status for \(ip3.host!)") + XCTAssertTrue( + recordedSyncResult.ips.values.contains { $0.connectionCheckResult!.isLocalNetworkDenied } && + recordedSyncResult.ips.values.contains { !$0.connectionCheckResult!.isLocalNetworkDenied }, + "It must report both local and remote IPs" + ) + } + + /// Simulates `KronosClock` execution and returns telemetry uploaded to Datadog. + func simulateNTPSynchronisation( + to pool: String, + withDNSResolvingTo dnsResolution: [KronosInternetAddress: IPConnectionCheckResult], + andRetrievingServerOffset serverOffset: TimeInterval? + ) throws -> (log: LogEvent, syncResult: KronosInternalMonitor.SyncResult) { + let queue = DispatchQueue(label: "kronos-monitor-tests") + + // Given + let resolvedIPs = Array(dnsResolution.keys) + let mockIPConnectionMonitor = IPConnectionMonitorMock( + queue: queue, + results: dnsResolution, + resultDelay: { _ in .mockRandom(min: 0.1, max: 0.5) } // random delay for each connection + ) + let mockLogOutput = LogOutputMock() + let mockExporter = InternalMonitor( + sdkLogger: .mockWith(logOutput: mockLogOutput) + ) + + let monitor = KronosInternalMonitor(queue: queue, connectionMonitor: mockIPConnectionMonitor) + + // When + monitor.notifySyncStart(from: pool) + monitor.notifyResolveDNS(to: resolvedIPs) + resolvedIPs.forEach { ip in + monitor.notifyStartQuerying(ip: ip, numberOfSamples: 1) + monitor.notifyReceivePacket(from: ip, isValidSample: .mockRandom()) + monitor.notifyEndQuerying(ip: ip) + } + monitor.notifySyncEnd(serverOffset: serverOffset) + monitor.export(to: mockExporter) + + // Then + let expectation = self.expectation(description: "Send telemetry log") + mockLogOutput.onLogRecorded = { _ in expectation.fulfill() } + + waitForExpectations(timeout: 5, handler: nil) + + let recordedLog = try XCTUnwrap(mockLogOutput.recordedLog) + let recordedSyncResult = try XCTUnwrap(recordedLog.attributes.userAttributes["sync-result"] as? KronosInternalMonitor.SyncResult) + return (recordedLog, recordedSyncResult) + } +} + +// MARK: - Helpers + +private extension KronosInternetAddress { + static func mockWith(ipString: String) -> KronosInternetAddress { + var addr = sockaddr_in() + addr.sin_len = UInt8(MemoryLayout.size(ofValue: addr)) + addr.sin_family = sa_family_t(AF_INET) + addr.sin_addr.s_addr = inet_addr(ipString) + return .ipv4(addr) + } +} + +private extension IPConnectionCheckResult { + static func mockWith(isLocalNetworkDenied: Bool) -> IPConnectionCheckResult { + return .init(isLocalNetworkDenied: isLocalNetworkDenied, details: .mockRandom()) + } +} From 62b76a80617cc4d7690c7a0d4265239084c70a9b Mon Sep 17 00:00:00 2001 From: Maciek Grzybowski Date: Fri, 7 Jan 2022 16:53:34 +0100 Subject: [PATCH 2/2] RUMM-1744 Instrument Kronos E2E tests with `KronosInternalMonitor` --- Datadog/E2ETests/NTP/KronosE2ETests.swift | 33 +++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/Datadog/E2ETests/NTP/KronosE2ETests.swift b/Datadog/E2ETests/NTP/KronosE2ETests.swift index 618ddb41c5..a5b93cc633 100644 --- a/Datadog/E2ETests/NTP/KronosE2ETests.swift +++ b/Datadog/E2ETests/NTP/KronosE2ETests.swift @@ -7,8 +7,11 @@ @testable import Datadog class KronosE2ETests: E2ETests { - /// The logger sending additional telemetry on Kronos execution. These logs are available in Mobile Integrations org. + /// The logger sending logs on Kronos execution. These logs are available in Mobile Integrations org. private var logger: Logger! // swiftlint:disable:this implicitly_unwrapped_optional + /// The logger sending telemetry on internal Kronos execution. These logs are available in Mobile Integrations org. + private var telemetryLogger: Logger! // swiftlint:disable:this implicitly_unwrapped_optional + private let queue = DispatchQueue(label: "kronos-monitor-queue") override func setUp() { super.setUp() @@ -16,13 +19,33 @@ class KronosE2ETests: E2ETests { .builder .set(loggerName: "kronos-e2e") .build() + telemetryLogger = Logger.builder + .set(loggerName: "kronos-e2e-internal-telemetry") + .sendNetworkInfo(true) + .build() } override func tearDown() { logger = nil + telemetryLogger = nil super.tearDown() } + /// Creates kronos monitor for checking connections to all IPs resolved from NTP pool and sending additional telemetry on their statuses. + private func createKronosMonitor() -> KronosMonitor? { + if #available(iOS 14.2, *) { + let monitor = KronosInternalMonitor( + queue: queue, + connectionMonitor: IPConnectionMonitor(queue: queue) + ) + // Here we redirect IM's logger to E2E Kronos logger (`telemetryLogger`) to send data to Mobile Integrations org, not IM's org + monitor.export(to: InternalMonitor(sdkLogger: telemetryLogger)) + return monitor + } else { + return nil + } + } + /// TODO: RUMM-1859: Add E2E tests for monitoring Kronos in nightly tests func test_kronos_clock_performs_sync_using_datadog_ntp_pool() { // E2E:wip /// The result of `KronosClock.sync()`. @@ -58,6 +81,7 @@ class KronosE2ETests: E2ETests { KronosClock.sync( from: pool, samples: numberOfSamplesForEachIP, + monitor: createKronosMonitor(), first: { date, offset in // this closure could not be called if all samples to all servers resulted with failure result.firstReceivedDate = date result.firstReceivedOffset = offset @@ -122,6 +146,7 @@ class KronosE2ETests: E2ETests { func performKronosNTPClientQuery() -> KronosNTPClientQueryResult { let testTimeout: TimeInterval = 30 + let monitor = createKronosMonitor() // Given let pool = "2.datadog.pool.ntp.org" // a pool resolved to multiple IPv4 and IPv6 addresses (e.g. 4 + 4) @@ -136,18 +161,22 @@ class KronosE2ETests: E2ETests { let completionExpectation = expectation(description: "It completes all samples for all IPs") var result = KronosNTPClientQueryResult() + monitor?.notifySyncStart(from: pool) // must be notified by hand because normally it's called from `KronosClock.sync()` + KronosNTPClient() .query( pool: pool, numberOfSamples: numberOfSamplesForEachIP, maximumServers: .max, // query all resolved IPs in the pool - to include both IPv4 and IPv6 - timeout: timeoutForEachSample + timeout: timeoutForEachSample, + monitor: monitor ) { offset, completed, total in result.receivedOffsets.append(offset) result.numberOfCompletedSamples = completed result.expectedNumberOfSamples = total if completed == total { + monitor?.notifySyncEnd(serverOffset: offset) // must be notified by hand because normally it's called from `KronosClock.sync()` completionExpectation.fulfill() } }