Skip to content

Commit

Permalink
[Metrics] Metrics in core commissioning phase (project-chip#33247)
Browse files Browse the repository at this point in the history
* [Metrics] Metrics in core commissioning phase

- Added metrics tracing in core parts of the commissionning flow:
  - PASE
  - OperationalSessionSetup
  - Different stages of commissioning

* Fixing code review comments

* Fixed a typo

* Fixed Code Review Comment

* Update src/app/OperationalSessionSetup.cpp

Co-authored-by: Boris Zbarsky <[email protected]>

* Resytler fixes

---------

Co-authored-by: Boris Zbarsky <[email protected]>
  • Loading branch information
anush-apple and bzbarsky-apple authored May 17, 2024
1 parent 320f83e commit 443bb9c
Show file tree
Hide file tree
Showing 9 changed files with 268 additions and 41 deletions.
22 changes: 22 additions & 0 deletions src/app/OperationalSessionSetup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,13 @@
#include <lib/support/logging/CHIPLogging.h>
#include <system/SystemClock.h>
#include <system/SystemLayer.h>
#include <tracing/metric_event.h>

using namespace chip::Callback;
using chip::AddressResolve::NodeLookupRequest;
using chip::AddressResolve::Resolver;
using chip::AddressResolve::ResolveResult;
using namespace chip::Tracing;

namespace chip {

Expand Down Expand Up @@ -305,9 +307,11 @@ CHIP_ERROR OperationalSessionSetup::EstablishConnection(const ReliableMessagePro
mCASEClient = mClientPool->Allocate();
ReturnErrorCodeIf(mCASEClient == nullptr, CHIP_ERROR_NO_MEMORY);

MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession);
CHIP_ERROR err = mCASEClient->EstablishSession(mInitParams, mPeerId, mDeviceAddress, config, this);
if (err != CHIP_NO_ERROR)
{
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err);
CleanupCASEClient();
return err;
}
Expand Down Expand Up @@ -503,6 +507,10 @@ void OperationalSessionSetup::OnSessionEstablishmentError(CHIP_ERROR error, Sess
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
}

// Session failed to be established. This is when discovery is also stopped
MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, error);
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, error);

DequeueConnectionCallbacks(error, stage);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
}
Expand All @@ -521,6 +529,11 @@ void OperationalSessionSetup::OnSessionEstablished(const SessionHandle & session
VerifyOrReturn(mState == State::Connecting,
ChipLogError(Discovery, "OnSessionEstablished was called while we were not connecting"));

// Session has been established. This is when discovery is also stopped
MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, CHIP_NO_ERROR);

MATTER_LOG_METRIC_END(kMetricDeviceCASESession, CHIP_NO_ERROR);

if (!mSecureSession.Grab(session))
{
// Got an invalid session, just dispatch an error. We have to do this
Expand Down Expand Up @@ -591,6 +604,7 @@ CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
{
--mResolveAttemptsAllowed;
}
MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);
#endif // CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES

// NOTE: This is public API that can be used to update our stored peer
Expand All @@ -605,6 +619,10 @@ CHIP_ERROR OperationalSessionSetup::LookupPeerAddress()
return CHIP_NO_ERROR;
}

// This code can be reached multiple times, if we discover multiple addresses or do retries.
// The metric backend can handle this and always picks the earliest occurrence as the start of the event.
MATTER_LOG_METRIC_BEGIN(kMetricDeviceOperationalDiscovery);

auto const * fabricInfo = mInitParams.fabricTable->FindFabricWithIndex(mPeerId.GetFabricIndex());
VerifyOrReturnError(fabricInfo != nullptr, CHIP_ERROR_INVALID_FABRIC_INDEX);

Expand Down Expand Up @@ -676,6 +694,8 @@ void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerI
--mAttemptsDone;
}

MATTER_LOG_METRIC(kMetricDeviceOperationalDiscoveryAttemptCount, mAttemptsDone);

CHIP_ERROR err = LookupPeerAddress();
if (err == CHIP_NO_ERROR)
{
Expand All @@ -690,6 +710,8 @@ void OperationalSessionSetup::OnNodeAddressResolutionFailed(const PeerId & peerI
}
#endif

MATTER_LOG_METRIC_END(kMetricDeviceOperationalDiscovery, reason);

// No need to modify any variables in `this` since call below releases `this`.
DequeueConnectionCallbacks(reason);
// Do not touch `this` instance anymore; it has been destroyed in DequeueConnectionCallbacks.
Expand Down
40 changes: 34 additions & 6 deletions src/controller/CHIPDeviceController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#include <protocols/secure_channel/MessageCounterManager.h>
#include <setup_payload/QRCodeSetupPayloadParser.h>
#include <tracing/macros.h>
#include <tracing/metric_event.h>

#if CONFIG_NETWORK_LAYER_BLE
#include <ble/Ble.h>
Expand All @@ -79,6 +80,7 @@ using namespace chip::Transport;
using namespace chip::Credentials;
using namespace chip::app::Clusters;
using namespace chip::Crypto;
using namespace chip::Tracing;

namespace chip {
namespace Controller {
Expand Down Expand Up @@ -666,16 +668,20 @@ CHIP_ERROR DeviceCommissioner::PairDevice(NodeId remoteDeviceId, const char * se
CHIP_ERROR DeviceCommissioner::PairDevice(NodeId remoteDeviceId, RendezvousParameters & params)
{
MATTER_TRACE_SCOPE("PairDevice", "DeviceCommissioner");
ReturnErrorOnFailure(EstablishPASEConnection(remoteDeviceId, params));
return Commission(remoteDeviceId);
ReturnErrorOnFailureWithMetric(kMetricDeviceCommissionerCommission, EstablishPASEConnection(remoteDeviceId, params));
auto errorCode = Commission(remoteDeviceId);
VerifyOrDoWithMetric(kMetricDeviceCommissionerCommission, CHIP_NO_ERROR == errorCode, errorCode);
return errorCode;
}

CHIP_ERROR DeviceCommissioner::PairDevice(NodeId remoteDeviceId, RendezvousParameters & rendezvousParams,
CommissioningParameters & commissioningParams)
{
MATTER_TRACE_SCOPE("PairDevice", "DeviceCommissioner");
ReturnErrorOnFailure(EstablishPASEConnection(remoteDeviceId, rendezvousParams));
return Commission(remoteDeviceId, commissioningParams);
ReturnErrorOnFailureWithMetric(kMetricDeviceCommissionerCommission, EstablishPASEConnection(remoteDeviceId, rendezvousParams));
auto errorCode = Commission(remoteDeviceId, commissioningParams);
VerifyOrDoWithMetric(kMetricDeviceCommissionerCommission, CHIP_NO_ERROR == errorCode, errorCode);
return errorCode;
}

CHIP_ERROR DeviceCommissioner::EstablishPASEConnection(NodeId remoteDeviceId, const char * setUpCode, DiscoveryType discoveryType,
Expand All @@ -689,6 +695,8 @@ CHIP_ERROR DeviceCommissioner::EstablishPASEConnection(NodeId remoteDeviceId, co
CHIP_ERROR DeviceCommissioner::EstablishPASEConnection(NodeId remoteDeviceId, RendezvousParameters & params)
{
MATTER_TRACE_SCOPE("EstablishPASEConnection", "DeviceCommissioner");
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCommissionerPASESession);

CHIP_ERROR err = CHIP_NO_ERROR;
CommissioneeDeviceProxy * device = nullptr;
CommissioneeDeviceProxy * current = nullptr;
Expand Down Expand Up @@ -736,6 +744,7 @@ CHIP_ERROR DeviceCommissioner::EstablishPASEConnection(NodeId remoteDeviceId, Re
// We already have an open secure session to this device, call the callback immediately and early return.
mPairingDelegate->OnPairingComplete(CHIP_NO_ERROR);
}
MATTER_LOG_METRIC_END(kMetricDeviceCommissionerPASESession, CHIP_NO_ERROR);
return CHIP_NO_ERROR;
}
if (current->IsSessionSetupInProgress())
Expand Down Expand Up @@ -818,6 +827,7 @@ CHIP_ERROR DeviceCommissioner::EstablishPASEConnection(NodeId remoteDeviceId, Re
{
ReleaseCommissioneeDevice(device);
}
MATTER_LOG_METRIC_END(kMetricDeviceCommissionerPASESession, err);
}

return err;
Expand Down Expand Up @@ -869,8 +879,10 @@ CHIP_ERROR DeviceCommissioner::Commission(NodeId remoteDeviceId, CommissioningPa
ChipLogError(Controller, "No default commissioner is specified");
return CHIP_ERROR_INCORRECT_STATE;
}
ReturnErrorOnFailure(mDefaultCommissioner->SetCommissioningParameters(params));
return Commission(remoteDeviceId);
ReturnErrorOnFailureWithMetric(kMetricDeviceCommissionerCommission, mDefaultCommissioner->SetCommissioningParameters(params));
auto errorCode = Commission(remoteDeviceId);
VerifyOrDoWithMetric(kMetricDeviceCommissionerCommission, CHIP_NO_ERROR == errorCode, errorCode);
return errorCode;
}

CHIP_ERROR DeviceCommissioner::Commission(NodeId remoteDeviceId)
Expand Down Expand Up @@ -908,6 +920,7 @@ CHIP_ERROR DeviceCommissioner::Commission(NodeId remoteDeviceId)
mDefaultCommissioner->SetOperationalCredentialsDelegate(mOperationalCredentialsDelegate);
if (device->IsSecureConnected())
{
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCommissionerCommission);
mDefaultCommissioner->StartCommissioning(this, device);
}
else
Expand Down Expand Up @@ -1058,6 +1071,8 @@ void DeviceCommissioner::RendezvousCleanup(CHIP_ERROR status)

void DeviceCommissioner::OnSessionEstablishmentError(CHIP_ERROR err)
{
MATTER_LOG_METRIC_END(kMetricDeviceCommissionerPASESession, err);

if (mPairingDelegate != nullptr)
{
mPairingDelegate->OnStatusUpdate(DevicePairingDelegate::SecurePairingFailed);
Expand Down Expand Up @@ -1086,6 +1101,7 @@ void DeviceCommissioner::OnSessionEstablished(const SessionHandle & session)

ChipLogDetail(Controller, "Remote device completed SPAKE2+ handshake");

MATTER_LOG_METRIC_END(kMetricDeviceCommissionerPASESession, CHIP_NO_ERROR);
if (mPairingDelegate != nullptr)
{
mPairingDelegate->OnPairingComplete(CHIP_NO_ERROR);
Expand All @@ -1094,6 +1110,7 @@ void DeviceCommissioner::OnSessionEstablished(const SessionHandle & session)
if (mRunCommissioningAfterConnection)
{
mRunCommissioningAfterConnection = false;
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCommissionerCommission);
mDefaultCommissioner->StartCommissioning(this, device);
}
}
Expand Down Expand Up @@ -1916,6 +1933,8 @@ void DeviceCommissioner::CleanupDoneAfterError()

void DeviceCommissioner::SendCommissioningCompleteCallbacks(NodeId nodeId, const CompletionStatus & completionStatus)
{
MATTER_LOG_METRIC_END(kMetricDeviceCommissionerCommission, completionStatus.err);

ChipLogProgress(Controller, "Commissioning complete for node ID 0x" ChipLogFormatX64 ": %s", ChipLogValueX64(nodeId),
(completionStatus.err == CHIP_NO_ERROR ? "success" : completionStatus.err.AsString()));
mCommissioningStage = CommissioningStage::kSecurePairing;
Expand All @@ -1924,6 +1943,7 @@ void DeviceCommissioner::SendCommissioningCompleteCallbacks(NodeId nodeId, const
{
return;
}

mPairingDelegate->OnCommissioningComplete(nodeId, completionStatus.err);
PeerId peerId(GetCompressedFabricId(), nodeId);
if (completionStatus.err == CHIP_NO_ERROR)
Expand All @@ -1943,6 +1963,7 @@ void DeviceCommissioner::CommissioningStageComplete(CHIP_ERROR err, Commissionin
{
// Once this stage is complete, reset mDeviceBeingCommissioned - this will be reset when the delegate calls the next step.
MATTER_TRACE_SCOPE("CommissioningStageComplete", "DeviceCommissioner");
MATTER_LOG_METRIC_END(MetricKeyForCommissioningStage(mCommissioningStage), err);
VerifyOrDie(mDeviceBeingCommissioned);

NodeId nodeId = mDeviceBeingCommissioned->GetDeviceId();
Expand Down Expand Up @@ -1978,6 +1999,7 @@ void DeviceCommissioner::OnDeviceConnectedFn(void * context, Messaging::Exchange
const SessionHandle & sessionHandle)
{
// CASE session established.
MATTER_LOG_METRIC_END(kMetricDeviceCommissioningOperationalSetup, CHIP_NO_ERROR);
DeviceCommissioner * commissioner = static_cast<DeviceCommissioner *>(context);
VerifyOrDie(commissioner->mCommissioningStage == CommissioningStage::kFindOperationalForStayActive ||
commissioner->mCommissioningStage == CommissioningStage::kFindOperationalForCommissioningComplete);
Expand All @@ -1992,6 +2014,7 @@ void DeviceCommissioner::OnDeviceConnectedFn(void * context, Messaging::Exchange
void DeviceCommissioner::OnDeviceConnectionFailureFn(void * context, const ScopedNodeId & peerId, CHIP_ERROR error)
{
// CASE session establishment failed.
MATTER_LOG_METRIC_END(kMetricDeviceCommissioningOperationalSetup, error);
DeviceCommissioner * commissioner = static_cast<DeviceCommissioner *>(context);
VerifyOrDie(commissioner->mCommissioningStage == CommissioningStage::kFindOperationalForStayActive ||
commissioner->mCommissioningStage == CommissioningStage::kFindOperationalForCommissioningComplete);
Expand Down Expand Up @@ -2748,7 +2771,11 @@ void DeviceCommissioner::SendCommissioningReadRequest(DeviceProxy * proxy, Optio
void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, CommissioningStage step, CommissioningParameters & params,
CommissioningDelegate * delegate, EndpointId endpoint,
Optional<System::Clock::Timeout> timeout)

{
MATTER_LOG_METRIC(kMetricDeviceCommissionerCommissionStage, step);
MATTER_LOG_METRIC_BEGIN(MetricKeyForCommissioningStage(step));

if (params.GetCompletionStatus().err == CHIP_NO_ERROR)
{
ChipLogProgress(Controller, "Performing next commissioning step '%s'", StageToString(step));
Expand Down Expand Up @@ -3385,6 +3412,7 @@ void DeviceCommissioner::PerformCommissioningStep(DeviceProxy * proxy, Commissio
case CommissioningStage::kFindOperationalForCommissioningComplete: {
// If there is an error, CommissioningStageComplete will be called from OnDeviceConnectionFailureFn.
auto scopedPeerId = GetPeerScopedId(proxy->GetDeviceId());
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCommissioningOperationalSetup);
mSystemState->CASESessionMgr()->FindOrEstablishSession(scopedPeerId, &mOnDeviceConnectedCallback,
&mOnDeviceConnectionFailureCallback
#if CHIP_DEVICE_CONFIG_ENABLE_AUTOMATIC_CASE_RETRIES
Expand Down
119 changes: 119 additions & 0 deletions src/controller/CommissioningDelegate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,124 @@ const char * StageToString(CommissioningStage stage)
}
}

#if MATTER_TRACING_ENABLED
const char * MetricKeyForCommissioningStage(CommissioningStage stage)
{
switch (stage)
{
case kError:
return "core_commissioning_stage_error";

case kSecurePairing:
return "core_commissioning_stage_secure_pairing";

case kReadCommissioningInfo:
return "core_commissioning_stage_read_commissioning_info";

case kReadCommissioningInfo2:
return "core_commissioning_stage_read_commissioning_info2";

case kArmFailsafe:
return "core_commissioning_stage_arm_failsafe";

case kScanNetworks:
return "core_commissioning_stage_scan_networks";

case kConfigRegulatory:
return "core_commissioning_stage_config_regulatory";

case kConfigureUTCTime:
return "core_commissioning_stage_configure_utc_time";

case kConfigureTimeZone:
return "core_commissioning_stage_configure_timezone";

case kConfigureDSTOffset:
return "core_commissioning_stage_configure_dst_offset";

case kConfigureDefaultNTP:
return "core_commissioning_stage_configure_default_ntp";

case kSendPAICertificateRequest:
return "core_commissioning_stage_send_pai_certificate_request";

case kSendDACCertificateRequest:
return "core_commissioning_stage_send_dac_certificate_request";

case kSendAttestationRequest:
return "core_commissioning_stage_send_attestation_request";

case kAttestationVerification:
return "core_commissioning_stage_attestation_verification";

case kSendOpCertSigningRequest:
return "core_commissioning_stage_opcert_signing_request";

case kValidateCSR:
return "core_commissioning_stage_validate_csr";

case kGenerateNOCChain:
return "core_commissioning_stage_generate_noc_chain";

case kSendTrustedRootCert:
return "core_commissioning_stage_send_trusted_root_cert";

case kSendNOC:
return "core_commissioning_stage_send_noc";

case kConfigureTrustedTimeSource:
return "core_commissioning_stage_configure_trusted_time_source";

case kICDGetRegistrationInfo:
return "core_commissioning_stage_icd_get_registration_info";

case kICDRegistration:
return "core_commissioning_stage_icd_registration";

case kWiFiNetworkSetup:
return "core_commissioning_stage_wifi_network_setup";

case kThreadNetworkSetup:
return "core_commissioning_stage_thread_network_setup";

case kFailsafeBeforeWiFiEnable:
return "core_commissioning_stage_failsafe_before_wifi_enable";

case kFailsafeBeforeThreadEnable:
return "core_commissioning_stage_failsafe_before_thread_enable";

case kWiFiNetworkEnable:
return "core_commissioning_stage_wifi_network_enable";

case kThreadNetworkEnable:
return "core_commissioning_stage_thread_network_enable";

case kEvictPreviousCaseSessions:
return "core_commissioning_stage_evict_previous_case_sessions";

case kFindOperationalForStayActive:
return "core_commissioning_stage_find_operational_for_stay_active";

case kFindOperationalForCommissioningComplete:
return "core_commissioning_stage_find_operational_for_commissioning_complete";

case kICDSendStayActive:
return "core_commissioning_stage_icd_send_stay_active";

case kSendComplete:
return "core_commissioning_stage_send_complete";

case kCleanup:
return "core_commissioning_stage_cleanup";

case kNeedsNetworkCreds:
return "core_commissioning_stage_need_network_creds";

default:
return "core_commissioning_stage_unknown";
}
}
#endif

} // namespace Controller
} // namespace chip
Loading

0 comments on commit 443bb9c

Please sign in to comment.