Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

health, grpc: Deliver health service updates through the health listener #7900

Merged
merged 5 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion balancer/pickfirst/pickfirstleaf/pickfirstleaf_ext_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1495,7 +1495,7 @@ func subConnAddresses(ctx context.Context, cc *testutils.BalancerClientConn, sub
return addresses, nil
}

// stateStoringBalancer stores the state of the SubConns being created.
// stateStoringBalancer stores the state of the subconns being created.
type stateStoringBalancer struct {
balancer.Balancer
mu sync.Mutex
Expand Down
61 changes: 58 additions & 3 deletions balancer_wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,19 @@ func (acbw *acBalancerWrapper) closeProducers() {
}
}

// HealthCheckOptions are the options to configure the health check producer.
//
// # Experimental
//
// Notice: This type is EXPERIMENTAL and may be changed or removed in a
// later release.
type HealthCheckOptions struct {
// Name of the gRPC service running on the server for reporting health state.
// If the service name is empty, client side health checking will be disabled.
HealthServiceName string
easwars marked this conversation as resolved.
Show resolved Hide resolved
Listener func(balancer.SubConnState)
easwars marked this conversation as resolved.
Show resolved Hide resolved
}

// RegisterHealthListener accepts a health listener from the LB policy. It sends
// updates to the health listener as long as the SubConn's connectivity state
// doesn't change and a new health listener is not registered. To invalidate
Expand All @@ -432,8 +445,19 @@ func (acbw *acBalancerWrapper) RegisterHealthListener(listener func(balancer.Sub
// registered health listeners.
hd := newHealthData(connectivity.Ready)
acbw.healthData = hd
if listener == nil {
return

// Client side health checking is enabled when all the following
// conditions are satisfied:
// 1. The health check config is present in the service config.
// 2. Health checking is not disabled using the dial option.
// 3. The health package is imported.
regHealthLisFn := internal.RegisterClientHealthCheckListener
healthCheckEnabled := !acbw.ccb.cc.dopts.disableHealthCheck && regHealthLisFn != nil
var cfg *healthCheckConfig
if healthCheckEnabled {
// Avoid acquiring cc.mu unless necessary.
cfg = acbw.ac.cc.healthCheckConfig()
healthCheckEnabled = cfg != nil
easwars marked this conversation as resolved.
Show resolved Hide resolved
}

acbw.ccb.serializer.TrySchedule(func(ctx context.Context) {
Expand All @@ -447,6 +471,37 @@ func (acbw *acBalancerWrapper) RegisterHealthListener(listener func(balancer.Sub
if curHD != hd {
return
}
listener(balancer.SubConnState{ConnectivityState: connectivity.Ready})
if !healthCheckEnabled {
if listener != nil {
listener(balancer.SubConnState{ConnectivityState: connectivity.Ready})
}
return
}
// Serialize the health updates from the health producer with
// other calls into the LB policy.
listenerWrapper := func(scs balancer.SubConnState) {
acbw.ccb.serializer.TrySchedule(func(ctx context.Context) {
if ctx.Err() != nil || acbw.ccb.balancer == nil {
return
}
acbw.healthMu.Lock()
curHD := acbw.healthData
acbw.healthMu.Unlock()
if curHD != hd {
return
}
easwars marked this conversation as resolved.
Show resolved Hide resolved
listener(scs)
})
}

if listener == nil {
listenerWrapper = nil
}
easwars marked this conversation as resolved.
Show resolved Hide resolved

healthOpts := HealthCheckOptions{
HealthServiceName: cfg.ServiceName,
Listener: listenerWrapper,
}
regHealthLisFn.(func(context.Context, balancer.SubConn, HealthCheckOptions))(ctx, acbw, healthOpts)
easwars marked this conversation as resolved.
Show resolved Hide resolved
})
}
121 changes: 121 additions & 0 deletions health/producer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
*
* Copyright 2024 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package health

import (
"context"
"sync"

"google.golang.org/grpc"
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/internal"
"google.golang.org/grpc/internal/grpcsync"
"google.golang.org/grpc/status"
)

func init() {
producerBuilderSingleton = &producerBuilder{}
internal.RegisterClientHealthCheckListener = RegisterClientSideHealthCheckListener
}

type producerBuilder struct{}

var producerBuilderSingleton *producerBuilder

// Build constructs and returns a producer and its cleanup function.
func (*producerBuilder) Build(cci any) (balancer.Producer, func()) {
doneCh := make(chan struct{})
p := &healthServiceProducer{
cc: cci.(grpc.ClientConnInterface),
cancelDone: doneCh,
cancel: grpcsync.OnceFunc(func() {
close(doneCh)
}),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider a grpcsync.Event instead of having two fields here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was using the two fields, one to trigger the cancellation of the context that is passed to the health client and the second to wait till the health check client to return. Waiting for the client close isn't required. The health checking in addrConn also cancels the context and lets the client shut down asynchronously. I got rid of the channel to wait for the client to return here.

grpc-go/clientconn.go

Lines 1342 to 1354 in c1b6b37

onClose := func(r transport.GoAwayReason) {
ac.mu.Lock()
defer ac.mu.Unlock()
// adjust params based on GoAwayReason
ac.adjustParams(r)
if ctx.Err() != nil {
// Already shut down or connection attempt canceled. tearDown() or
// updateAddrs() already cleared the transport and canceled hctx
// via ac.ctx, and we expected this connection to be closed, so do
// nothing here.
return
}
hcancel()

}
return p, func() {
p.mu.Lock()
defer p.mu.Unlock()
p.cancel()
<-p.cancelDone
}
}

type healthServiceProducer struct {
// The following fields are initialized at build time and read-only after
// that and therefore do not need to be guarded by a mutex.
cc grpc.ClientConnInterface

mu sync.Mutex
cancel func()
cancelDone chan (struct{})
}

// RegisterClientSideHealthCheckListener accepts a listener to provide server
// health state via the health service.
//
// # Experimental
//
// Notice: This type is EXPERIMENTAL and may be changed or removed in a
// later release.
func RegisterClientSideHealthCheckListener(ctx context.Context, sc balancer.SubConn, opts grpc.HealthCheckOptions) {
easwars marked this conversation as resolved.
Show resolved Hide resolved
pr, _ := sc.GetOrBuildProducer(producerBuilderSingleton)
p := pr.(*healthServiceProducer)
p.mu.Lock()
defer p.mu.Unlock()
p.cancel()
<-p.cancelDone
if opts.Listener == nil {
return
}

p.cancelDone = make(chan struct{})
ctx, cancel := context.WithCancel(ctx)
p.cancel = cancel

go p.startHealthCheck(ctx, sc, opts, p.cancelDone)
}

func (p *healthServiceProducer) startHealthCheck(ctx context.Context, sc balancer.SubConn, opts grpc.HealthCheckOptions, closeCh chan struct{}) {
defer close(closeCh)
serviceName := opts.HealthServiceName
easwars marked this conversation as resolved.
Show resolved Hide resolved
newStream := func(method string) (any, error) {
return p.cc.NewStream(ctx, &grpc.StreamDesc{ServerStreams: true}, method)
}

setConnectivityState := func(state connectivity.State, err error) {
opts.Listener(balancer.SubConnState{
ConnectivityState: state,
ConnectionError: err,
})
}

// Call the function through the internal variable as tests use it for
// mocking.
err := internal.HealthCheckFunc(ctx, newStream, setConnectivityState, serviceName)
if err == nil {
return
}
if status.Code(err) == codes.Unimplemented {
logger.Errorf("Subchannel health check is unimplemented at server side, thus health check is disabled for SubConn %p", sc)
} else {
logger.Errorf("Health checking failed for SubConn %p: %v", sc, err)
}

Check warning on line 120 in health/producer.go

View check run for this annotation

Codecov / codecov/patch

health/producer.go#L119-L120

Added lines #L119 - L120 were not covered by tests
}
3 changes: 3 additions & 0 deletions internal/internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ import (
var (
// HealthCheckFunc is used to provide client-side LB channel health checking
HealthCheckFunc HealthChecker
// RegisterClientHealthCheckListener is used to provide a listener for
// updates from the client-side health checking service.
RegisterClientHealthCheckListener any // func(context.Context, balancer.SubConn, grpc.HealthCheckOptions)
// BalancerUnregister is exported by package balancer to unregister a balancer.
BalancerUnregister func(name string)
// KeepaliveMinPingTime is the minimum ping interval. This must be 10s by
Expand Down
Loading
Loading