Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

health, grpc: Deliver health service updates through the health listener #7900

Merged
merged 5 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 68 additions & 4 deletions balancer_wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,10 +277,17 @@ type healthData struct {
// to the LB policy. This is stored to avoid sending updates when the
// SubConn has already exited connectivity state READY.
connectivityState connectivity.State
// closeHealthProducer stores function to close the ref counted health
// producer. The health producer is automatically closed when the SubConn
// state changes.
easwars marked this conversation as resolved.
Show resolved Hide resolved
closeHealthProducer func()
}

func newHealthData(s connectivity.State) *healthData {
return &healthData{connectivityState: s}
return &healthData{
connectivityState: s,
closeHealthProducer: func() {},
}
}

// updateState is invoked by grpc to push a subConn state update to the
Expand Down Expand Up @@ -413,6 +420,20 @@ func (acbw *acBalancerWrapper) closeProducers() {
}
}

// HealthCheckOptions are the options to configure the health check producer.
//
// # Experimental
//
// Notice: This type is EXPERIMENTAL and may be changed or removed in a
// later release.
type HealthCheckOptions struct {
// ServiceName of the gRPC service running on the server for reporting health state.
easwars marked this conversation as resolved.
Show resolved Hide resolved
ServiceName string
// Listener is called when the health update is received from the health
easwars marked this conversation as resolved.
Show resolved Hide resolved
// service running on the server.
Listener func(balancer.SubConnState)
}

// RegisterHealthListener accepts a health listener from the LB policy. It sends
// updates to the health listener as long as the SubConn's connectivity state
// doesn't change and a new health listener is not registered. To invalidate
Expand All @@ -421,6 +442,7 @@ func (acbw *acBalancerWrapper) closeProducers() {
func (acbw *acBalancerWrapper) RegisterHealthListener(listener func(balancer.SubConnState)) {
acbw.healthMu.Lock()
defer acbw.healthMu.Unlock()
acbw.healthData.closeHealthProducer()
// listeners should not be registered when the connectivity state
// isn't Ready. This may happen when the balancer registers a listener
// after the connectivityState is updated, but before it is notified
Expand All @@ -436,17 +458,59 @@ func (acbw *acBalancerWrapper) RegisterHealthListener(listener func(balancer.Sub
return
}

// Client side health checking is enabled when all the following
// conditions are satisfied:
// 1. Health checking is not disabled using the dial option.
// 2. The health package is imported.
// 3. The health check config is present in the service config.
healthCheckEnabled := !acbw.ccb.cc.dopts.disableHealthCheck
regHealthLisFn := internal.RegisterClientHealthCheckListener
if regHealthLisFn == nil {
// The health package is not imported.
healthCheckEnabled = false
}
var cfg *healthCheckConfig
if healthCheckEnabled {
// Avoid acquiring cc.mu unless necessary.
cfg = acbw.ac.cc.healthCheckConfig()
healthCheckEnabled = cfg != nil
}
easwars marked this conversation as resolved.
Show resolved Hide resolved

acbw.ccb.serializer.TrySchedule(func(ctx context.Context) {
if ctx.Err() != nil || acbw.ccb.balancer == nil {
return
}
// Don't send updates if a new listener is registered.
acbw.healthMu.Lock()
defer acbw.healthMu.Unlock()
curHD := acbw.healthData
if curHD != hd {
if acbw.healthData != hd {
return
}
if !healthCheckEnabled {
listener(balancer.SubConnState{ConnectivityState: connectivity.Ready})
easwars marked this conversation as resolved.
Show resolved Hide resolved
return
}
listener(balancer.SubConnState{ConnectivityState: connectivity.Ready})
// Serialize the health updates from the health producer with
// other calls into the LB policy.
listenerWrapper := func(scs balancer.SubConnState) {
acbw.ccb.serializer.TrySchedule(func(ctx context.Context) {
if ctx.Err() != nil || acbw.ccb.balancer == nil {
return
}
acbw.healthMu.Lock()
defer acbw.healthMu.Unlock()
if acbw.healthData != hd {
return
}
listener(scs)
})
}

healthOpts := HealthCheckOptions{
ServiceName: cfg.ServiceName,
Listener: listenerWrapper,
}
fn := regHealthLisFn.(func(context.Context, balancer.SubConn, HealthCheckOptions) func())
hd.closeHealthProducer = fn(ctx, acbw, healthOpts)
})
}
116 changes: 116 additions & 0 deletions health/producer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
*
* Copyright 2024 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/

package health

import (
"context"
"sync"

"google.golang.org/grpc"
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/connectivity"
"google.golang.org/grpc/internal"
"google.golang.org/grpc/internal/grpcsync"
"google.golang.org/grpc/status"
)

func init() {
producerBuilderSingleton = &producerBuilder{}
internal.RegisterClientHealthCheckListener = registerClientSideHealthCheckListener
}

type producerBuilder struct{}

var producerBuilderSingleton *producerBuilder

// Build constructs and returns a producer and its cleanup function.
func (*producerBuilder) Build(cci any) (balancer.Producer, func()) {
doneCh := make(chan struct{})
p := &healthServiceProducer{
cc: cci.(grpc.ClientConnInterface),
cancelDone: doneCh,
cancel: grpcsync.OnceFunc(func() {
close(doneCh)
}),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider a grpcsync.Event instead of having two fields here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was using the two fields, one to trigger the cancellation of the context that is passed to the health client and the second to wait till the health check client to return. Waiting for the client close isn't required. The health checking in addrConn also cancels the context and lets the client shut down asynchronously. I got rid of the channel to wait for the client to return here.

grpc-go/clientconn.go

Lines 1342 to 1354 in c1b6b37

onClose := func(r transport.GoAwayReason) {
ac.mu.Lock()
defer ac.mu.Unlock()
// adjust params based on GoAwayReason
ac.adjustParams(r)
if ctx.Err() != nil {
// Already shut down or connection attempt canceled. tearDown() or
// updateAddrs() already cleared the transport and canceled hctx
// via ac.ctx, and we expected this connection to be closed, so do
// nothing here.
return
}
hcancel()

}
return p, func() {
p.mu.Lock()
defer p.mu.Unlock()
p.cancel()
<-p.cancelDone
}
}

type healthServiceProducer struct {
// The following fields are initialized at build time and read-only after
// that and therefore do not need to be guarded by a mutex.
cc grpc.ClientConnInterface

mu sync.Mutex
cancel func()
cancelDone chan (struct{})
}

// registerClientSideHealthCheckListener accepts a listener to provide server
// health state via the health service.
func registerClientSideHealthCheckListener(ctx context.Context, sc balancer.SubConn, opts grpc.HealthCheckOptions) func() {
pr, closeFn := sc.GetOrBuildProducer(producerBuilderSingleton)
p := pr.(*healthServiceProducer)
p.mu.Lock()
defer p.mu.Unlock()
p.cancel()
<-p.cancelDone
if opts.Listener == nil {
return closeFn
}

Check warning on line 82 in health/producer.go

View check run for this annotation

Codecov / codecov/patch

health/producer.go#L81-L82

Added lines #L81 - L82 were not covered by tests

p.cancelDone = make(chan struct{})
ctx, cancel := context.WithCancel(ctx)
p.cancel = cancel

go p.startHealthCheck(ctx, sc, opts, p.cancelDone)
return closeFn
}

func (p *healthServiceProducer) startHealthCheck(ctx context.Context, sc balancer.SubConn, opts grpc.HealthCheckOptions, closeCh chan struct{}) {
defer close(closeCh)
newStream := func(method string) (any, error) {
return p.cc.NewStream(ctx, &grpc.StreamDesc{ServerStreams: true}, method)
}

setConnectivityState := func(state connectivity.State, err error) {
opts.Listener(balancer.SubConnState{
ConnectivityState: state,
ConnectionError: err,
})
}

// Call the function through the internal variable as tests use it for
// mocking.
err := internal.HealthCheckFunc(ctx, newStream, setConnectivityState, opts.ServiceName)
if err == nil {
return
}
if status.Code(err) == codes.Unimplemented {
logger.Errorf("Subchannel health check is unimplemented at server side, thus health check is disabled for SubConn %p", sc)
} else {
logger.Errorf("Health checking failed for SubConn %p: %v", sc, err)
}

Check warning on line 115 in health/producer.go

View check run for this annotation

Codecov / codecov/patch

health/producer.go#L114-L115

Added lines #L114 - L115 were not covered by tests
}
4 changes: 4 additions & 0 deletions internal/internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ import (
var (
// HealthCheckFunc is used to provide client-side LB channel health checking
HealthCheckFunc HealthChecker
// RegisterClientHealthCheckListener is used to provide a listener for
// updates from the client-side health checking service. It returns a
// function that can be called to stop the health producer.
RegisterClientHealthCheckListener any // func(context.Context, balancer.SubConn, grpc.HealthCheckOptions) func()
// BalancerUnregister is exported by package balancer to unregister a balancer.
BalancerUnregister func(name string)
// KeepaliveMinPingTime is the minimum ping interval. This must be 10s by
Expand Down
Loading
Loading