Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: cache cancellations to prevent race conditions in pod creation #230

Merged
merged 1 commit into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/remote-controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
- name: Setup correct Go version
uses: actions/setup-go@v2
with:
go-version: '1.17'
go-version: '1.20'
- name: Install kubebuilder
run: |
#kubebuilder
Expand Down Expand Up @@ -122,7 +122,6 @@ jobs:
run: |
export PATH=$PATH:/usr/local/kubebuilder/bin
export PATH=$PATH:/usr/local/go/bin
export GOPATH=$HOME/go
export OVERRIDE_BUILD_DEPLOY_DIND_IMAGE="${{matrix.lagoon_build_image}}"
export HARBOR_URL="http://harbor.$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[0].address}').nip.io:32080"
export HARBOR_API="http://harbor.$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[0].address}').nip.io:32080/api"
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ ifeq (, $(shell which controller-gen))
CONTROLLER_GEN_TMP_DIR=$$(mktemp -d) ;\
cd $$CONTROLLER_GEN_TMP_DIR ;\
go mod init tmp ;\
go get sigs.k8s.io/controller-tools/cmd/[email protected] ;\
go install sigs.k8s.io/controller-tools/cmd/[email protected] ;\
rm -rf $$CONTROLLER_GEN_TMP_DIR ;\
}
CONTROLLER_GEN=$(GOBIN)/controller-gen
Expand Down
6 changes: 3 additions & 3 deletions controllers/v1beta1/build_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,8 @@ func (r *LagoonBuildReconciler) createNamespaceBuild(ctx context.Context,

// getOrCreateBuildResource will deepcopy the lagoon build into a new resource and push it to the new namespace
// then clean up the old one.
func (r *LagoonBuildReconciler) getOrCreateBuildResource(ctx context.Context, build *lagoonv1beta1.LagoonBuild, ns string) error {
newBuild := build.DeepCopy()
func (r *LagoonBuildReconciler) getOrCreateBuildResource(ctx context.Context, lagoonBuild *lagoonv1beta1.LagoonBuild, ns string) error {
newBuild := lagoonBuild.DeepCopy()
shreddedbacon marked this conversation as resolved.
Show resolved Hide resolved
newBuild.SetNamespace(ns)
newBuild.SetResourceVersion("")
newBuild.SetLabels(
Expand All @@ -334,7 +334,7 @@ func (r *LagoonBuildReconciler) getOrCreateBuildResource(ctx context.Context, bu
return err
}
}
err = r.Delete(ctx, build)
err = r.Delete(ctx, lagoonBuild)
if err != nil {
return err
}
Expand Down
18 changes: 13 additions & 5 deletions controllers/v1beta1/podmonitor_buildhandlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,20 @@ func (r *LagoonMonitorReconciler) handleBuildMonitor(ctx context.Context,
if err != nil {
return err
}
cancel := false
if cancelBuild, ok := jobPod.ObjectMeta.Labels["lagoon.sh/cancelBuild"]; ok {
cancel, _ := strconv.ParseBool(cancelBuild)
if cancel {
opLog.Info(fmt.Sprintf("Attempting to cancel build %s", lagoonBuild.ObjectMeta.Name))
return r.updateDeploymentWithLogs(ctx, req, lagoonBuild, jobPod, nil, cancel)
}
cancel, _ = strconv.ParseBool(cancelBuild)
}
_, ok := r.Cache.Get(lagoonBuild.ObjectMeta.Name)
if ok {
opLog.Info(fmt.Sprintf("Cached cancellation exists for: %s", lagoonBuild.ObjectMeta.Name))
// this object exists in the cache meaning the task has been cancelled, set cancel to true and remove from cache
r.Cache.Remove(lagoonBuild.ObjectMeta.Name)
cancel = true
}
if cancel {
opLog.Info(fmt.Sprintf("Attempting to cancel build %s", lagoonBuild.ObjectMeta.Name))
return r.updateDeploymentWithLogs(ctx, req, lagoonBuild, jobPod, nil, cancel)
}
// check if the build pod is in pending, a container in the pod could be failed in this state
if jobPod.Status.Phase == corev1.PodPending {
Expand Down
2 changes: 2 additions & 0 deletions controllers/v1beta1/podmonitor_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"strconv"

"github.com/go-logr/logr"
"github.com/hashicorp/golang-lru/v2/expirable"
lagoonv1beta1 "github.com/uselagoon/remote-controller/apis/lagoon/v1beta1"
"github.com/uselagoon/remote-controller/internal/helpers"
"github.com/uselagoon/remote-controller/internal/messenger"
Expand Down Expand Up @@ -50,6 +51,7 @@ type LagoonMonitorReconciler struct {
LagoonTargetName string
LFFQoSEnabled bool
BuildQoS BuildQoS
Cache *expirable.LRU[string, string]
}

// slice of the different failure states of pods that we care about
Expand Down
17 changes: 13 additions & 4 deletions controllers/v1beta1/podmonitor_taskhandlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,20 @@ func (r *LagoonMonitorReconciler) handleTaskMonitor(ctx context.Context, opLog l
if err != nil {
return err
}
cancel := false
if cancelTask, ok := jobPod.ObjectMeta.Labels["lagoon.sh/cancelTask"]; ok {
cancel, _ := strconv.ParseBool(cancelTask)
if cancel {
return r.updateTaskWithLogs(ctx, req, lagoonTask, jobPod, nil, cancel)
}
cancel, _ = strconv.ParseBool(cancelTask)
}
_, ok := r.Cache.Get(lagoonTask.ObjectMeta.Name)
if ok {
opLog.Info(fmt.Sprintf("Cached cancellation exists for: %s", lagoonTask.ObjectMeta.Name))
// this object exists in the cache meaning the task has been cancelled, set cancel to true and remove from cache
r.Cache.Remove(lagoonTask.ObjectMeta.Name)
cancel = true
}
if cancel {
opLog.Info(fmt.Sprintf("Attempting to cancel task %s", lagoonTask.ObjectMeta.Name))
return r.updateTaskWithLogs(ctx, req, lagoonTask, jobPod, nil, cancel)
}
if jobPod.Status.Phase == corev1.PodPending {
for _, container := range jobPod.Status.ContainerStatuses {
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/go-logr/logr v1.2.4
github.com/google/go-cmp v0.5.9
github.com/hashicorp/go-version v1.6.0
github.com/hashicorp/golang-lru/v2 v2.0.6
github.com/k8up-io/k8up/v2 v2.7.1
github.com/mittwald/goharbor-client/v3 v3.3.0
github.com/mittwald/goharbor-client/v5 v5.3.1
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,8 @@ github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/hashicorp/golang-lru/v2 v2.0.6 h1:3xi/Cafd1NaoEnS/yDssIiuVeDVywU0QdFGl3aQaQHM=
github.com/hashicorp/golang-lru/v2 v2.0.6/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64=
github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ=
Expand Down
2 changes: 2 additions & 0 deletions internal/messenger/consumer.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ func (m *Messenger) Consumer(targetName string) { //error {
namespace,
),
)
m.Cache.Add(jobSpec.Misc.Name, jobSpec.Project.Name)
err := m.CancelBuild(namespace, jobSpec)
if err != nil {
//@TODO: send msg back to lagoon and update task to failed?
Expand All @@ -327,6 +328,7 @@ func (m *Messenger) Consumer(targetName string) { //error {
namespace,
),
)
m.Cache.Add(jobSpec.Task.TaskName, jobSpec.Project.Name)
err := m.CancelTask(namespace, jobSpec)
if err != nil {
//@TODO: send msg back to lagoon and update task to failed?
Expand Down
4 changes: 4 additions & 0 deletions internal/messenger/messenger.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package messenger

import (
"github.com/cheshir/go-mq/v2"
"github.com/hashicorp/golang-lru/v2/expirable"
"github.com/uselagoon/remote-controller/internal/utilities/deletions"
"sigs.k8s.io/controller-runtime/pkg/client"
)
Expand Down Expand Up @@ -35,6 +36,7 @@ type Messenger struct {
AdvancedTaskDeployTokenInjection bool
DeletionHandler *deletions.Deletions
EnableDebug bool
Cache *expirable.LRU[string, string]
}

// New returns a messaging with config and controller-runtime client.
Expand All @@ -49,6 +51,7 @@ func New(config mq.Config,
advancedTaskDeployTokenInjection bool,
deletionHandler *deletions.Deletions,
enableDebug bool,
cache *expirable.LRU[string, string],
) *Messenger {
return &Messenger{
Config: config,
Expand All @@ -62,5 +65,6 @@ func New(config mq.Config,
AdvancedTaskDeployTokenInjection: advancedTaskDeployTokenInjection,
DeletionHandler: deletionHandler,
EnableDebug: enableDebug,
Cache: cache,
}
}
7 changes: 7 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import (

cron "gopkg.in/robfig/cron.v2"

"github.com/hashicorp/golang-lru/v2/expirable"
k8upv1 "github.com/k8up-io/k8up/v2/api/v1"
lagoonv1beta1 "github.com/uselagoon/remote-controller/apis/lagoon/v1beta1"
lagoonv1beta1ctrl "github.com/uselagoon/remote-controller/controllers/v1beta1"
Expand Down Expand Up @@ -470,6 +471,9 @@ func main() {
os.Exit(1)
}

// create the cache
cache := expirable.NewLRU[string, string](1000, nil, time.Minute*60)

config := mq.Config{
ReconnectDelay: time.Duration(rabbitRetryInterval) * time.Second,
Exchanges: mq.Exchanges{
Expand Down Expand Up @@ -640,10 +644,12 @@ func main() {
advancedTaskDeployToken,
deletion,
enableDebug,
cache,
)

c := cron.New()
// if we are running with MQ support, then start the consumer handler

if enableMQ {
setupLog.Info("starting messaging handler")
go messaging.Consumer(lagoonTargetName)
Expand Down Expand Up @@ -812,6 +818,7 @@ func main() {
LagoonTargetName: lagoonTargetName,
LFFQoSEnabled: lffQoSEnabled,
BuildQoS: buildQoSConfig,
Cache: cache,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "LagoonMonitor")
os.Exit(1)
Expand Down
Loading