diff --git a/bin/experiment/experiment.go b/bin/experiment/experiment.go index 232eceaaa..faae6377c 100755 --- a/bin/experiment/experiment.go +++ b/bin/experiment/experiment.go @@ -56,6 +56,7 @@ import ( ebsLossByTag "github.com/litmuschaos/litmus-go/experiments/kube-aws/ebs-loss-by-tag/experiment" ec2TerminateByID "github.com/litmuschaos/litmus-go/experiments/kube-aws/ec2-terminate-by-id/experiment" ec2TerminateByTag "github.com/litmuschaos/litmus-go/experiments/kube-aws/ec2-terminate-by-tag/experiment" + springBootChaos "github.com/litmuschaos/litmus-go/experiments/spring-boot/spring-boot-chaos/experiment" vmpoweroff "github.com/litmuschaos/litmus-go/experiments/vmware/vm-poweroff/experiment" "github.com/litmuschaos/litmus-go/pkg/clients" @@ -181,7 +182,8 @@ func main() { gcpVMInstanceStopByLabel.GCPVMInstanceStopByLabel(clients) case "gcp-vm-disk-loss-by-label": gcpVMDiskLossByLabel.GCPVMDiskLossByLabel(clients) - + case "spring-boot-chaos": + springBootChaos.Experiment(clients) default: log.Errorf("Unsupported -name %v, please provide the correct value of -name args", *experimentName) return diff --git a/chaoslib/litmus/network-chaos/helper/netem.go b/chaoslib/litmus/network-chaos/helper/netem.go index 1ddde01ca..83ae9aff7 100644 --- a/chaoslib/litmus/network-chaos/helper/netem.go +++ b/chaoslib/litmus/network-chaos/helper/netem.go @@ -147,6 +147,7 @@ func injectChaos(experimentDetails *experimentTypes.ExperimentDetails, pid int) for j := range uniqueIps { if ips[i] == uniqueIps[j] { isPresent = true + break } } if !isPresent { diff --git a/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go b/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go new file mode 100644 index 000000000..9b0b614a5 --- /dev/null +++ b/chaoslib/litmus/spring-boot-chaos/lib/spring-boot-chaos.go @@ -0,0 +1,396 @@ +package lib + +import ( + "bytes" + "encoding/json" + "fmt" + corev1 "k8s.io/api/core/v1" + "net/http" + "os" + "os/signal" + "strings" + "syscall" + "time" + + "github.com/litmuschaos/litmus-go/pkg/clients" + "github.com/litmuschaos/litmus-go/pkg/events" + "github.com/litmuschaos/litmus-go/pkg/log" + "github.com/litmuschaos/litmus-go/pkg/probe" + "github.com/litmuschaos/litmus-go/pkg/result" + experimentTypes "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/types" + "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +var revertAssault = experimentTypes.ChaosMonkeyAssaultRevert{ + LatencyActive: false, + KillApplicationActive: false, + CPUActive: false, + MemoryActive: false, + ExceptionsActive: false, +} + +// SetTargetPodList selects the targeted pod and add them to the experimentDetails +func SetTargetPodList(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { + // Get the target pod details for the chaos execution + // if the target pod is not defined it will derive the random target pod list using pod affected percentage + var err error + + if experimentsDetails.TargetPods == "" && chaosDetails.AppDetail.Label == "" { + return errors.Errorf("please provide one of the appLabel or TARGET_PODS") + } + if experimentsDetails.TargetPodList, err = common.GetPodList(experimentsDetails.TargetPods, experimentsDetails.PodsAffectedPerc, clients, chaosDetails); err != nil { + return err + } + return nil + +} + +// PrepareChaos contains the preparation steps before chaos injection +func 
PrepareChaos(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, resultDetails *types.ResultDetails, eventsDetails *types.EventDetails, chaosDetails *types.ChaosDetails) error { + // Waiting for the ramp time before chaos injection + if experimentsDetails.RampTime != 0 { + log.Infof("[Ramp]: Waiting for the %vs ramp time before injecting chaos", experimentsDetails.RampTime) + common.WaitForDuration(experimentsDetails.RampTime) + } + + log.InfoWithValues("[Info]: Chaos monkeys watchers will be injected to the target pods as follows", logrus.Fields{ + "WebClient": experimentsDetails.ChaosMonkeyWatchers.WebClient, + "Service": experimentsDetails.ChaosMonkeyWatchers.Service, + "Component": experimentsDetails.ChaosMonkeyWatchers.Component, + "Repository": experimentsDetails.ChaosMonkeyWatchers.Repository, + "Controller": experimentsDetails.ChaosMonkeyWatchers.Controller, + "RestController": experimentsDetails.ChaosMonkeyWatchers.RestController, + }) + log.InfoWithValues("[Info]: Chaos monkeys assaults will be injected to the target pods as follows", logrus.Fields{ + "CPU Assault": experimentsDetails.ChaosMonkeyAssault.CPUActive, + "Memory Assault": experimentsDetails.ChaosMonkeyAssault.MemoryActive, + "Kill App Assault": experimentsDetails.ChaosMonkeyAssault.KillApplicationActive, + "Latency Assault": experimentsDetails.ChaosMonkeyAssault.LatencyActive, + "Exception Assault": experimentsDetails.ChaosMonkeyAssault.ExceptionsActive, + }) + + switch strings.ToLower(experimentsDetails.Sequence) { + case "serial": + if err := injectChaosInSerialMode(experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil { + return err + } + case "parallel": + if err := injectChaosInParallelMode(experimentsDetails, clients, chaosDetails, eventsDetails, resultDetails); err != nil { + return err + } + default: + return errors.Errorf("%v sequence is not supported", experimentsDetails.Sequence) + } + + // Waiting for the ramp time after chaos injection + if experimentsDetails.RampTime != 0 { + log.Infof("[Ramp]: Waiting for the %vs ramp time after injecting chaos", experimentsDetails.RampTime) + common.WaitForDuration(experimentsDetails.RampTime) + } + return nil +} + +// CheckChaosMonkey verifies if chaos monkey for spring boot is available in the selected pods +// All pods are checked, even if some errors occur. 
But if any pod is in error, the check returns an error +func CheckChaosMonkey(chaosMonkeyPort string, chaosMonkeyPath string, targetPods corev1.PodList) (bool, error) { + hasErrors := false + + for _, pod := range targetPods.Items { + endpoint := "http://" + pod.Status.PodIP + ":" + chaosMonkeyPort + chaosMonkeyPath + log.Infof("[Check]: Checking pod: %v (endpoint: %v)", pod.Name, endpoint) + + resp, err := http.Get(endpoint) + if err != nil { + log.Errorf("failed to request chaos monkey endpoint on pod %v (err: %v)", pod.Name, err) + hasErrors = true + continue + } + + if resp.StatusCode != 200 { + log.Errorf("failed to get chaos monkey endpoint on pod %v (status: %v)", pod.Name, resp.StatusCode) + hasErrors = true + } + } + + if hasErrors { + return false, errors.Errorf("failed to check chaos monkey on at least one pod, check logs for details") + } + return true, nil +} +
+// enableChaosMonkey enables chaos monkey on the selected pod +func enableChaosMonkey(chaosMonkeyPort string, chaosMonkeyPath string, pod corev1.Pod) error { + log.Infof("[Chaos]: Enabling Chaos Monkey on pod: %v", pod.Name) + resp, err := http.Post("http://"+pod.Status.PodIP+":"+chaosMonkeyPort+chaosMonkeyPath+"/enable", "", nil) //nolint:bodyclose + if err != nil { + return err + } + + if resp.StatusCode != 200 { + return errors.Errorf("failed to enable chaos monkey endpoint on pod %v (status: %v)", pod.Name, resp.StatusCode) + } + + return nil +} +
+func setChaosMonkeyWatchers(chaosMonkeyPort string, chaosMonkeyPath string, watchers experimentTypes.ChaosMonkeyWatchers, pod corev1.Pod) error { + log.Infof("[Chaos]: Setting Chaos Monkey watchers on pod: %v", pod.Name) + + jsonValue, err := json.Marshal(watchers) + if err != nil { + return err + } + + resp, err := http.Post("http://"+pod.Status.PodIP+":"+chaosMonkeyPort+chaosMonkeyPath+"/watchers", "application/json", bytes.NewBuffer(jsonValue)) + if err != nil { + return err + } + + if resp.StatusCode != 200 { + return errors.Errorf("failed to set watchers on pod %v (status: %v)", pod.Name, resp.StatusCode) + } + + return nil +} +
+func startAssault(chaosMonkeyPort string, chaosMonkeyPath string, assault experimentTypes.ChaosMonkeyAssault, pod corev1.Pod) error { + jsonValue, err := json.Marshal(assault) + if err != nil { + return err + } + if err := setChaosMonkeyAssault(chaosMonkeyPort, chaosMonkeyPath, jsonValue, pod); err != nil { + return err + } + log.Infof("[Chaos]: Activating Chaos Monkey assault on pod: %v", pod.Name) + resp, err := http.Post("http://"+pod.Status.PodIP+":"+chaosMonkeyPort+chaosMonkeyPath+"/assaults/runtime/attack", "", nil) + if err != nil { + return err + } + + if resp.StatusCode != 200 { + return errors.Errorf("failed to activate runtime attack on pod %v (status: %v)", pod.Name, resp.StatusCode) + } + return nil +} +
+func setChaosMonkeyAssault(chaosMonkeyPort string, chaosMonkeyPath string, assault []byte, pod corev1.Pod) error { + log.Infof("[Chaos]: Setting Chaos Monkey assault on pod: %v", pod.Name) + + resp, err := http.Post("http://"+pod.Status.PodIP+":"+chaosMonkeyPort+chaosMonkeyPath+"/assaults", "application/json", bytes.NewBuffer(assault)) + if err != nil { + return err + } + + if resp.StatusCode != 200 { + return errors.Errorf("failed to set assault on pod %v (status: %v)", pod.Name, resp.StatusCode) + } + return nil +} +
+// disableChaosMonkey disables chaos monkey on the selected pod +func disableChaosMonkey(chaosMonkeyPort string, chaosMonkeyPath string, pod corev1.Pod) error { + log.Infof("[Chaos]: disabling 
assaults on pod %v", pod.Name) + jsonValue, err := json.Marshal(revertAssault) + if err != nil { + return err + } + if err := setChaosMonkeyAssault(chaosMonkeyPort, chaosMonkeyPath, jsonValue, pod); err != nil { + return err + } + + log.Infof("[Chaos]: disabling chaos monkey on pod %v", pod.Name) + resp, err := http.Post("http://"+pod.Status.PodIP+":"+chaosMonkeyPort+chaosMonkeyPath+"/disable", "", nil) + if err != nil { + return err + } + + if resp.StatusCode != 200 { + return errors.Errorf("failed to disable chaos monkey endpoint on pod %v (status: %v)", pod.Name, resp.StatusCode) + } + + return nil +} + +// injectChaosInSerialMode injects chaos monkey assault on pods in serial mode(one by one) +func injectChaosInSerialMode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails, eventsDetails *types.EventDetails, resultDetails *types.ResultDetails) error { + + // run the probes during chaos + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + return err + } + } + + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. + signal.Notify(signChan, os.Interrupt, syscall.SIGTERM) + + var endTime <-chan time.Time + timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second + + select { + case <-signChan: + // stopping the chaos execution, if abort signal received + time.Sleep(10 * time.Second) + os.Exit(0) + default: + for _, pod := range experimentsDetails.TargetPodList.Items { + if experimentsDetails.EngineName != "" { + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" + types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + _ = events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + } + + log.InfoWithValues("[Chaos]: Injecting on target pod", logrus.Fields{ + "Target Pod": pod.Name, + }) + + if err := setChaosMonkeyWatchers(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.ChaosMonkeyWatchers, pod); err != nil { + log.Errorf("[Chaos]: Failed to set watchers, err: %v ", err) + return err + } + + if err := startAssault(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.ChaosMonkeyAssault, pod); err != nil { + log.Errorf("[Chaos]: Failed to set assault, err: %v ", err) + return err + } + + if err := enableChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, pod); err != nil { + log.Errorf("[Chaos]: Failed to enable chaos, err: %v ", err) + return err + } + common.SetTargets(pod.Name, "injected", "pod", chaosDetails) + + log.Infof("[Chaos]: Waiting for: %vs", experimentsDetails.ChaosDuration) + + endTime = time.After(timeDelay) + loop: + for { + select { + case <-signChan: + log.Info("[Chaos]: Revert Started") + if err := disableChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, pod); err != nil { + log.Errorf("Error in disabling chaos monkey, err: %v", err) + } else { + common.SetTargets(pod.Name, "reverted", "pod", chaosDetails) + } + // updating the chaosresult after stopped + failStep := "Chaos injection stopped!" 
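+ // persist the "Stopped" phase/verdict in the chaosresult before exiting, so the abort is reflected in the CR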
+ types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") + log.Info("[Chaos]: Revert Completed") + os.Exit(1) + case <-endTime: + log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) + endTime = nil + break loop + } + } + + if err := disableChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, pod); err != nil { + return fmt.Errorf("error in disabling chaos monkey, err: %v", err) + } + + common.SetTargets(pod.Name, "reverted", "pod", chaosDetails) + } + } + return nil + +} + +// injectChaosInParallelMode injects chaos monkey assault on pods in parallel mode (all at once) +func injectChaosInParallelMode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails, eventsDetails *types.EventDetails, resultDetails *types.ResultDetails) error { + + // run the probes during chaos + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(chaosDetails, clients, resultDetails, "DuringChaos", eventsDetails); err != nil { + return err + } + } + + // signChan channel is used to transmit signal notifications. + signChan := make(chan os.Signal, 1) + // Catch and relay certain signal(s) to signChan channel. + signal.Notify(signChan, os.Interrupt, syscall.SIGTERM) + + var endTime <-chan time.Time + timeDelay := time.Duration(experimentsDetails.ChaosDuration) * time.Second + + select { + case <-signChan: + // stopping the chaos execution, if abort signal received + time.Sleep(10 * time.Second) + os.Exit(0) + default: + for _, pod := range experimentsDetails.TargetPodList.Items { + if experimentsDetails.EngineName != "" { + msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + pod.Name + " pod" + types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) + _ = events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") + } + + log.InfoWithValues("[Chaos]: The Target application details", logrus.Fields{ + "Target Pod": pod.Name, + }) + + if err := setChaosMonkeyWatchers(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.ChaosMonkeyWatchers, pod); err != nil { + return errors.Errorf("[Chaos]: Failed to set watchers, err: %v ", err) + } + + if err := startAssault(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.ChaosMonkeyAssault, pod); err != nil { + log.Errorf("[Chaos]: Failed to set assault, err: %v ", err) + return err + } + + if err := enableChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, pod); err != nil { + log.Errorf("[Chaos]: Failed to enable chaos, err: %v ", err) + return err + } + common.SetTargets(pod.Name, "injected", "pod", chaosDetails) + } + log.Infof("[Chaos]: Waiting for: %vs", experimentsDetails.ChaosDuration) + } +loop: + for { + endTime = time.After(timeDelay) + select { + case <-signChan: + log.Info("[Chaos]: Revert Started") + for _, pod := range experimentsDetails.TargetPodList.Items { + if err := disableChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, pod); err != nil { + log.Errorf("Error in disabling chaos monkey, err: %v", err) + } else { + common.SetTargets(pod.Name, "reverted", "pod", chaosDetails) + } + } + // updating the chaosresult after stopped + failStep := "Chaos injection stopped!" 
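+ // update the chaosresult with the Stopped verdict before exiting, keeping the abort visible in the CR via the EOT patch below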
+ types.SetResultAfterCompletion(resultDetails, "Stopped", "Stopped", failStep) + result.ChaosResult(chaosDetails, clients, resultDetails, "EOT") + log.Info("[Chaos]: Revert Completed") + os.Exit(1) + case <-endTime: + log.Infof("[Chaos]: Time is up for experiment: %v", experimentsDetails.ExperimentName) + endTime = nil + break loop + } + } + + var errorList []string + for _, pod := range experimentsDetails.TargetPodList.Items { + if err := disableChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, pod); err != nil { + errorList = append(errorList, err.Error()) + continue + } + common.SetTargets(pod.Name, "reverted", "pod", chaosDetails) + } + + if len(errorList) != 0 { + return fmt.Errorf("error in disabling chaos monkey, err: %v", strings.Join(errorList, ", ")) + } + return nil +} diff --git a/experiments/spring-boot/README.md b/experiments/spring-boot/README.md new file mode 100644 index 000000000..1b3a9608d --- /dev/null +++ b/experiments/spring-boot/README.md @@ -0,0 +1,15 @@ +## Experiment Metadata + + + + + + + + + + + + +
Name Description Documentation Link
Chaos Monkey Spring Boot This experiment injects Chaos Monkey assaults into Spring Boot applications that include [Chaos Monkey for Spring Boot](https://codecentric.github.io/chaos-monkey-spring-boot/) on their classpath. It can target random pods running a Spring Boot application and lets you configure which assaults to inject (exception, latency, CPU, memory). It tests the resiliency of the system when some applications exhibit unexpected faulty behavior. TODO
+ diff --git a/experiments/spring-boot/rbac.yaml b/experiments/spring-boot/rbac.yaml new file mode 100644 index 000000000..924d97246 --- /dev/null +++ b/experiments/spring-boot/rbac.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: spring-boot-chaos-sa + namespace: podtato + labels: + name: spring-boot-chaos-sa +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: spring-boot-chaos-sa + labels: + name: spring-boot-chaos-sa +rules: + - apiGroups: [ "","litmuschaos.io","batch","apps" ] + resources: [ "pods","deployments","pods/log","events","jobs","pods/exec","statefulsets","configmaps","chaosengines","chaosexperiments","chaosresults" ] + verbs: [ "create","list","get","patch","delete","update" ] + - apiGroups: [ "" ] + resources: [ "nodes" ] + verbs: [ "get","list" ] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: spring-boot-chaos-sa + labels: + name: spring-boot-chaos-sa +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: spring-boot-chaos-sa +subjects: + - kind: ServiceAccount + name: spring-boot-chaos-sa + namespace: podtato diff --git a/experiments/spring-boot/spring-boot-chaos/experiment/spring-boot-chaos.go b/experiments/spring-boot/spring-boot-chaos/experiment/spring-boot-chaos.go new file mode 100644 index 000000000..f0cb0c90a --- /dev/null +++ b/experiments/spring-boot/spring-boot-chaos/experiment/spring-boot-chaos.go @@ -0,0 +1,213 @@ +package experiment + +import ( + "github.com/litmuschaos/chaos-operator/api/litmuschaos/v1alpha1" + litmusLIB "github.com/litmuschaos/litmus-go/chaoslib/litmus/spring-boot-chaos/lib" + "github.com/litmuschaos/litmus-go/pkg/clients" + "github.com/litmuschaos/litmus-go/pkg/events" + "github.com/litmuschaos/litmus-go/pkg/log" + "github.com/litmuschaos/litmus-go/pkg/probe" + "github.com/litmuschaos/litmus-go/pkg/result" + experimentEnv "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/environment" + experimentTypes "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/types" + "github.com/litmuschaos/litmus-go/pkg/status" + "github.com/litmuschaos/litmus-go/pkg/types" + "github.com/litmuschaos/litmus-go/pkg/utils/common" + "github.com/sirupsen/logrus" + "os" +) + +// Experiment contains steps to inject chaos +func Experiment(clients clients.ClientSets) { + + experimentsDetails := experimentTypes.ExperimentDetails{} + resultDetails := types.ResultDetails{} + eventsDetails := types.EventDetails{} + chaosDetails := types.ChaosDetails{} + + //Fetching all the ENV passed from the runner pod + log.Infof("[PreReq]: Getting the ENV for the %v experiment", os.Getenv("EXPERIMENT_NAME")) + experimentEnv.GetENV(&experimentsDetails) + + // Initialize the chaos attributes + types.InitialiseChaosVariables(&chaosDetails) + + // Initialize Chaos Result Parameters + types.SetResultAttributes(&resultDetails, chaosDetails) + + if experimentsDetails.EngineName != "" { + // Initialize the probe details. 
Bail out upon error, as we haven't entered exp business logic yet + if err := probe.InitializeProbesInChaosResultDetails(&chaosDetails, clients, &resultDetails); err != nil { + log.Errorf("Unable to initialize the probes, err: %v", err) + return + } + } + + //Updating the chaos result in the beginning of experiment + log.Infof("[PreReq]: Updating the chaos result of %v experiment (SOT)", experimentsDetails.ExperimentName) + if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "SOT"); err != nil { + log.Errorf("Unable to Create the Chaos Result, err: %v", err) + failStep := "[pre-chaos]: Failed to update the chaos result of spring-boot-chaos experiment (SOT), err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + // Set the chaos result uid + _ = result.SetResultUID(&resultDetails, clients, &chaosDetails) + + // generating the event in chaosResult to mark the verdict as awaited + msg := "experiment: " + experimentsDetails.ExperimentName + ", Result: Awaited" + types.SetResultEventAttributes(&eventsDetails, types.AwaitedVerdict, msg, "Normal", &resultDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + + //DISPLAY THE APP INFORMATION + log.InfoWithValues("[Info]: The application information is as follows", logrus.Fields{ + "Namespace": experimentsDetails.AppNS, + "Label": experimentsDetails.AppLabel, + "Chaos Duration": experimentsDetails.ChaosDuration, + }) + + // Calling AbortWatcher go routine, it will continuously watch for the abort signal and generate the required events and result + go common.AbortWatcherWithoutExit(experimentsDetails.ExperimentName, clients, &resultDetails, &chaosDetails, &eventsDetails) + + // Select targeted pods + log.Infof("[PreCheck]: Geting targeted pods list") + if err := litmusLIB.SetTargetPodList(&experimentsDetails, clients, &chaosDetails); err != nil { + log.Errorf("Failed to get target pod list, err: %v", err) + failStep := "[pre-chaos]: Failed to get pod list, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "Pods: Not Found", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + podNames := make([]string, 0, 1) + for _, pod := range experimentsDetails.TargetPodList.Items { + podNames = append(podNames, pod.Name) + } + log.Infof("[PreCheck]: Target pods list for chaos, %v", podNames) + + // Check if the targeted pods have the chaos monkey endpoint + log.Infof("[PreCheck]: Checking for ChaosMonkey endpoint in target pods") + if _, err := litmusLIB.CheckChaosMonkey(experimentsDetails.ChaosMonkeyPort, experimentsDetails.ChaosMonkeyPath, experimentsDetails.TargetPodList); err != nil { + log.Errorf("Some target pods don't have the chaos monkey endpoint, err: %v", err) + failStep := "[pre-chaos]: Some target pods don't have the chaos monkey endpoint, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "ChaosMonkey: Not Found", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + //PRE-CHAOS APPLICATION STATUS CHECK + if chaosDetails.DefaultAppHealthCheck { + log.Info("[Status]: Verify that the AUT (Application Under Test) is 
running (pre-chaos)") + if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { + log.Errorf("Application status check failed, err: %v", err) + failStep := "[pre-chaos]: Failed to verify that the AUT (Application Under Test) is in running state, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + } + + if experimentsDetails.EngineName != "" { + // marking AUT as running, as we already checked the status of application under test + msg := "AUT: Running" + + // run the probes in the pre-chaos check + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PreChaos", &eventsDetails); err != nil { + log.Errorf("Probe Failed, err: %v", err) + failStep := "[pre-chaos]: Failed while running probes, err: " + err.Error() + msg := "AUT: Running, Probes: Unsuccessful" + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + msg = "AUT: Running, Probes: Successful" + } + // generating the events for the pre-chaos check + types.SetEngineEventAttributes(&eventsDetails, types.PreChaosCheck, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + // Including the litmus lib + switch experimentsDetails.ChaosLib { + case "litmus": + if err := litmusLIB.PrepareChaos(&experimentsDetails, clients, &resultDetails, &eventsDetails, &chaosDetails); err != nil { + log.Errorf("Chaos injection failed, err: %v", err) + failStep := "[chaos]: Failed inside the chaoslib, err: " + err.Error() + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + default: + log.Error("[Invalid]: Please Provide the correct LIB") + failStep := "[chaos]: no match found for specified lib" + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + + log.Infof("[Confirmation]: %v chaos has been injected successfully", experimentsDetails.ExperimentName) + resultDetails.Verdict = v1alpha1.ResultVerdictPassed + + // POST-CHAOS APPLICATION STATUS CHECK + if chaosDetails.DefaultAppHealthCheck { + log.Info("[Status]: Verify that the AUT (Application Under Test) is running (post-chaos)") + if err := status.AUTStatusCheck(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.TargetContainer, experimentsDetails.Timeout, experimentsDetails.Delay, clients, &chaosDetails); err != nil { + log.Errorf("Application status check failed, err: %v", err) + failStep := "[post-chaos]: Failed to verify that the AUT (Application Under Test) is running, err: " + err.Error() + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, "AUT: Not Running", "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return 
+ } + } + + if experimentsDetails.EngineName != "" { + // marking AUT as running, as we already checked the status of application under test + msg := "AUT: Running" + + // run the probes in the post-chaos check + if len(resultDetails.ProbeDetails) != 0 { + if err := probe.RunProbes(&chaosDetails, clients, &resultDetails, "PostChaos", &eventsDetails); err != nil { + log.Errorf("Probes Failed, err: %v", err) + failStep := "[post-chaos]: Failed while running probes, err: " + err.Error() + msg := "AUT: Running, Probes: Unsuccessful" + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Warning", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + result.RecordAfterFailure(&chaosDetails, &resultDetails, failStep, clients, &eventsDetails) + return + } + msg = "AUT: Running, Probes: Successful" + } + + // generating post chaos event + types.SetEngineEventAttributes(&eventsDetails, types.PostChaosCheck, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } + + //Updating the chaosResult in the end of experiment + log.Infof("[The End]: Updating the chaos result of %v experiment (EOT)", experimentsDetails.ExperimentName) + if err := result.ChaosResult(&chaosDetails, clients, &resultDetails, "EOT"); err != nil { + log.Errorf("Unable to Update the Chaos Result, err: %v", err) + return + } + + // generating the event in chaosResult to mark the verdict as pass/fail + msg = "experiment: " + experimentsDetails.ExperimentName + ", Result: " + string(resultDetails.Verdict) + reason := types.PassVerdict + eventType := "Normal" + if resultDetails.Verdict != "Pass" { + reason = types.FailVerdict + eventType = "Warning" + } + types.SetResultEventAttributes(&eventsDetails, reason, msg, eventType, &resultDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosResult") + + if experimentsDetails.EngineName != "" { + msg := experimentsDetails.ExperimentName + " experiment has been " + string(resultDetails.Verdict) + "ed" + types.SetEngineEventAttributes(&eventsDetails, types.Summary, msg, "Normal", &chaosDetails) + _ = events.GenerateEvents(&eventsDetails, clients, &chaosDetails, "ChaosEngine") + } +} diff --git a/experiments/spring-boot/spring-boot-chaos/test/test.yml b/experiments/spring-boot/spring-boot-chaos/test/test.yml new file mode 100644 index 000000000..0aac7005e --- /dev/null +++ b/experiments/spring-boot/spring-boot-chaos/test/test.yml @@ -0,0 +1,140 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: litmus-experiment +spec: + replicas: 1 + selector: + matchLabels: + app: litmus-experiment + template: + metadata: + labels: + app: litmus-experiment + spec: + serviceAccountName: spring-boot-chaos-sa + containers: + - name: gotest + image: litmusgodev:latest + imagePullPolicy: IfNotPresent + command: + - sleep + - "3600" + env: + # provide application namespace + - name: APP_NAMESPACE + value: 'podtato' + - name: EXPERIMENT_NAME + value: 'spring-boot-chaos' + # provide application labels + - name: APP_LABEL + value: 'app=spring-boot-demo' + - name: SEQUENCE + value: 'serial' + # provide application kind + - name: APP_KIND + value: 'deployment' + # provide the chaos namespace + - name: CHAOS_NAMESPACE + value: 'podtato' + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: CHAOS_SERVICE_ACCOUNT + valueFrom: + fieldRef: + fieldPath: spec.serviceAccountName + + - name: TOTAL_CHAOS_DURATION + value: '600' + # 
provide auxiliary application details - namespace and labels of the applications + # sample input is - "ns1:app=percona,ns2:name=nginx" + - name: AUXILIARY_APPINFO + value: '' + ## Period to wait before injection of chaos in sec + - name: RAMP_TIME + value: '' + ## env var that describes the library used to execute the chaos + ## default: litmus. Supported values: litmus, powerfulseal, chaoskube + - name: LIB + value: 'litmus' + + # Chaos Monkey Spring Boot configuration + # Level determines at which frequency the assault happens on the application. For a value N, the assaults happens every N requests + - name: CM_LEVEL + value: '1' + + # Whether the level should be used as a deterministic value (attack every x requests) or a chance (on average, 1 in x requests will be attacked) + - name: CM_DETERMINISTIC + value: 'true' + + # Comma separated list of watched Java services. Ex: com.example.application.controller.HelloController.sayHelle + - name: CM_WATCHED_CUSTOM_SERVICES + value: '' + + # Comma separated list of watchers. Possible values: controller, restController, service, repository, component, restTemplate, webClient, actuatorHealth + - name: CM_WATCHERS + value: 'restController' + + # AppKiller assault active + - name: CM_KILL_APPLICATION_ACTIVE + value: 'false' + # Memory cron. Cron expression like */1 * * * * ? can be set to enable chaos monkey AppKiller assault on a schedule + - name: CM_KILL_APPLICATION_CRON + value: 'OFF' + + # Latency assault active + - name: CM_LATENCY_ACTIVE + value: 'true' + # Minimum latency (ms) + - name: CM_LATENCY_RANGE_START + value: '500' + # Maxiumu latency (ms) + - name: CM_LATENCY_RANGE_END + value: '500' + + # Exception assault active + - name: CM_EXCEPTIONS_ACTIVE + value: 'false' + # Type of raised exception + - name: CM_EXCEPTIONS_TYPE + value: 'java.lang.IllegalArgumentException' + # Argument of raised exception + - name: CM_EXCEPTIONS_ARGUMENTS + value: 'java.lang.String:custom illegal argument exception' + + # Memory assault active + - name: CM_MEMORY_ACTIVE + value: 'false' + # Duration to assault memory when requested fill amount is reached in ms. + - name: CM_MEMORY_MS_HOLD_FILLED_MEM + value: '90000' + # Time in ms between increases of memory usage. + - name: CM_MEMORY_MS_NEXT_INCREASE + value: '10000' + # Fraction of one individual memory increase iteration. 1.0 equals 100 %. + - name: CM_MEMORY_FILL_INC_FRACTION + value: '1.0' + # Final fraction of used memory by assault. 0.95 equals 95 %. + - name: CM_MEMORY_FILL_TARGET_FRACTION + value: '0.95' + # Memory cron. Cron expression like */1 * * * * ? can be set to enable chaos monkey memory assault on a schedule + - name: CM_MEMORY_CRON + value: 'OFF' + + # CPU assault active + - name: CM_CPU_ACTIVE + value: 'false' + # Duration to assault cpu when requested load is reached in ms. + - name: CM_CPU_MS_HOLD_LOAD + value: '90000' + # Final fraction of used cpu by assault. 0.95 equals 95 %. + - name: CM_CPU_LOAD_TARGET_FRACTION + value: '0.9' + # CPU cron. Cron expression like */1 * * * * ? 
can be set to enable chaos monkey cpu assault on a schedule + - name: CM_CPU_CRON + value: 'OFF' + + diff --git a/go.mod b/go.mod index effe51dbc..ee09877a0 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/aws/aws-sdk-go v1.38.59 github.com/containerd/cgroups v1.0.1 github.com/kyokomi/emoji v2.2.4+incompatible - github.com/litmuschaos/chaos-operator v0.0.0-20220824040614-88dbe3eb960c + github.com/litmuschaos/chaos-operator v0.0.0-20220920112443-591193ec22c9 github.com/pkg/errors v0.9.1 github.com/sirupsen/logrus v1.7.0 github.com/spf13/cobra v1.1.1 diff --git a/go.sum b/go.sum index 0e9363f8c..a65f3a886 100644 --- a/go.sum +++ b/go.sum @@ -712,8 +712,8 @@ github.com/libopenstorage/openstorage v1.0.0/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wn github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc= github.com/litmuschaos/chaos-operator v0.0.0-20210610071657-a58dbd939e73/go.mod h1:QMjfAVIfwcpj/P1jikyz5+C5vWICiUXsFZMR7Ihnzro= -github.com/litmuschaos/chaos-operator v0.0.0-20220824040614-88dbe3eb960c h1:rZuxZxYMa0Oqlqtq1AfExJhXxLaMr4GKqj4ZLMSARzE= -github.com/litmuschaos/chaos-operator v0.0.0-20220824040614-88dbe3eb960c/go.mod h1:TxsfFIIjM3/atf2fLm3AYVtoUucBQK46EJvLwRITnE4= +github.com/litmuschaos/chaos-operator v0.0.0-20220920112443-591193ec22c9 h1:3Y0TFhyc0PafkxkmbS5HRO5trnW96vGQnQmstRigBlk= +github.com/litmuschaos/chaos-operator v0.0.0-20220920112443-591193ec22c9/go.mod h1:TxsfFIIjM3/atf2fLm3AYVtoUucBQK46EJvLwRITnE4= github.com/litmuschaos/elves v0.0.0-20201107015738-552d74669e3c/go.mod h1:DsbHGNUq/78NZozWVVI9Q6eBei4I+JjlkkD5aibJ3MQ= github.com/litmuschaos/litmus-go v0.0.0-20210705063441-babf0c4aa57d/go.mod h1:MNO+1u4jBPjLtFO56bckIv87EhwTkppJxDf8+6PbLRY= github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= diff --git a/pkg/probe/probe.go b/pkg/probe/probe.go index 1c6554258..1df0dc770 100644 --- a/pkg/probe/probe.go +++ b/pkg/probe/probe.go @@ -68,36 +68,27 @@ func RunProbes(chaosDetails *types.ChaosDetails, clients clients.ClientSets, res //setProbeVerdict mark the verdict of the probe in the chaosresult as passed // on the basis of phase(pre/post chaos) -func setProbeVerdict(resultDetails *types.ResultDetails, probe v1alpha1.ProbeAttributes, verdict, phase string) { - +func setProbeVerdict(resultDetails *types.ResultDetails, probe v1alpha1.ProbeAttributes, verdict v1alpha1.ProbeVerdict, description string) { for index, probes := range resultDetails.ProbeDetails { if probes.Name == probe.Name && probes.Type == probe.Type { - switch strings.ToLower(probe.Mode) { - case "sot", "edge", "eot": - if verdict == "Passed" { - resultDetails.ProbeDetails[index].Status[phase] = verdict + emoji.Sprint(" :thumbsup:") - } else { - resultDetails.ProbeDetails[index].Status[phase] = "Better Luck Next Time" + emoji.Sprint(" :thumbsdown:") - } - case "continuous", "onchaos": - if verdict == "Passed" { - resultDetails.ProbeDetails[index].Status[probe.Mode] = verdict + emoji.Sprint(" :thumbsup:") - } else { - resultDetails.ProbeDetails[index].Status[probe.Mode] = "Better Luck Next Time" + emoji.Sprint(" :thumbsdown:") - } + if probes.Mode == "Edge" && probes.Status.Verdict == v1alpha1.ProbeVerdictFailed { + return + } + resultDetails.ProbeDetails[index].Status = v1alpha1.ProbeStatus{ + Verdict: verdict, + Description: description, } - resultDetails.ProbeDetails[index].Phase = 
verdict + break } } } //SetProbeVerdictAfterFailure mark the verdict of all the failed/unrun probes as failed -func SetProbeVerdictAfterFailure(resultDetails *types.ResultDetails) { - for index := range resultDetails.ProbeDetails { - for _, phase := range []string{"PreChaos", "PostChaos", "Continuous", "OnChaos"} { - if resultDetails.ProbeDetails[index].Status[phase] == "Awaited" { - resultDetails.ProbeDetails[index].Status[phase] = "N/A" + emoji.Sprint(" :prohibited:") - } +func SetProbeVerdictAfterFailure(result *v1alpha1.ChaosResult) { + for index := range result.Status.ProbeStatuses { + if result.Status.ProbeStatuses[index].Status.Verdict == v1alpha1.ProbeVerdictAwaited { + result.Status.ProbeStatuses[index].Status.Verdict = v1alpha1.ProbeVerdictNA + result.Status.ProbeStatuses[index].Status.Description = "either probe is not executed or not evaluated" } } } @@ -133,7 +124,7 @@ func getProbesFromEngine(chaosDetails *types.ChaosDetails, clients clients.Clien // it fetch the probe details from the chaosengine and set into the chaosresult func InitializeProbesInChaosResultDetails(chaosDetails *types.ChaosDetails, clients clients.ClientSets, chaosresult *types.ResultDetails) error { - probeDetails := []types.ProbeDetails{} + var probeDetails []types.ProbeDetails // get the probes from the chaosengine probes, err := getProbesFromEngine(chaosDetails, clients) if err != nil { @@ -145,9 +136,12 @@ func InitializeProbesInChaosResultDetails(chaosDetails *types.ChaosDetails, clie tempProbe := types.ProbeDetails{} tempProbe.Name = probe.Name tempProbe.Type = probe.Type + tempProbe.Mode = probe.Mode tempProbe.Phase = "N/A" tempProbe.RunCount = 0 - setProbeInitialStatus(&tempProbe, probe.Mode) + tempProbe.Status = v1alpha1.ProbeStatus{ + Verdict: "Awaited", + } probeDetails = append(probeDetails, tempProbe) } @@ -167,33 +161,6 @@ func getAndIncrementRunCount(resultDetails *types.ResultDetails, probeName strin return 0 } -//setProbeInitialStatus sets the initial status inside chaosresult -func setProbeInitialStatus(probeDetails *types.ProbeDetails, mode string) { - switch strings.ToLower(mode) { - case "sot": - probeDetails.Status = map[string]string{ - "PreChaos": "Awaited", - } - case "eot": - probeDetails.Status = map[string]string{ - "PostChaos": "Awaited", - } - case "edge": - probeDetails.Status = map[string]string{ - "PreChaos": "Awaited", - "PostChaos": "Awaited", - } - case "continuous": - probeDetails.Status = map[string]string{ - "Continuous": "Awaited", - } - case "onchaos": - probeDetails.Status = map[string]string{ - "OnChaos": "Awaited", - } - } -} - //getRunIDFromProbe return the run_id for the dedicated probe // which will used in the continuous cmd probe, run_id is used as suffix in the external pod name func getRunIDFromProbe(resultDetails *types.ResultDetails, probeName, probeType string) string { @@ -220,13 +187,14 @@ func setRunIDForProbe(resultDetails *types.ResultDetails, probeName, probeType, // markedVerdictInEnd add the probe status in the chaosresult func markedVerdictInEnd(err error, resultDetails *types.ResultDetails, probe v1alpha1.ProbeAttributes, phase string) error { - probeVerdict := "Passed" + probeVerdict := v1alpha1.ProbeVerdictPassed + var description string if err != nil { - probeVerdict = "Failed" + probeVerdict = v1alpha1.ProbeVerdictFailed } switch probeVerdict { - case "Passed": + case v1alpha1.ProbeVerdictPassed: log.InfoWithValues("[Probe]: "+probe.Name+" probe has been Passed "+emoji.Sprint(":smile:"), logrus.Fields{ "ProbeName": probe.Name, "ProbeType": 
probe.Type, @@ -254,15 +222,25 @@ func markedVerdictInEnd(err error, resultDetails *types.ResultDetails, probe v1a "ProbeInstance": phase, "ProbeStatus": probeVerdict, }) + description = getDescription(strings.ToLower(probe.Mode), phase) } - setProbeVerdict(resultDetails, probe, probeVerdict, phase) + setProbeVerdict(resultDetails, probe, probeVerdict, description) if !probe.RunProperties.StopOnFailure { return nil } return err } +func getDescription(mode, phase string) string { + switch mode { + case "edge": + return fmt.Sprintf("'%v' Probe didn't met the passing criteria", phase) + default: + return "Probe didn't met the passing criteria" + } +} + //CheckForErrorInContinuousProbe check for the error in the continuous probes func checkForErrorInContinuousProbe(resultDetails *types.ResultDetails, probeName string) error { diff --git a/pkg/result/chaosresult.go b/pkg/result/chaosresult.go index 4c2789768..7ceb999de 100644 --- a/pkg/result/chaosresult.go +++ b/pkg/result/chaosresult.go @@ -27,17 +27,15 @@ func ChaosResult(chaosDetails *types.ChaosDetails, clients clients.ClientSets, r // It try to get the chaosresult, if available // it will retries until it got chaos result or met the timeout(3 mins) - var result *v1alpha1.ChaosResult isResultAvailable := false if err := retry. Times(90). Wait(2 * time.Second). Try(func(attempt uint) error { - resultObj, err := clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).Get(context.Background(), resultDetails.Name, v1.GetOptions{}) + _, err := clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).Get(context.Background(), resultDetails.Name, v1.GetOptions{}) if err != nil && !k8serrors.IsNotFound(err) { return errors.Errorf("unable to get %v chaosresult in %v namespace, err: %v", resultDetails.Name, chaosDetails.ChaosNamespace, err) } else if err == nil { - result = resultObj isResultAvailable = true } return nil @@ -65,13 +63,12 @@ func ChaosResult(chaosDetails *types.ChaosDetails, clients clients.ClientSets, r // the chaos-result is already present with matching labels // it will patch the new parameters in the same chaos-result if state == "SOT" { - updateHistory(result) - return PatchChaosResult(result, clients, chaosDetails, resultDetails, experimentLabel) + return PatchChaosResult(clients, chaosDetails, resultDetails, experimentLabel) } // it will patch the chaos-result in the end of experiment resultDetails.Phase = v1alpha1.ResultPhaseCompleted - return PatchChaosResult(result, clients, chaosDetails, resultDetails, experimentLabel) + return PatchChaosResult(clients, chaosDetails, resultDetails, experimentLabel) } //InitializeChaosResult create the chaos result @@ -95,7 +92,7 @@ func InitializeChaosResult(chaosDetails *types.ChaosDetails, clients clients.Cli Verdict: resultDetails.Verdict, ProbeSuccessPercentage: "Awaited", }, - ProbeStatus: probeStatus, + ProbeStatuses: probeStatus, History: &v1alpha1.HistoryDetails{ PassedRuns: 0, FailedRuns: 0, @@ -120,7 +117,7 @@ func InitializeChaosResult(chaosDetails *types.ChaosDetails, clients clients.Cli } // updating the chaosresult with new values - if err = PatchChaosResult(chaosResult, clients, chaosDetails, resultDetails, chaosResultLabel); err != nil { + if err = PatchChaosResult(clients, chaosDetails, resultDetails, chaosResultLabel); err != nil { return err } } @@ -128,14 +125,15 @@ func InitializeChaosResult(chaosDetails *types.ChaosDetails, clients clients.Cli } //GetProbeStatus fetch status of all probes -func GetProbeStatus(resultDetails *types.ResultDetails) (bool, 
[]v1alpha1.ProbeStatus) { +func GetProbeStatus(resultDetails *types.ResultDetails) (bool, []v1alpha1.ProbeStatuses) { isAllProbePassed := true - probeStatus := []v1alpha1.ProbeStatus{} + probeStatus := []v1alpha1.ProbeStatuses{} for _, probe := range resultDetails.ProbeDetails { - probes := v1alpha1.ProbeStatus{} + probes := v1alpha1.ProbeStatuses{} probes.Name = probe.Name probes.Type = probe.Type + probes.Mode = probe.Mode probes.Status = probe.Status probeStatus = append(probeStatus, probes) if probe.Phase == "Failed" { @@ -145,23 +143,21 @@ func GetProbeStatus(resultDetails *types.ResultDetails) (bool, []v1alpha1.ProbeS return isAllProbePassed, probeStatus } -//PatchChaosResult Update the chaos result -func PatchChaosResult(result *v1alpha1.ChaosResult, clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, chaosResultLabel map[string]string) error { - - annotations, err := GetChaosStatus(resultDetails, chaosDetails, clients) +func updateResultAttributes(clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, chaosResultLabel map[string]string) (*v1alpha1.ChaosResult, error) { + result, err := GetChaosStatus(resultDetails, chaosDetails, clients) if err != nil { - return err + return nil, err } + updateHistory(result) var isAllProbePassed bool result.Status.ExperimentStatus.Phase = resultDetails.Phase result.Spec.InstanceID = chaosDetails.InstanceID result.Status.ExperimentStatus.FailStep = resultDetails.FailStep // for existing chaos result resource it will patch the label result.ObjectMeta.Labels = chaosResultLabel - result.ObjectMeta.Annotations = annotations result.Status.History.Targets = chaosDetails.Targets - isAllProbePassed, result.Status.ProbeStatus = GetProbeStatus(resultDetails) + isAllProbePassed, result.Status.ProbeStatuses = GetProbeStatus(resultDetails) result.Status.ExperimentStatus.Verdict = resultDetails.Verdict switch strings.ToLower(string(resultDetails.Phase)) { @@ -176,7 +172,7 @@ func PatchChaosResult(result *v1alpha1.ChaosResult, clients clients.ClientSets, result.Status.History.PassedRuns++ case "fail": result.Status.History.FailedRuns++ - probe.SetProbeVerdictAfterFailure(resultDetails) + probe.SetProbeVerdictAfterFailure(result) if len(resultDetails.ProbeDetails) != 0 { result.Status.ExperimentStatus.ProbeSuccessPercentage = strconv.Itoa((resultDetails.PassedProbeCount * 100) / len(resultDetails.ProbeDetails)) } else { @@ -184,7 +180,7 @@ func PatchChaosResult(result *v1alpha1.ChaosResult, clients clients.ClientSets, } case "stopped": result.Status.History.StoppedRuns++ - probe.SetProbeVerdictAfterFailure(resultDetails) + probe.SetProbeVerdictAfterFailure(result) if len(resultDetails.ProbeDetails) != 0 { result.Status.ExperimentStatus.ProbeSuccessPercentage = strconv.Itoa((resultDetails.PassedProbeCount * 100) / len(resultDetails.ProbeDetails)) } else { @@ -194,16 +190,32 @@ func PatchChaosResult(result *v1alpha1.ChaosResult, clients clients.ClientSets, default: result.Status.ExperimentStatus.ProbeSuccessPercentage = "Awaited" } + return result, nil +} + +//PatchChaosResult Update the chaos result +func PatchChaosResult(clients clients.ClientSets, chaosDetails *types.ChaosDetails, resultDetails *types.ResultDetails, chaosResultLabel map[string]string) error { + + result, err := updateResultAttributes(clients, chaosDetails, resultDetails, chaosResultLabel) + if err != nil { + return err + } // It will update the existing chaos-result CR with new values - // it will retries 
until it will able to update successfully or met the timeout(3 mins) + // it will retries until it will be able to update successfully or met the timeout(3 mins) return retry. - Times(90). - Wait(2 * time.Second). + Times(uint(chaosDetails.Timeout / chaosDetails.Delay)). + Wait(time.Duration(chaosDetails.Delay) * time.Second). Try(func(attempt uint) error { - _, err := clients.LitmusClient.ChaosResults(result.Namespace).Update(context.Background(), result, v1.UpdateOptions{}) - if err != nil { - return errors.Errorf("Unable to update the chaosresult, err: %v", err) + _, updateErr := clients.LitmusClient.ChaosResults(result.Namespace).Update(context.Background(), result, v1.UpdateOptions{}) + if updateErr != nil { + if k8serrors.IsConflict(updateErr) { + result, err = updateResultAttributes(clients, chaosDetails, resultDetails, chaosResultLabel) + if err != nil { + return err + } + } + return errors.Errorf("Unable to update the chaosresult, err: %v", updateErr) } return nil }) @@ -268,29 +280,42 @@ func AnnotateChaosResult(resultName, namespace, status, kind, name string) error } // GetChaosStatus get the chaos status based on annotations in chaosresult -func GetChaosStatus(resultDetails *types.ResultDetails, chaosDetails *types.ChaosDetails, clients clients.ClientSets) (map[string]string, error) { +func GetChaosStatus(resultDetails *types.ResultDetails, chaosDetails *types.ChaosDetails, clients clients.ClientSets) (*v1alpha1.ChaosResult, error) { result, err := clients.LitmusClient.ChaosResults(chaosDetails.ChaosNamespace).Get(context.Background(), resultDetails.Name, v1.GetOptions{}) if err != nil { return nil, err } annotations := result.ObjectMeta.Annotations - targetList := []v1alpha1.TargetDetails{} + targetList := chaosDetails.Targets for k, v := range annotations { switch strings.ToLower(v) { case "injected", "reverted", "targeted": kind := strings.TrimSpace(strings.Split(k, "/")[0]) name := strings.TrimSpace(strings.Split(k, "/")[1]) - target := v1alpha1.TargetDetails{ - Name: name, - Kind: kind, - ChaosStatus: v, + if !updateTargets(name, v, targetList) { + targetList = append(targetList, v1alpha1.TargetDetails{ + Name: name, + Kind: kind, + ChaosStatus: v, + }) } - targetList = append(targetList, target) delete(annotations, k) } } - chaosDetails.Targets = append(chaosDetails.Targets, targetList...) 
- return annotations, nil + chaosDetails.Targets = targetList + result.Annotations = annotations + return result, nil +} + +// updates the chaos status of targets which is already present inside history.targets +func updateTargets(name, status string, data []v1alpha1.TargetDetails) bool { + for i := range data { + if data[i].Name == name { + data[i].ChaosStatus = status + return true + } + } + return false } diff --git a/pkg/spring-boot/spring-boot-chaos/environment/environment.go b/pkg/spring-boot/spring-boot-chaos/environment/environment.go new file mode 100644 index 000000000..52a4706a2 --- /dev/null +++ b/pkg/spring-boot/spring-boot-chaos/environment/environment.go @@ -0,0 +1,126 @@ +package environment + +import ( + "strconv" + "strings" + + clientTypes "k8s.io/apimachinery/pkg/types" + + experimentTypes "github.com/litmuschaos/litmus-go/pkg/spring-boot/spring-boot-chaos/types" + "github.com/litmuschaos/litmus-go/pkg/types" +) + +// GetENV fetches all the env variables from the runner pod +func GetENV(experimentDetails *experimentTypes.ExperimentDetails) { + experimentDetails.ExperimentName = types.Getenv("EXPERIMENT_NAME", "spring-boot-chaos") + experimentDetails.ChaosNamespace = types.Getenv("CHAOS_NAMESPACE", "litmus") + experimentDetails.EngineName = types.Getenv("CHAOSENGINE", "") + experimentDetails.ChaosDuration, _ = strconv.Atoi(types.Getenv("TOTAL_CHAOS_DURATION", "30")) + experimentDetails.ChaosInterval, _ = strconv.Atoi(types.Getenv("CHAOS_INTERVAL", "10")) + experimentDetails.RampTime, _ = strconv.Atoi(types.Getenv("RAMP_TIME", "0")) + experimentDetails.ChaosLib = types.Getenv("LIB", "litmus") + experimentDetails.AppNS = types.Getenv("APP_NAMESPACE", "") + experimentDetails.AppLabel = types.Getenv("APP_LABEL", "") + experimentDetails.AppKind = types.Getenv("APP_KIND", "") + experimentDetails.ChaosUID = clientTypes.UID(types.Getenv("CHAOS_UID", "")) + experimentDetails.InstanceID = types.Getenv("INSTANCE_ID", "") + experimentDetails.ChaosPodName = types.Getenv("POD_NAME", "") + experimentDetails.Delay, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_DELAY", "2")) + experimentDetails.Timeout, _ = strconv.Atoi(types.Getenv("STATUS_CHECK_TIMEOUT", "180")) + experimentDetails.TargetContainer = types.Getenv("TARGET_CONTAINER", "") + experimentDetails.TargetPods = types.Getenv("TARGET_PODS", "") + experimentDetails.PodsAffectedPerc, _ = strconv.Atoi(types.Getenv("PODS_AFFECTED_PERC", "0")) + experimentDetails.Sequence = types.Getenv("SEQUENCE", "serial") + + // Chaos monkey assault parameters + experimentDetails.ChaosMonkeyPath = types.Getenv("CM_PATH", "/actuator/chaosmonkey") + experimentDetails.ChaosMonkeyPort = types.Getenv("CM_PORT", "8080") + + // Basic assault parameters + assault := experimentTypes.ChaosMonkeyAssault{} + assault.Level, _ = strconv.Atoi(types.Getenv("CM_LEVEL", "1")) + assault.Deterministic, _ = strconv.ParseBool(types.Getenv("CM_DETERMINISTIC", "true")) + assault.WatchedCustomServices = strings.Split(types.Getenv("CM_WATCHED_CUSTOM_SERVICES", ""), ",") + + // kill application assault + assault.KillApplicationActive, _ = strconv.ParseBool(types.Getenv("CM_KILL_APPLICATION_ACTIVE", "false")) + assault.KillApplicationCron = types.Getenv("CM_KILL_APPLICATION_CRON", "OFF") + + // Latency assault + assault.LatencyActive, _ = strconv.ParseBool(types.Getenv("CM_LATENCY_ACTIVE", "false")) + assault.LatencyRangeStart, _ = strconv.Atoi(types.Getenv("CM_LATENCY_RANGE_START", "500")) + assault.LatencyRangeEnd, _ = strconv.Atoi(types.Getenv("CM_LATENCY_RANGE_END", "500")) 
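+ // latency range values are in milliseconds and map to the latencyRangeStart/latencyRangeEnd fields of the assault payload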
+ + // Memory assault + assault.MemoryActive, _ = strconv.ParseBool(types.Getenv("CM_MEMORY_ACTIVE", "false")) + assault.MemoryMillisecondsHoldFilledMemory, _ = strconv.Atoi(types.Getenv("CM_MEMORY_MS_HOLD_FILLED_MEM", "90000")) + assault.MemoryMillisecondsWaitNextIncrease, _ = strconv.Atoi(types.Getenv("CM_MEMORY_MS_NEXT_INCREASE", "1000")) + assault.MemoryFillIncrementFraction, _ = strconv.ParseFloat(types.Getenv("CM_MEMORY_FILL_INC_FRACTION", "0.15"), 64) + assault.MemoryFillTargetFraction, _ = strconv.ParseFloat(types.Getenv("CM_MEMORY_FILL_TARGET_FRACTION", "0.25"), 64) + assault.MemoryCron = types.Getenv("CM_MEMORY_CRON", "OFF") + + // CPU assault + assault.CPUActive, _ = strconv.ParseBool(types.Getenv("CM_CPU_ACTIVE", "false")) + assault.CPUMillisecondsHoldLoad, _ = strconv.Atoi(types.Getenv("CM_CPU_MS_HOLD_LOAD", "90000")) + assault.CPULoadTargetFraction, _ = strconv.ParseFloat(types.Getenv("CM_CPU_LOAD_TARGET_FRACTION", "0.9"), 64) + assault.CPUCron = types.Getenv("CM_CPU_CRON", "OFF") + + // Exception assault + assault.ExceptionsActive, _ = strconv.ParseBool(types.Getenv("CM_EXCEPTIONS_ACTIVE", "false")) + + // Exception structure, will be like : {type: "", arguments: [{className: "", value: ""]} + assaultException := experimentTypes.AssaultException{} + assaultExceptionArguments := make([]experimentTypes.AssaultExceptionArgument, 0) + + assaultException.Type = types.Getenv("CM_EXCEPTIONS_TYPE", "") + + envAssaultExceptionArguments := strings.Split(types.Getenv("CM_EXCEPTIONS_ARGUMENTS", ""), ",") + + for _, argument := range envAssaultExceptionArguments { + splitArgument := strings.Split(argument, ":") + assaultExceptionArgument := experimentTypes.AssaultExceptionArgument{ + ClassName: splitArgument[0], + Value: "", + } + if len(splitArgument) > 0 { + assaultExceptionArgument.Value = splitArgument[1] + } + assaultExceptionArguments = append(assaultExceptionArguments, assaultExceptionArgument) + } + assaultException.Arguments = assaultExceptionArguments + assault.Exception = assaultException + + // End of assault building + experimentDetails.ChaosMonkeyAssault = assault + + // Building watchers + watchers := experimentTypes.ChaosMonkeyWatchers{ + Controller: false, + RestController: false, + Service: false, + Repository: false, + Component: false, + RestTemplate: false, + WebClient: false, + } + + envWatchers := strings.Split(types.Getenv("CM_WATCHERS", ""), ",") + for _, watcher := range envWatchers { + switch watcher { + case "controller": + watchers.Controller = true + case "restController": + watchers.RestController = true + case "service": + watchers.Service = true + case "repository": + watchers.Repository = true + case "component": + watchers.Component = true + case "webClient": + watchers.WebClient = true + default: + } + } + experimentDetails.ChaosMonkeyWatchers = watchers +} diff --git a/pkg/spring-boot/spring-boot-chaos/types/types.go b/pkg/spring-boot/spring-boot-chaos/types/types.go new file mode 100644 index 000000000..e96f1502b --- /dev/null +++ b/pkg/spring-boot/spring-boot-chaos/types/types.go @@ -0,0 +1,88 @@ +package types + +import ( + "k8s.io/api/core/v1" + clientTypes "k8s.io/apimachinery/pkg/types" +) + +// ExperimentDetails is for collecting all the experiment-related details +type ExperimentDetails struct { + ExperimentName string + EngineName string + ChaosDuration int + ChaosInterval int + RampTime int + ChaosLib string + AppNS string + AppLabel string + AppKind string + ChaosUID clientTypes.UID + InstanceID string + ChaosNamespace string + ChaosPodName 
string + Timeout int + Delay int + TargetContainer string + PodsAffectedPerc int + TargetPods string + LIBImagePullPolicy string + Sequence string + TargetPodList v1.PodList + + // Chaos monkey parameters + ChaosMonkeyAssault ChaosMonkeyAssault + ChaosMonkeyWatchers ChaosMonkeyWatchers + ChaosMonkeyPath string + ChaosMonkeyPort string +} + +type ChaosMonkeyAssaultRevert struct { + LatencyActive bool `json:"latencyActive"` + KillApplicationActive bool `json:"killApplicationActive"` + MemoryActive bool `json:"memoryActive"` + CPUActive bool `json:"cpuActive"` + ExceptionsActive bool `json:"exceptionsActive"` +} + +type ChaosMonkeyAssault struct { + Level int `json:"level"` + Deterministic bool `json:"deterministic"` + LatencyRangeStart int `json:"latencyRangeStart"` + LatencyRangeEnd int `json:"latencyRangeEnd"` + LatencyActive bool `json:"latencyActive"` + ExceptionsActive bool `json:"exceptionsActive"` + Exception AssaultException `json:"exceptions"` + KillApplicationActive bool `json:"killApplicationActive"` + KillApplicationCron string `json:"killApplicationCronExpression"` + WatchedCustomServices []string `json:"watchedCustomServices"` + MemoryActive bool `json:"memoryActive"` + MemoryMillisecondsHoldFilledMemory int `json:"memoryMillisecondsHoldFilledMemory"` + MemoryMillisecondsWaitNextIncrease int `json:"memoryMillisecondsWaitNextIncrease"` + MemoryFillIncrementFraction float64 `json:"memoryFillIncrementFraction"` + MemoryFillTargetFraction float64 `json:"memoryFillTargetFraction"` + MemoryCron string `json:"memoryCronExpression"` + CPUActive bool `json:"cpuActive"` + CPUMillisecondsHoldLoad int `json:"cpuMillisecondsHoldLoad"` + CPULoadTargetFraction float64 `json:"cpuLoadTargetFraction"` + CPUCron string `json:"cpuCronExpression"` +} + +type ChaosMonkeyWatchers struct { + Controller bool `json:"controller"` + RestController bool `json:"restController"` + Service bool `json:"service"` + Repository bool `json:"repository"` + Component bool `json:"component"` + RestTemplate bool `json:"restTemplate"` + WebClient bool `json:"webClient"` +} + +type AssaultException struct { + Type string `json:"type"` + Arguments []AssaultExceptionArgument `json:"arguments"` +} + +type AssaultExceptionArgument struct { + ClassName string `json:"className"` + Value string `json:"value"` +} diff --git a/pkg/status/application.go b/pkg/status/application.go index fa0283f6c..96d064375 100644 --- a/pkg/status/application.go +++ b/pkg/status/application.go @@ -251,7 +251,7 @@ func WaitForCompletion(appNs, appLabel string, clients clients.ClientSets, durat if err != nil { return errors.Errorf("Unable to find the pods with matching labels, err: %v", err) } else if len(podList.Items) == 0 { - errors.Errorf("Unable to find the pods with matching labels") + return errors.Errorf("Unable to find the pods with matching labels") } // it will check for the status of helper pod, if it is Succeeded and target container is completed then it will marked it as completed and return // if it is still running then it will check for the target container, as we can have multiple container inside helper pod (istio) diff --git a/pkg/types/types.go b/pkg/types/types.go index 0decdefc8..39a113a2b 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -53,9 +53,10 @@ type RegisterDetails struct { // ProbeDetails is for collecting all the probe details type ProbeDetails struct { Name string - Phase string Type string - Status map[string]string + Mode string + Phase string + Status v1alpha1.ProbeStatus IsProbeFailedWithError error 
RunID string RunCount int