Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test #18

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open

test #18

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 28 additions & 25 deletions src/assisted_installer_controller/assisted_installer_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,7 @@ func logHostsStatus(log logrus.FieldLogger, hosts map[string]inventory_client.Ho
log.Infof("Hosts status: %v", hostsStatus)
}

func (c *controller) WaitAndUpdateNodesStatus(status *ControllerStatus) {

func (c *controller) WaitAndUpdateNodesStatus(status *ControllerStatus) error {
c.log.Infof("Waiting till all nodes will join and update status to assisted installer")
ignoreStatuses := []string{models.HostStatusDisabled}
var hostsInError int
Expand All @@ -141,7 +140,7 @@ func (c *controller) WaitAndUpdateNodesStatus(status *ControllerStatus) {
//if all hosts are in error, mark the failure and finish
if hostsInError > 0 && hostsInError == len(hostsInProgressMap) {
status.Error()
break
return errors.Errorf("return error")
}
//if all hosts are successfully installed, finish
if len(hostsInProgressMap) == 0 {
Expand Down Expand Up @@ -182,6 +181,7 @@ func (c *controller) WaitAndUpdateNodesStatus(status *ControllerStatus) {
c.updateConfiguringStatusIfNeeded(assistedNodesMap)
}
c.log.Infof("Done waiting for all the nodes. Nodes in error status: %d\n", hostsInError)
return nil
}

func (c *controller) HackDNSAddressConflict(wg *sync.WaitGroup) {
Expand Down Expand Up @@ -318,25 +318,28 @@ func isCsrApproved(csr *certificatesv1.CertificateSigningRequest) bool {
return false
}

func (c controller) PostInstallConfigs(wg *sync.WaitGroup, status *ControllerStatus) {
func (c controller) PostInstallConfigs(ctx context.Context, wg *sync.WaitGroup, status *ControllerStatus) {
defer wg.Done()
for {
time.Sleep(GeneralWaitInterval)
err := utils.WaitForPredicateWithContext(ctx, time.Duration(1<<63 - 1), GeneralWaitInterval, func() bool {
ctx := utils.GenerateRequestContext()
cluster, err := c.ic.GetCluster(ctx)
if err != nil {
utils.RequestIDLogger(ctx, c.log).WithError(err).Errorf("Failed to get cluster %s from assisted-service", c.ClusterID)
continue
return false
}
// waiting until the cluster is installed (3 masters must be installed)
if *cluster.Status != models.ClusterStatusFinalizing {
continue
return false
}
break
return true
})
if err != nil {
return
}

errMessage := ""
err := c.postInstallConfigs()
// TODO verify if ctx was cancelled
err = c.postInstallConfigs(ctx)
if err != nil {
errMessage = err.Error()
status.Error()
Expand All @@ -345,19 +348,19 @@ func (c controller) PostInstallConfigs(wg *sync.WaitGroup, status *ControllerSta
c.sendCompleteInstallation(success, errMessage)
}

func (c controller) postInstallConfigs() error {
func (c controller) postInstallConfigs(ctx context.Context) error {
var err error

c.log.Infof("Waiting for cluster version operator: %t", c.WaitForClusterVersion)

if c.WaitForClusterVersion {
err = c.waitingForClusterVersion()
err = c.waitingForClusterVersion(ctx)
if err != nil {
return err
}
}

err = utils.WaitForPredicate(WaitTimeout, GeneralWaitInterval, c.addRouterCAToClusterCA)
err = utils.WaitForPredicateWithContext(ctx, WaitTimeout, GeneralWaitInterval, c.addRouterCAToClusterCA)
if err != nil {
return errors.Errorf("Timeout while waiting router ca data")
}
Expand All @@ -367,21 +370,21 @@ func (c controller) postInstallConfigs() error {
return err
}
if unpatch && c.HighAvailabilityMode != models.ClusterHighAvailabilityModeNone {
err = utils.WaitForPredicate(WaitTimeout, GeneralWaitInterval, c.unpatchEtcd)
err = utils.WaitForPredicateWithContext(ctx, WaitTimeout, GeneralWaitInterval, c.unpatchEtcd)
if err != nil {
return errors.Errorf("Timeout while trying to unpatch etcd")
}
} else {
c.log.Infof("Skipping etcd unpatch for cluster version %s", c.ControllerConfig.OpenshiftVersion)
}

err = utils.WaitForPredicate(WaitTimeout, GeneralWaitInterval, c.validateConsoleAvailability)
err = utils.WaitForPredicateWithContext(ctx, WaitTimeout, GeneralWaitInterval, c.validateConsoleAvailability)
if err != nil {
return errors.Errorf("Timeout while waiting for console to become available")
}

waitTimeout := c.getMaximumOLMTimeout()
err = utils.WaitForPredicate(waitTimeout, GeneralWaitInterval, c.waitForOLMOperators)
err = utils.WaitForPredicateWithContext(ctx, waitTimeout, GeneralWaitInterval, c.waitForOLMOperators)
if err != nil {
// In case the timeout occur, we have to update the pending OLM operators to failed state,
// so the assisted-service can update the cluster state to completed.
Expand All @@ -394,32 +397,32 @@ func (c controller) postInstallConfigs() error {
return nil
}

func (c controller) UpdateBMHs(wg *sync.WaitGroup) {
func (c controller) UpdateBMHs(ctx context.Context, wg *sync.WaitGroup) {
defer wg.Done()
for {
time.Sleep(GeneralWaitInterval)
_ = utils.WaitForPredicateWithContext(ctx, time.Duration(1<<63 - 1), GeneralWaitInterval, func() bool {
bmhs, err := c.kc.ListBMHs()
if err != nil {
c.log.WithError(err).Errorf("Failed to list BMH hosts")
continue
return false
}

c.log.Infof("Number of BMHs is %d", len(bmhs.Items))

machines, err := c.unallocatedMachines(bmhs)
if err != nil {
c.log.WithError(err).Errorf("Failed to find unallocated machines")
continue
return false
}

c.log.Infof("Number of unallocated Machines is %d", len(machines.Items))

allUpdated := c.updateBMHs(&bmhs, machines)
if allUpdated {
c.log.Infof("Updated all the BMH CRs, finished successfully")
return
return true
}
}
return false
})
}

func (c controller) unallocatedMachines(bmhList metal3v1alpha1.BareMetalHostList) (*mapiv1beta1.MachineList, error) {
Expand Down Expand Up @@ -766,7 +769,7 @@ func (c controller) validateConsoleAvailability() bool {
//
// This function would be aligned with the console operator reporting workflow
// as part of the deprecation of the old API in MGMT-5188.
func (c controller) waitingForClusterVersion() error {
func (c controller) waitingForClusterVersion(ctx context.Context) error {
isClusterVersionAvailable := func() bool {
c.log.Infof("Checking cluster version operator availability status")
co, err := c.kc.GetClusterVersion("version")
Expand Down Expand Up @@ -801,7 +804,7 @@ func (c controller) waitingForClusterVersion() error {
return false
}

err := utils.WaitForPredicate(WaitTimeout, GeneralProgressUpdateInt, isClusterVersionAvailable)
err := utils.WaitForPredicateWithContext(ctx, WaitTimeout, GeneralProgressUpdateInt, isClusterVersionAvailable)
if err != nil {
return errors.Errorf("Timeout while waiting for cluster version to be available")
}
Expand Down
16 changes: 12 additions & 4 deletions src/main/assisted-installer-controller/assisted_installer_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,11 @@ func main() {
ctxApprove, cancelApprove := context.WithCancel(context.Background())
go assistedController.ApproveCsrs(ctxApprove, &wg)
wg.Add(1)
go assistedController.PostInstallConfigs(&wg, &status)

ctxOthers, cancelOthers := context.WithCancel(context.Background())
go assistedController.PostInstallConfigs(ctxOthers, &wg, &status)
wg.Add(1)
go assistedController.UpdateBMHs(&wg)
go assistedController.UpdateBMHs(ctxOthers, &wg)
wg.Add(1)
go assistedController.HackDNSAddressConflict(&wg)
wg.Add(1)
Expand All @@ -76,13 +78,19 @@ func main() {
wgLogs.Add(1)

assistedController.SetReadyState()
assistedController.WaitAndUpdateNodesStatus(&status)
err = assistedController.WaitAndUpdateNodesStatus(&status)
logger.Infof("Sleeping for 10 minutes to give a chance to approve all csrs")
time.Sleep(10 * time.Minute)
if err == nil {
time.Sleep(10 * time.Minute)
} else {
cancelOthers()
}
cancelApprove()


logger.Infof("Waiting for all go routines to finish")
wg.Wait()
// TODO verify if canceled and cancel logs without waiting for error
if !status.HasError() {
//with error the logs are canceled within UploadLogs
logger.Infof("closing logs...")
Expand Down
6 changes: 6 additions & 0 deletions src/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,17 @@ func GetHostIpsFromInventory(inventory *models.Inventory) ([]string, error) {
}

// WaitForPredicate is the context-free entry point for predicate polling: it
// calls WaitForPredicateWithContext with context.TODO() and forwards timeout,
// interval and predicate unchanged, returning whatever error that call returns.
func WaitForPredicate(timeout time.Duration, interval time.Duration, predicate func() bool) error {
return WaitForPredicateWithContext(context.TODO(), timeout, interval, predicate)
}

func WaitForPredicateWithContext(ctx context.Context, timeout time.Duration, interval time.Duration, predicate func() bool) error {
timeoutAfter := time.After(timeout)
ticker := time.NewTicker(interval)
// Keep trying until we time out or the predicate returns true
for {
select {
case <- ctx.Done():
return errors.Errorf("Cancelled")
// Got a timeout! fail with a timeout error
case <-timeoutAfter:
return errors.New("timed out")
Expand Down