Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: manage OCI instance pools #154

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/cloud_instance_manager.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { InstanceGroup } from './instance_group';
import { Context } from './context';
import { CloudRetryStrategy } from './cloud_manager';
import { InstanceState } from './instance_tracker';

export interface CloudInstance {
instanceId: string;
Expand All @@ -12,7 +13,7 @@ export interface CloudInstanceManager {
launchInstances(
ctx: Context,
group: InstanceGroup,
groupCurrentCount: number,
currentInventory: InstanceState[],
quantity: number,
): Promise<Array<string | boolean>>;

Expand All @@ -33,7 +34,7 @@ export abstract class AbstractCloudInstanceManager implements CloudInstanceManag
async launchInstances(
ctx: Context,
group: InstanceGroup,
groupCurrentCount: number,
currentInventory: InstanceState[],
quantity: number,
): Promise<Array<string | boolean>> {
ctx.logger.info(`[CloudInstanceManager] Launching a batch of ${quantity} instances in group ${group.name}`);
Expand Down
15 changes: 15 additions & 0 deletions src/cloud_instance_manager_selector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ import CustomInstanceManager from './custom_instance_manager';
import NomadInstanceManager from './nomad_instance_manager';
import DigitalOceanInstanceManager from './digital_ocean_instance_manager';
import { CloudInstanceManager } from './cloud_instance_manager';
import OracleInstancePoolManager from './oracle_instance_pool_manager';
import { InstanceTracker } from './instance_tracker';

export interface CloudInstanceManagerSelectorOptions {
cloudProviders: string[];
instanceTracker: InstanceTracker;
isDryRun: boolean;
ociConfigurationFilePath: string;
ociConfigurationProfile: string;
Expand All @@ -19,6 +22,7 @@ export interface CloudInstanceManagerSelectorOptions {

export class CloudInstanceManagerSelector {
private oracleInstanceManager: OracleInstanceManager;
private oracleInstancePoolManager: OracleInstancePoolManager;
private digitalOceanInstanceManager: DigitalOceanInstanceManager;
private customInstanceManager: CustomInstanceManager;
private nomadInstanceManager: NomadInstanceManager;
Expand All @@ -32,6 +36,15 @@ export class CloudInstanceManagerSelector {
});
}

if (options.cloudProviders.includes('oraclepool')) {
this.oracleInstancePoolManager = new OracleInstancePoolManager({
isDryRun: options.isDryRun,
instanceTracker: options.instanceTracker,
ociConfigurationFilePath: options.ociConfigurationFilePath,
ociConfigurationProfile: options.ociConfigurationProfile,
});
}

if (options.cloudProviders.includes('custom')) {
this.customInstanceManager = new CustomInstanceManager({
isDryRun: options.isDryRun,
Expand All @@ -57,6 +70,8 @@ export class CloudInstanceManagerSelector {
switch (cloud) {
case 'oracle':
return this.oracleInstanceManager;
case 'oraclepool':
return this.oracleInstancePoolManager;
case 'digitalocean':
return this.digitalOceanInstanceManager;
case 'nomad':
Expand Down
4 changes: 2 additions & 2 deletions src/cloud_manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ export default class CloudManager {
async scaleUp(
ctx: Context,
group: InstanceGroup,
groupCurrentCount: number,
currentInventory: InstanceState[],
quantity: number,
isScaleDownProtected: boolean,
): Promise<number> {
Expand All @@ -91,7 +91,7 @@ export default class CloudManager {
return 0;
}

const scaleUpResult = await instanceManager.launchInstances(ctx, group, groupCurrentCount, quantity);
const scaleUpResult = await instanceManager.launchInstances(ctx, group, currentInventory, quantity);

let scaleUpCount = 0;
await Promise.all(
Expand Down
3 changes: 2 additions & 1 deletion src/custom_instance_manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { execFile } from 'child_process';
import { InstanceGroup } from './instance_group';
import { Context } from './context';
import { AbstractCloudInstanceManager } from './cloud_instance_manager';
import { InstanceState } from './instance_tracker';

export interface CustomInstanceManagerOptions {
isDryRun: boolean;
Expand All @@ -27,7 +28,7 @@ export default class CustomInstanceManager extends AbstractCloudInstanceManager
async launchInstances(
ctx: Context,
group: InstanceGroup,
groupCurrentCount: number,
currentInventory: InstanceState[],
quantity: number,
): Promise<Array<string | boolean>> {
ctx.logger.info(`[custom] Launching a batch of ${quantity} instances in group ${group.name}`);
Expand Down
2 changes: 1 addition & 1 deletion src/instance_launcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ export default class InstanceLauncher {
const scaleUpCount = await this.cloudManager.scaleUp(
ctx,
group,
count,
currentInventory,
actualScaleUpQuantity,
scaleDownProtected,
);
Expand Down
4 changes: 3 additions & 1 deletion src/oracle_instance_manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { ResourceSearchClient } from 'oci-resourcesearch';
import * as resourceSearch from 'oci-resourcesearch';
import { CloudRetryStrategy } from './cloud_manager';
import { AbstractCloudInstanceManager, CloudInstanceManager, CloudInstance } from './cloud_instance_manager';
import { InstanceState } from './instance_tracker';

interface FaultDomainMap {
[key: string]: string[];
Expand Down Expand Up @@ -43,7 +44,7 @@ export default class OracleInstanceManager implements CloudInstanceManager {
async launchInstances(
ctx: Context,
group: InstanceGroup,
groupCurrentCount: number,
currentInventory: InstanceState[],
quantity: number,
): Promise<Array<string | boolean>> {
ctx.logger.info(`[oracle] Launching a batch of ${quantity} instances in group ${group.name}`);
Expand All @@ -57,6 +58,7 @@ export default class OracleInstanceManager implements CloudInstanceManager {
for (let i = 0; i < quantity; i++) {
indexes.push(i);
}
const groupCurrentCount = currentInventory.length;

const result = await Promise.all(
indexes.map(async (index) => {
Expand Down
230 changes: 230 additions & 0 deletions src/oracle_instance_pool_manager.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
import core = require('oci-core');
import common = require('oci-common');
import { InstanceGroup } from './instance_group';
import { Context } from './context';
import { CloudRetryStrategy } from './cloud_manager';
import { CloudInstanceManager, CloudInstance } from './cloud_instance_manager';
import { workrequests } from 'oci-sdk';
import { InstanceState, InstanceTracker } from './instance_tracker';

// Waiter tuning for instance-pool launch operations: give up after
// maxLaunchTimeInSeconds, polling at a fixed launchDelayInSeconds interval.
const maxLaunchTimeInSeconds = 30; // The duration for waiter configuration before failing. Currently set to 30 seconds
const launchDelayInSeconds = 5; // The fixed delay between waiter polls. Currently set to 5 seconds

// Waiter tuning for instance detach operations: give up after
// maxDetachTimeInSeconds, backing off exponentially up to maxDetachDelayInSeconds.
const maxDetachTimeInSeconds = 180; // The duration for waiter configuration before failing. Currently set to 180 seconds
const maxDetachDelayInSeconds = 30; // The max delay for the waiter configuration. Currently set to 30 seconds

// Short, fixed-interval waiter used while waiting for a pool to return to RUNNING after a scale up.
const launchWaiterConfiguration: common.WaiterConfiguration = {
    terminationStrategy: new common.MaxTimeTerminationStrategy(maxLaunchTimeInSeconds),
    delayStrategy: new common.FixedTimeDelayStrategy(launchDelayInSeconds),
};

// Longer, exponential-backoff waiter used while waiting for an instance detach to complete.
const detachWaiterConfiguration: common.WaiterConfiguration = {
    terminationStrategy: new common.MaxTimeTerminationStrategy(maxDetachTimeInSeconds),
    delayStrategy: new common.ExponentialBackoffDelayStrategy(maxDetachDelayInSeconds),
};

// Construction options for OracleInstancePoolManager.
export interface OracleInstancePoolManagerOptions {
    isDryRun: boolean; // when true, pool size changes are logged but not applied
    instanceTracker: InstanceTracker; // used to read the full (including shutting-down) inventory for a group
    ociConfigurationFilePath: string; // path to the OCI config file used for authentication
    ociConfigurationProfile: string; // profile name within the OCI config file
}

/**
 * CloudInstanceManager implementation backed by OCI instance pools.
 *
 * Instead of launching individual instances, scaling is done by resizing the
 * underlying instance pool to the group's desired count (plus any instances
 * still shutting down). Scale-down is never performed by shrinking the pool
 * size here; instances are detached individually via detachInstance once
 * their shutdown is confirmed.
 *
 * NOTE(review): group.instanceConfigurationId is used throughout as the
 * instance *pool* OCID for this provider — confirm that groups configured
 * with cloud 'oraclepool' store the pool OCID in that field.
 */
export default class OracleInstancePoolManager implements CloudInstanceManager {
    private instanceTracker: InstanceTracker;
    private isDryRun: boolean;
    private provider: common.ConfigFileAuthenticationDetailsProvider;
    private computeManagementClient: core.ComputeManagementClient;
    private workRequestClient: workrequests.WorkRequestClient;

    constructor(options: OracleInstancePoolManagerOptions) {
        this.isDryRun = options.isDryRun;
        this.instanceTracker = options.instanceTracker;
        this.provider = new common.ConfigFileAuthenticationDetailsProvider(
            options.ociConfigurationFilePath,
            options.ociConfigurationProfile,
        );
        this.computeManagementClient = new core.ComputeManagementClient({
            authenticationDetailsProvider: this.provider,
        });
        this.workRequestClient = new workrequests.WorkRequestClient({
            authenticationDetailsProvider: this.provider,
        });

        // launchInstances is passed around as a callback; bind so `this` survives.
        this.launchInstances = this.launchInstances.bind(this);
    }

    // Test seam: allows injecting a mock/stub compute management client.
    setComputeManagementClient(client: core.ComputeManagementClient) {
        this.computeManagementClient = client;
    }

    getComputeManagementClient() {
        return this.computeManagementClient;
    }

    /**
     * Detaches a single instance from the group's instance pool, waiting (with
     * exponential backoff, up to maxDetachTimeInSeconds) for the detach work
     * request to complete.
     *
     * @param ctx request context, used for logging
     * @param group the instance group whose pool the instance belongs to
     * @param instance OCID of the instance to detach
     */
    async detachInstance(ctx: Context, group: InstanceGroup, instance: string): Promise<void> {
        ctx.logger.info(`[oraclepool] Detaching instance ${instance}`);
        this.computeManagementClient.regionId = group.region;
        // Keep the work request client on the same region as the compute client,
        // matching launchInstances; the detach waiter polls via this client.
        this.workRequestClient.regionId = group.region;

        const cwaiter = this.computeManagementClient.createWaiters(this.workRequestClient, detachWaiterConfiguration);
        const response = await cwaiter.forDetachInstancePoolInstance({
            instancePoolId: group.instanceConfigurationId,
            detachInstancePoolInstanceDetails: { instanceId: instance },
        });
        ctx.logger.info(`[oraclepool] Finished detaching instance ${instance}`, { response });
    }

    /**
     * Reconciles the instance pool size with the group's desired count.
     *
     * The `quantity` parameter is informational for pools: the pool is always
     * resized to desiredCount plus the number of instances still shutting down
     * (so in-flight shutdowns are not double-counted against capacity).
     * Shrinking the pool is intentionally never done here — see detachInstance.
     *
     * @param ctx request context, used for logging
     * @param group the instance group to scale; its instanceConfigurationId holds the pool OCID
     * @param currentInventory instance states currently considered live for the group
     * @param quantity requested number of new instances (logged only; pool size is derived from the group)
     * @returns instance IDs newly observed in the pool (treated as "launched now")
     */
    async launchInstances(
        ctx: Context,
        group: InstanceGroup,
        currentInventory: InstanceState[],
        quantity: number,
    ): Promise<Array<string | boolean>> {
        ctx.logger.info(`[oraclepool] Launching a batch of ${quantity} instances in group ${group.name}`);

        const result = <string[]>[];

        this.computeManagementClient.regionId = group.region;
        const poolDetails = await this.computeManagementClient.getInstancePool({
            instancePoolId: group.instanceConfigurationId,
        });

        ctx.logger.debug(`[oraclepool] Instance Pool Details for group ${group.name}`, { poolDetails });

        const poolInstances = await this.computeManagementClient.listInstancePoolInstances({
            compartmentId: group.compartmentId,
            instancePoolId: group.instanceConfigurationId,
        });

        const existingInstanceIds = poolInstances.items.map((instance) => {
            return instance.id;
        });

        // Full inventory includes instances in shutdown; currentInventory does not.
        const fullInventory = await this.instanceTracker.trimCurrent(ctx, group.name, false);

        const currentInstanceIds = currentInventory.map((instance) => {
            return instance.instanceId;
        });

        // Instances present in the full inventory but not the current one are shutting down.
        const shuttingDownInstances = fullInventory
            .filter((instance) => {
                return !currentInstanceIds.includes(instance.instanceId);
            })
            .map((instance) => {
                return instance.instanceId;
            });

        // mark any instances not previously seen as being launched now
        result.push(
            ...existingInstanceIds.filter((instanceId) => {
                return !shuttingDownInstances.includes(instanceId) && !currentInstanceIds.includes(instanceId);
            }),
        );

        ctx.logger.debug(`[oraclepool] Instance pool ${group.name} instances`, { instances: poolInstances.items });
        if (result.length > 0) {
            ctx.logger.warn(`[oraclepool] Found instances in pool not in inventory, marking as launched now`, {
                result,
            });
        }

        // always use the group desired count + shutting down count for instance pools
        const newSize = group.scalingOptions.desiredCount + shuttingDownInstances.length;
        if (newSize == poolDetails.instancePool.size) {
            // underlying pool size matches the desired count, so no need to update group
            ctx.logger.info(`[oraclepool] Instance pool ${group.name} size matches desired count, no changes needed`, {
                newSize,
            });
            return result;
        }

        // never scale down via size, always do so by detaching instances on shutdown confirmation
        if (newSize < poolDetails.instancePool.size) {
            // underlying pool size would shrink with new size, so waiting for instances to be detached after confirming shutdown
            ctx.logger.warn(`[oraclepool] Instance pool ${group.name} size would shrink, no changes applied`, {
                size: poolDetails.instancePool.size,
                newSize,
            });
            return result;
        }

        if (this.isDryRun) {
            ctx.logger.info(`[oraclepool] Dry run enabled, instance pool size change skipped`, { newSize });
        } else {
            const updateResult = await this.computeManagementClient.updateInstancePool({
                instancePoolId: group.instanceConfigurationId,
                updateInstancePoolDetails: {
                    size: newSize,
                },
            });

            ctx.logger.info(`[oraclepool] Updated instance pool size for group ${group.name}`, { updateResult });
        }

        // Wait (bounded by launchWaiterConfiguration) for the pool to provision and return to RUNNING.
        this.workRequestClient.regionId = group.region;
        const cwaiter = this.computeManagementClient.createWaiters(this.workRequestClient, launchWaiterConfiguration);
        try {
            const runningPool = await cwaiter.forInstancePool(
                {
                    instancePoolId: group.instanceConfigurationId,
                },
                core.models.InstancePool.LifecycleState.Running,
            );

            ctx.logger.info(`[oraclepool] Instance pool for ${group.name} back in running state`, { runningPool });

            if (runningPool.instancePool.size == newSize) {
                ctx.logger.debug(`[oraclepool] Instance pool ${group.name} size matches new size`, {
                    newSize,
                });
            } else {
                ctx.logger.error(`[oraclepool] Instance pool ${group.name} size DOES NOT match new size`, {
                    newSize,
                });
            }
        } catch (err) {
            ctx.logger.error(`[oraclepool] Instance pool for ${group.name} failed to return to running state`, { err });
            // the next launch job will eventually see the new instances and return them
        }

        ctx.logger.debug(`[oraclepool] Instance pool ${group.name} listing pool instances`);

        const newPoolInstances = await this.computeManagementClient.listInstancePoolInstances({
            compartmentId: group.compartmentId,
            instancePoolId: group.instanceConfigurationId,
        });

        // Report only instances that appeared since the pre-resize listing.
        result.push(
            ...newPoolInstances.items
                .map((instance) => {
                    return instance.id;
                })
                .filter((instanceId) => {
                    return !existingInstanceIds.includes(instanceId);
                }),
        );

        ctx.logger.info(`[oraclepool] Finished launching all the instances in group ${group.name}`, { result });

        return result;
    }

    /**
     * Lists the instances currently attached to the group's instance pool.
     *
     * @param ctx request context, used for logging
     * @param group the instance group whose pool is listed
     * @param _ retry strategy (unused for pools; the SDK client handles retries)
     * @returns one CloudInstance per pool member
     */
    async getInstances(ctx: Context, group: InstanceGroup, _: CloudRetryStrategy): Promise<Array<CloudInstance>> {
        const computeManagementClient = this.computeManagementClient;
        computeManagementClient.regionId = group.region;

        const poolInstances = await computeManagementClient.listInstancePoolInstances({
            compartmentId: group.compartmentId,
            instancePoolId: group.instanceConfigurationId,
        });

        return poolInstances.items.map((instance) => {
            ctx.logger.debug('[oraclepool] Found instance in oracle pool', { instance });
            return {
                instanceId: instance.id,
                displayName: instance.displayName,
                cloudStatus: instance.state,
            };
        });
    }
}
Loading
Loading