forked from outerbounds/terraform-aws-metaflow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
batch.tf
92 lines (76 loc) · 3.79 KB
/
batch.tf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# Managed AWS Batch compute environment that backs the Metaflow job queue.
# Arguments that only make sense for EC2-backed environments are set to null
# whenever local.enable_fargate_on_batch is true.
resource "aws_batch_compute_environment" "this" {
  /* Unique name for compute environment.
     We use compute_environment_name_prefix as opposed to just compute_environment_name because batch compute
     environments must be created and destroyed, never edited. This way, when we go to make a "modification", we
     stand up a new batch compute environment with a new unique name and, once that succeeds, the old one is torn
     down. If we had just used compute_environment_name, there would be a name conflict when standing up the new
     compute environment with the modifications applied, and the process would fail.
  */
  compute_environment_name_prefix = local.compute_env_prefix_name

  # Give permissions so the batch service can make API calls.
  service_role = aws_iam_role.batch_execution_role.arn
  type         = "MANAGED"

  # On destroy, this avoids removing the policies below until the compute environment has been destroyed.
  depends_on = [
    aws_iam_role_policy.grant_iam_pass_role,
    aws_iam_role_policy.grant_custom_access_policy,
    aws_iam_role_policy.grant_iam_custom_policies,
    aws_iam_role_policy.grant_ec2_custom_policies,
  ]

  compute_resources {
    # Give permissions so the ECS container instances can make API calls (EC2-backed environments only).
    instance_role = !local.enable_fargate_on_batch ? aws_iam_instance_profile.ecs_instance_role.arn : null

    # List of instance types that can be launched (EC2-backed environments only).
    instance_type = !local.enable_fargate_on_batch ? var.compute_environment_instance_types : null

    # Range of number of vCPUs. Only the maximum is set for Fargate.
    max_vcpus     = var.compute_environment_max_vcpus
    min_vcpus     = !local.enable_fargate_on_batch ? var.compute_environment_min_vcpus : null
    desired_vcpus = !local.enable_fargate_on_batch ? var.compute_environment_desired_vcpus : null

    # Prefers cheap vCPU approaches (EC2-backed environments only).
    allocation_strategy = !local.enable_fargate_on_batch ? "BEST_FIT" : null

    /* Links to a launch template that has more than the standard 8GB of disk space, so we can download training
       data. One launch_template block is generated per element of aws_launch_template.cpu.
       The version is pinned to the template's latest_version, so publishing a new launch template version changes
       this argument and Terraform will plan a corresponding change to the compute environment.
       NOTE(review): whether that change is applied in place or as a replacement depends on the AWS provider
       version in use - confirm. create_before_destroy below keeps a replacement safe either way.
    */
    dynamic "launch_template" {
      for_each = aws_launch_template.cpu
      content {
        launch_template_id = launch_template.value.id
        version            = launch_template.value.latest_version
      }
    }

    # Security group to apply to the instances launched.
    security_group_ids = [
      aws_security_group.this.id,
    ]

    # Which subnets to launch the instances into.
    subnets = [
      var.subnet1_id,
      var.subnet2_id,
    ]

    # Spot-only settings; left unset unless local.enable_spot_on_batch is true.
    spot_iam_fleet_role = local.enable_spot_on_batch ? aws_iam_role.spot_fleet_role[0].arn : null
    bid_percentage      = local.enable_spot_on_batch ? var.compute_environment_spot_bid_percentage : null

    # Type of instance Amazon EC2 for on-demand. Can use "SPOT" to use unused instances at discount if available
    type = upper(var.batch_type)

    # Tags applied to the resources launched by the compute environment. The AWS Batch API documents this
    # argument as not applicable to Fargate and says not to specify it there, so it is gated like the other
    # EC2-only arguments above.
    tags = !local.enable_fargate_on_batch ? var.standard_tags : null
  }

  # Tags on the compute environment resource itself (applies to every environment type).
  tags = var.standard_tags

  lifecycle {
    /* From here https://github.com/terraform-providers/terraform-provider-aws/issues/11077#issuecomment-560416740
       helps with "modifying" batch compute environments which requires creating new ones and deleting old ones
       as no in-place modification can be made
    */
    create_before_destroy = true

    # To ensure terraform redeploys do not silently overwrite an up to date desired_vcpus that metaflow may modify
    ignore_changes = [compute_resources[0].desired_vcpus]
  }
}
# Batch job queue, created in the ENABLED state, that dispatches jobs to the
# compute environment aws_batch_compute_environment.this.
resource "aws_batch_job_queue" "this" {
  name     = local.batch_queue_name
  priority = 1
  state    = "ENABLED"
  tags     = var.standard_tags

  # A single compute environment backs this queue.
  # NOTE(review): newer AWS provider releases favor compute_environment_order over this argument -
  # confirm against the pinned provider version before upgrading.
  compute_environments = [aws_batch_compute_environment.this.arn]
}