-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
214 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
from .object_storage import AwsObjectStorageController | ||
from .spark import AwsSparkController, GlueComponent, GlueComponentArgs | ||
|
||
# Public API of this package.
# NOTE: ``__all__`` entries must be *strings*; the previous version listed
# the objects themselves, which makes ``from ... import *`` raise TypeError.
__all__ = [
    "AwsObjectStorageController",
    "AwsSparkController",
    "GlueComponent",
    "GlueComponentArgs",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
import json
import logging
from dataclasses import dataclass
from functools import cache, cached_property
from typing import Optional

import boto3
import pulumi_aws as aws
from pulumi import ComponentResource as PulumiComponentResource
from pulumi import Resource as PulumiResource
from pulumi import ResourceOptions

# TODO: The following import will be moved to a separated framework
from damavand.sparkle.data_reader import DataReader
from damavand.sparkle.data_writer import DataWriter

from damavand.controllers import SparkController
from damavand.controllers.base_controller import buildtime
|
||
|
||
# Module-level logger named after this module (standard logging convention).
logger = logging.getLogger(__name__)
|
||
|
||
class AwsSparkController(SparkController):
    """AWS implementation of ``SparkController`` backed by Glue.

    Holds a boto3 Glue client for the given region and exposes the Pulumi
    component describing the application's Glue resources.
    """

    def __init__(
        self,
        name,
        region: str,
        reader: DataReader,
        writer: DataWriter,
        id_: Optional[str] = None,
        tags: Optional[dict[str, str]] = None,
        **kwargs,
    ) -> None:
        """Initialize the controller.

        Parameters
        ----------
        name:
            Application name, forwarded to the base controller.
        region:
            AWS region used to build the boto3 Glue client.
        reader / writer:
            Data IO implementations forwarded to the base controller.
        id_:
            Optional explicit resource id.
        tags:
            Resource tags. Defaults to an empty dict; a fresh dict is
            created per call (the previous ``tags={}`` default was a single
            shared mutable object across all calls).
        """
        super().__init__(
            name, reader, writer, id_, tags if tags is not None else {}, **kwargs
        )
        self.__glue_client = boto3.client("glue", region_name=region)

    @buildtime
    @cache
    def resource(self) -> PulumiResource:
        """Return (and memoize) the Pulumi component for this controller.

        NOTE(review): ``@cache`` on an instance method keys on ``self`` and
        keeps the controller alive for the process lifetime (ruff B019);
        left as-is because the interplay with ``@buildtime`` is defined
        outside this file — confirm before changing.
        """
        return GlueComponent(
            name=self.name,
        )
|
||
|
||
@dataclass
class GlueComponentArgs:
    """Optional pre-existing resources to inject into ``GlueComponent``."""

    # Execution role for the Glue jobs; when None the component creates one.
    role: Optional[aws.iam.Role] = None
    # S3 bucket hosting job source code; when None the component creates one.
    code_repository_bucket: Optional[aws.s3.BucketV2] = None
|
||
|
||
class GlueComponent(PulumiComponentResource):
    """Pulumi component provisioning an AWS Glue based Spark stack.

    Unless pre-existing resources are supplied through ``GlueComponentArgs``,
    the component creates its own execution role, a source-code S3 bucket,
    an S3 bucket plus Glue catalog database for Iceberg tables, and the
    Glue job(s) for the application.
    """

    @staticmethod
    def assume_policy() -> dict:
        """Return the assume role policy for Glue jobs."""

        return {
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Effect": "Allow",
                    "Principal": {
                        "Service": "glue.amazonaws.com",
                    },
                    "Action": "sts:AssumeRole",
                },
            ],
        }

    @staticmethod
    def managed_policy_arns() -> list[str]:
        """Return a list of managed policy ARNs that defines the permissions for Glue jobs."""

        return [
            aws.iam.ManagedPolicy.AWS_GLUE_SERVICE_ROLE,
            aws.iam.ManagedPolicy.AMAZON_S3_FULL_ACCESS,
            aws.iam.ManagedPolicy.CLOUD_TRAIL_FULL_ACCESS,
        ]

    def __init__(
        self,
        name: str,
        args: Optional[GlueComponentArgs] = None,
        opts: Optional[ResourceOptions] = None,
    ) -> None:
        """Register the component and eagerly create its child resources.

        Parameters
        ----------
        name:
            Base name from which all child resource names are derived.
        args:
            Optional pre-existing resources to reuse instead of creating
            new ones. A fresh ``GlueComponentArgs`` is built per call; the
            previous default (``args=GlueComponentArgs()``) was one shared
            mutable instance evaluated once at definition time.
        opts:
            Standard Pulumi resource options.
        """
        super().__init__(
            f"Damavand:Spark:{GlueComponent.__name__}",
            name=f"{name}-glue-component",
            props={},
            opts=opts,
            remote=False,
        )

        self.args = args if args is not None else GlueComponentArgs()
        # Touch the cached properties so the resources are materialized as
        # children of this component at construction time.
        self.code_repository_bucket
        self.iceberg_database
        self.jobs

    @cached_property
    def role(self) -> aws.iam.Role:
        """Return an execution role for Glue jobs.

        NOTE: ``cached_property`` replaces the original ``@property`` +
        ``@cache`` stack, which cached on ``self`` at class level and kept
        every instance alive for the life of the process (ruff B019).
        """

        return self.args.role or aws.iam.Role(
            resource_name=f"{self._name}-role",
            opts=ResourceOptions(parent=self),
            name=f"{self._name}-ExecutionRole",
            assume_role_policy=json.dumps(self.assume_policy()),
            managed_policy_arns=self.managed_policy_arns(),
        )

    @cached_property
    def code_repository_bucket(self) -> aws.s3.BucketV2:
        """Return an S3 bucket for Glue jobs to host source codes."""

        # NOTE: using `bucket_prefix` to avoid name conflict as the bucket name must be globally unique.
        return self.args.code_repository_bucket or aws.s3.BucketV2(
            resource_name=f"{self._name}-code-bucket",
            opts=ResourceOptions(parent=self),
            bucket_prefix=f"{self._name}-code-bucket",
        )

    @cached_property
    def iceberg_bucket(self) -> aws.s3.BucketV2:
        """Return an S3 bucket for Iceberg tables to store data processed by Glue jobs."""

        # NOTE: using `bucket_prefix` to avoid name conflict as the bucket name must be globally unique.
        return aws.s3.BucketV2(
            resource_name=f"{self._name}-bucket",
            opts=ResourceOptions(parent=self),
            bucket_prefix=f"{self._name}-bucket",
        )

    @cached_property
    def iceberg_database(self) -> aws.glue.CatalogDatabase:
        """Return a Glue database for Iceberg tables to store data processed by Glue jobs."""

        return aws.glue.CatalogDatabase(
            resource_name=f"{self._name}-database",
            opts=ResourceOptions(parent=self),
            name=f"{self._name}-database",
            location_uri=f"s3://{self.iceberg_bucket.bucket}/",
        )

    @cached_property
    def jobs(self) -> list[aws.glue.Job]:
        """Return all the Glue jobs for the application.

        TODO(review): the job has no ``command``/script configured here;
        presumably wired to the code repository bucket elsewhere — confirm.
        """

        return [
            aws.glue.Job(
                resource_name=f"{self._name}-job",
                opts=ResourceOptions(parent=self),
                name=f"{self._name}-job",
                role_arn=self.role.arn,
            )
        ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,11 @@ | ||
from .base_controller import ApplicationController, runtime, buildtime | ||
from .object_storage import ObjectStorageController | ||
from .spark import SparkApplicationController | ||
from .spark import SparkController | ||
|
||
# Public API of this package.
# NOTE: ``__all__`` entries must be *strings*; the previous version listed
# the objects themselves, which makes ``from ... import *`` raise TypeError.
__all__ = [
    "ApplicationController",
    "ObjectStorageController",
    "SparkController",
    "runtime",
    "buildtime",
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
2 changes: 1 addition & 1 deletion
2
tests/resources/test_resource.py → ...ntrollers/test_application_controllers.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters