Added initial defaults objects in first step to OOP #64

Draft · wants to merge 5 commits into main · Changes from all commits
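Summary of the pattern this PR introduces: instead of each class reading individual values out of the generated defaults YAML (`read_yaml_file(GENERATED_DEFAULTS_FILE)` followed by dict indexing), every class now stores a single `self.defaults` object returned by `get_defaults()` and reads values as attributes (`self.defaults.gcp.project_id`, `self.defaults.tooling.use_ci`, and so on). The definitions of `get_defaults()` and the config classes are not part of this diff, so the sketch below only illustrates the shape implied by the attribute paths used in these files; all names and types here are assumptions, not the actual implementation.

from dataclasses import dataclass

from google_cloud_automlops.utils.utils import read_yaml_file
from google_cloud_automlops.utils.constants import GENERATED_DEFAULTS_FILE


@dataclass
class GCPDefaults:
    # kfp.py reads `artifact_repo_location.value`, so in the real code this
    # field is probably an Enum member rather than a plain string.
    artifact_repo_location: str
    artifact_repo_name: str
    base_image: str
    naming_prefix: str
    project_id: str
    pubsub_topic_name: str
    pipeline_job_runner_service_account: str
    pipeline_job_submission_service_type: str
    setup_model_monitoring: bool


@dataclass
class ToolingDefaults:
    use_ci: bool


@dataclass
class PipelineSpecsDefaults:
    # The old code read these values from defaults['pipelines'].
    gs_pipeline_job_spec_path: str
    pipeline_storage_path: str


@dataclass
class Defaults:
    gcp: GCPDefaults
    tooling: ToolingDefaults
    pipeline_specs: PipelineSpecsDefaults


def get_defaults() -> Defaults:
    """Hypothetical sketch: map the generated defaults YAML onto typed objects."""
    raw = read_yaml_file(GENERATED_DEFAULTS_FILE)
    return Defaults(
        gcp=GCPDefaults(**{f: raw['gcp'][f] for f in GCPDefaults.__dataclass_fields__}),
        tooling=ToolingDefaults(use_ci=raw['tooling']['use_ci']),
        pipeline_specs=PipelineSpecsDefaults(
            gs_pipeline_job_spec_path=raw['pipelines']['gs_pipeline_job_spec_path'],
            pipeline_storage_path=raw['pipelines']['pipeline_storage_path']))

With a shape like this, each build() method only needs `self.defaults = get_defaults()`, and downstream code reads typed attributes instead of raw dict keys.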
85 changes: 27 additions & 58 deletions google_cloud_automlops/orchestration/base.py
@@ -26,14 +26,14 @@
import docstring_parser

from google_cloud_automlops.utils.utils import (
get_defaults,
get_function_source_definition,
read_yaml_file
)
from google_cloud_automlops.utils.constants import (
BASE_DIR,
DEFAULT_PIPELINE_NAME,
GENERATED_DEFAULTS_FILE
)
from google_cloud_automlops.utils.enums import Parameter

T = TypeVar('T')

@@ -78,10 +78,7 @@ def __init__(self,
self.src_code = get_function_source_definition(self.func)

# Instantiate attributes to be set during build
self.artifact_repo_location = None
self.artifact_repo_name = None
self.project_id = None
self.naming_prefix = None
self.defaults = None

def build(self):
"""Instantiates an abstract built method to create and write task files. Also reads in
@@ -90,14 +87,7 @@ def build(self):
Raises:
NotImplementedError: The subclass has not defined the `build` method.
"""

defaults = read_yaml_file(GENERATED_DEFAULTS_FILE)
self.artifact_repo_location = defaults['gcp']['artifact_repo_location']
self.artifact_repo_name = defaults['gcp']['artifact_repo_name']
self.project_id = defaults['gcp']['project_id']
self.naming_prefix = defaults['gcp']['naming_prefix']

raise NotImplementedError('Subclass needs to define this.')
self.defaults = get_defaults()

def _get_function_return_types(self) -> list:
"""Returns a formatted list of function return types.
@@ -124,14 +114,15 @@ def _get_function_return_types(self) -> list:
if not (hasattr(annotation,'__annotations__') and isinstance(annotation.__annotations__, dict)):
raise TypeError(f'''Return type hint for function "{self.name}" must be a NamedTuple.''')

# Creates a dictionary of metadata for each object returned by component
outputs = []
# Creates a parameter object for each parameter returned by component
outputs: List[Parameter] = []
for name, type_ in annotation.__annotations__.items():
metadata = {}
metadata['name'] = name
metadata['type'] = type_
metadata['description'] = None
outputs.append(metadata)
p = Parameter(
name=name,
type=type_,
description=None
)
outputs.append(p)
return outputs

def _get_function_parameters(self) -> list:
@@ -150,18 +141,19 @@ def _get_function_parameters(self) -> list:
doc_dict = {p.arg_name: p.description for p in parsed_docstring.params}

# Extract parameter metadata
parameter_holder = []
parameter_holder: List[Parameter] = []
for param in parameters:
metadata = {}
metadata['name'] = param.name
metadata['description'] = doc_dict.get(param.name)
metadata['type'] = self.maybe_strip_optional_from_annotation(
param.annotation)
parameter_holder.append(metadata)
p = Parameter(
name=param.name,
type=self.maybe_strip_optional_from_annotation(
param.annotation),
description=doc_dict.get(param.name)
)
parameter_holder.append(p)
# pylint: disable=protected-access
if metadata['type'] == inspect._empty:
if p.type == inspect._empty:
raise TypeError(
f'''Missing type hint for parameter "{metadata['name']}". '''
f'''Missing type hint for parameter "{p.name}". '''
f'''Please specify the type for this parameter.''')
return parameter_holder

@@ -216,14 +208,9 @@ def __init__(self,
self.comps = self.get_pipeline_components(func, comps_dict)

# Instantiate attributes to be set at build process
self.base_image = None
self.defaults = None
self.custom_training_job_specs = None
self.pipeline_params = None
self.pubsub_topic_name = None
self.use_ci = None
self.project_id = None
self.gs_pipeline_job_spec_path = None
self.setup_model_monitoring = None

def build(self,
pipeline_params: dict,
@@ -249,15 +236,7 @@ def build(self,
self.pipeline_params = pipeline_params

# Extract additional attributes from defaults file
defaults = read_yaml_file(GENERATED_DEFAULTS_FILE)
self.project_id = defaults['gcp']['project_id']
self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path']
self.base_image = defaults['gcp']['base_image']
self.pubsub_topic_name = defaults['gcp']['pubsub_topic_name']
self.use_ci = defaults['tooling']['use_ci']
self.setup_model_monitoring = defaults['gcp']['setup_model_monitoring']

raise NotImplementedError('Subclass needs to define this.')
self.defaults = get_defaults()

def get_pipeline_components(self,
pipeline_func: Callable,
@@ -301,12 +280,7 @@ class BaseServices():
def __init__(self) -> None:
"""Instantiates a generic Services object.
"""
self.pipeline_storage_path = None
self.pipeline_job_runner_service_account = None
self.pipeline_job_submission_service_type = None
self.project_id = None
self.pipeline_job_submission_service_type = None
self.setup_model_monitoring = None
self.defaults = None

# Set directory for files to be written to
self.submission_service_base_dir = BASE_DIR + 'services/submission_service'
@@ -325,12 +299,7 @@ def build(self):
requirements.txt
"""
# Extract additional attributes from defaults file
defaults = read_yaml_file(GENERATED_DEFAULTS_FILE)
self.pipeline_storage_path = defaults['pipelines']['pipeline_storage_path']
self.pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account']
self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type']
self.project_id = defaults['gcp']['project_id']
self.setup_model_monitoring = defaults['gcp']['setup_model_monitoring']
self.defaults = get_defaults()

# Set directory for files to be written to
self.submission_service_base_dir = BASE_DIR + 'services/submission_service'
Expand All @@ -339,7 +308,7 @@ def build(self):
self._build_submission_services()

# Setup model monitoring
if self.setup_model_monitoring:
if self.defaults.gcp.setup_model_monitoring:
self._build_monitoring()

def _build_monitoring(self):
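The base.py changes also replace the per-parameter metadata dicts with `Parameter` objects imported from `google_cloud_automlops.utils.enums`. That class is not shown in this diff either; a minimal sketch consistent with the fields used above (name, type, description) could be:

from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class Parameter:
    """Hypothetical sketch of the Parameter container used in base.py.

    Only the fields accessed in this diff are included; the real class in
    google_cloud_automlops.utils.enums may differ.
    """
    name: str
    type: Any  # the (possibly Optional-stripped) annotation object
    description: Optional[str] = None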
51 changes: 20 additions & 31 deletions google_cloud_automlops/orchestration/kfp.py
@@ -74,11 +74,7 @@ class KFPComponent(BaseComponent):
def build(self):
"""Constructs files for running and managing Kubeflow pipelines.
"""
defaults = read_yaml_file(GENERATED_DEFAULTS_FILE)
self.artifact_repo_location = defaults['gcp']['artifact_repo_location']
self.artifact_repo_name = defaults['gcp']['artifact_repo_name']
self.project_id = defaults['gcp']['project_id']
self.naming_prefix = defaults['gcp']['naming_prefix']
super().build()

# Set and create directory for components if it does not already exist
component_dir = BASE_DIR + 'components/' + self.name
@@ -91,10 +87,10 @@ def build(self):
BASE_DIR + 'components/component_base/src/'])

compspec_image = (
f'''{self.artifact_repo_location}-docker.pkg.dev/'''
f'''{self.project_id}/'''
f'''{self.artifact_repo_name}/'''
f'''{self.naming_prefix}/'''
f'''{self.defaults.gcp.artifact_repo_location.value}-docker.pkg.dev/'''
f'''{self.defaults.gcp.project_id}/'''
f'''{self.defaults.gcp.artifact_repo_name}/'''
f'''{self.defaults.gcp.naming_prefix}/'''
f'''components/component_base:latest''')

# Write component spec
@@ -192,19 +188,12 @@ def build(self,
to None.

"""
super().build(pipeline_params, custom_training_job_specs)

# Save parameters as attributes
self.custom_training_job_specs = custom_training_job_specs
self.pipeline_params = pipeline_params

# Extract additional attributes from defaults file
defaults = read_yaml_file(GENERATED_DEFAULTS_FILE)
self.project_id = defaults['gcp']['project_id']
self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path']
self.base_image = defaults['gcp']['base_image']
self.use_ci = defaults['tooling']['use_ci']
self.pubsub_topic_name = defaults['gcp']['pubsub_topic_name'] if self.use_ci else None
self.setup_model_monitoring = defaults['gcp']['setup_model_monitoring']

# Build necessary folders
make_dirs([
f'{BASE_DIR}scripts/pipeline_spec/',
@@ -217,16 +206,16 @@
filepath=f'{BASE_DIR}README.md',
text=render_jinja(
template_path=import_files(KFP_TEMPLATES_PATH) / 'README.md.j2',
setup_model_monitoring=self.setup_model_monitoring,
use_ci=self.use_ci),
setup_model_monitoring=self.defaults.gcp.setup_model_monitoring,
use_ci=self.defaults.tooling.use_ci),
mode='w')

# components/component_base/dockerfile: Write the component base Dockerfile
write_file(
filepath=f'{GENERATED_COMPONENT_BASE}/Dockerfile',
text=render_jinja(
template_path=import_files(KFP_TEMPLATES_PATH + '.components.component_base') / 'Dockerfile.j2',
base_image=self.base_image,
base_image=self.defaults.gcp.base_image,
generated_license=GENERATED_LICENSE),
mode='w')

@@ -278,15 +267,15 @@ def build(self,
base_dir=BASE_DIR))

# scripts/publish_to_topic.sh: If using CI, write script for publishing to pubsub topic
if self.use_ci:
if self.defaults.tooling.use_ci:
write_and_chmod(
filepath=GENERATED_PUBLISH_TO_TOPIC_FILE,
text=render_jinja(
template_path=scripts_template_path / 'publish_to_topic.sh.j2',
base_dir=BASE_DIR,
generated_license=GENERATED_LICENSE,
generated_parameter_values_path=GENERATED_PARAMETER_VALUES_PATH,
pubsub_topic_name=self.pubsub_topic_name))
pubsub_topic_name=self.defaults.gcp.pubsub_topic_name))

# pipelines/pipeline.py: Generates a Kubeflow pipeline spec from custom components.
components_list = self._get_component_list()
@@ -299,7 +288,7 @@ def build(self,
custom_training_job_specs=self.custom_training_job_specs,
generated_license=GENERATED_LICENSE,
pipeline_scaffold_contents=pipeline_scaffold_contents,
project_id=self.project_id),
project_id=self.defaults.gcp.project_id),
mode='w')

# pipelines/pipeline_runner.py: Sends a PipelineJob to Vertex AI using pipeline spec.
@@ -319,7 +308,7 @@ def build(self,
mode='w')

# pipelines/runtime_parameters/pipeline_parameter_values.json: Provides runtime parameters for the PipelineJob.
self.pipeline_params['gs_pipeline_spec_path'] = self.gs_pipeline_job_spec_path
self.pipeline_params['gs_pipeline_spec_path'] = self.defaults.pipeline_specs.gs_pipeline_job_spec_path
serialized_params = json.dumps(self.pipeline_params, indent=4)
write_file(BASE_DIR + GENERATED_PARAMETER_VALUES_PATH, serialized_params, 'w')

@@ -483,19 +472,19 @@ def _build_submission_services(self):
render_jinja(
template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'requirements.txt.j2',
pinned_kfp_version=PINNED_KFP_VERSION,
pipeline_job_submission_service_type=self.pipeline_job_submission_service_type),
pipeline_job_submission_service_type=self.defaults.gcp.pipeline_job_submission_service_type),
'w')

write_file(
f'{self.submission_service_base_dir}/main.py',
render_jinja(
template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'main.py.j2',
generated_license=GENERATED_LICENSE,
pipeline_root=self.pipeline_storage_path,
pipeline_job_runner_service_account=self.pipeline_job_runner_service_account,
pipeline_job_submission_service_type=self.pipeline_job_submission_service_type,
project_id=self.project_id,
setup_model_monitoring=self.setup_model_monitoring),
pipeline_root=self.defaults.pipeline_specs.pipeline_storage_path,
pipeline_job_runner_service_account=self.defaults.gcp.pipeline_job_runner_service_account,
pipeline_job_submission_service_type=self.defaults.gcp.pipeline_job_submission_service_type,
project_id=self.defaults.gcp.project_id,
setup_model_monitoring=self.defaults.gcp.setup_model_monitoring),
'w')

write_file(