From 9b2cd8ea9c6b4ec8bd390be8c1adb9e6d6ea3cd2 Mon Sep 17 00:00:00 2001 From: Your Name Date: Tue, 30 Jan 2024 13:15:40 -0500 Subject: [PATCH 01/11] Added new files for component and pipeline objects, with KFP subclasses. Incorporated code out of scaffold.py, awaiting builder.py add ins --- .../orchestration/Component.py | 142 ++++++++++++++++++ .../orchestration/Pipeline.py | 53 +++++++ .../orchestration/kfp/KFPComponent.py | 133 ++++++++++++++++ .../orchestration/kfp/KFPPipeline.py | 92 ++++++++++++ 4 files changed, 420 insertions(+) create mode 100644 google_cloud_automlops/orchestration/Component.py create mode 100644 google_cloud_automlops/orchestration/Pipeline.py create mode 100644 google_cloud_automlops/orchestration/kfp/KFPComponent.py create mode 100644 google_cloud_automlops/orchestration/kfp/KFPPipeline.py diff --git a/google_cloud_automlops/orchestration/Component.py b/google_cloud_automlops/orchestration/Component.py new file mode 100644 index 0000000..bf7fd55 --- /dev/null +++ b/google_cloud_automlops/orchestration/Component.py @@ -0,0 +1,142 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Creates a generic component object.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=C0103 +# pylint: disable=line-too-long + +import docstring_parser +import inspect +import itertools +import textwrap +from typing import Callable, List, Optional, TypeVar, Union +from google_cloud_automlops.utils.utils import get_function_source_definition + +T = TypeVar('T') + + +class Component: + + def __init__(self, + func: Optional[Callable] = None, + packages_to_install: Optional[List[str]] = None): + """Initiates a component object created out of a function holding + all necessary code. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + packages_to_install: A list of optional packages to install before + executing func. These will always be installed at component runtime. + + Raises: + ValueError: Confirms that the input is an existing function. + """ + + # Confirm the input is an existing function + if not inspect.isfunction(func): + raise ValueError(f"{func} must be of type function.") + + # Set attributes of the component function + self.func = func + self.name = func.__name__ + self.parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) + self.description = self.parsed_docstring.short_description + self.packages_to_install = [] if not packages_to_install else packages_to_install + + # Process and extract details from passed function + self.parameters = self._get_function_parameters() + self.return_types = self._get_function_return_types() + self.src_code = get_function_source_definition(self.func) + + def _get_function_return_types(self) -> list: + """Returns a formatted list of function return types. 
+ + Returns: + list: return value list with types converted to kubeflow spec. + Raises: + Exception: If return type is provided and not a NamedTuple. + """ + # TODO: COMMENT + annotation = inspect.signature(self.func).return_annotation + if maybe_strip_optional_from_annotation(annotation) is not annotation: + raise TypeError('Return type cannot be Optional.') + + # No annotations provided + # pylint: disable=protected-access + if annotation == inspect._empty: + return None + + if not (hasattr(annotation,'__annotations__') and isinstance(annotation.__annotations__, dict)): + raise TypeError(f'''Return type hint for function "{self.name}" must be a NamedTuple.''') + + # TODO: COMMENT + outputs = [] + for name, type_ in annotation.__annotations__.items(): + metadata = {} + metadata['name'] = name + metadata['type'] = type_ + metadata['description'] = None + outputs.append(metadata) + return outputs + + def _get_function_parameters(self) -> list: + """Returns a formatted list of parameters. + + Returns: + list: Params list with types converted to kubeflow spec. + Raises: + Exception: If parameter type hints are not provided. + """ + #TODO: COMMENT? + signature = inspect.signature(self.func) + parameters = list(signature.parameters.values()) + parsed_docstring = docstring_parser.parse(inspect.getdoc(self.func)) + doc_dict = {p.arg_name: p.description for p in parsed_docstring.params} + + # Extract parameter metadata + parameter_holder = [] + for param in parameters: + metadata = {} + metadata['name'] = param.name + metadata['description'] = doc_dict.get(param.name) + metadata['type'] = maybe_strip_optional_from_annotation( + param.annotation) + parameter_holder.append(metadata) + # pylint: disable=protected-access + if metadata['type'] == inspect._empty: + raise TypeError( + f'''Missing type hint for parameter "{metadata['name']}". ''' + f'''Please specify the type for this parameter.''') + return parameter_holder + +def maybe_strip_optional_from_annotation(annotation: T) -> T: + """Strips 'Optional' from 'Optional[]' if applicable. + For example:: + Optional[str] -> str + str -> str + List[int] -> List[int] + Args: + annotation: The original type annotation which may or may not has `Optional`. + Returns: + The type inside Optional[] if Optional exists, otherwise the original type. + """ + if getattr(annotation, '__origin__', None) is Union and annotation.__args__[1] is type(None): + return annotation.__args__[0] + else: + return annotation diff --git a/google_cloud_automlops/orchestration/Pipeline.py b/google_cloud_automlops/orchestration/Pipeline.py new file mode 100644 index 0000000..272150e --- /dev/null +++ b/google_cloud_automlops/orchestration/Pipeline.py @@ -0,0 +1,53 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Creates a generic pipeline object.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=C0103 +# pylint: disable=line-too-long + +from typing import Callable, Optional + +from google_cloud_automlops.utils.constants import DEFAULT_PIPELINE_NAME +from google_cloud_automlops.utils.utils import get_function_source_definition + + +class Pipeline(): + def __init__(self, + func: Optional[Callable] = None, + *, + name: Optional[str] = None, + description: Optional[str] = None): + """Initiates a pipeline object created out of a function holding + all necessary code. + + Args: + func: The python function to create a pipeline from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + name: The name of the pipeline. + description: Short description of what the pipeline does. + """ + self.func = func + self.func_name = func.__name__ + self.name = DEFAULT_PIPELINE_NAME if not name else name + self.description = description + self.src_code = get_function_source_definition(self.func) + +class FuturePipeline(): + def __init__(self, comps: list) -> None: + self.comps = comps + self.names = [comp.name for comp in self.comps] diff --git a/google_cloud_automlops/orchestration/kfp/KFPComponent.py b/google_cloud_automlops/orchestration/kfp/KFPComponent.py new file mode 100644 index 0000000..e1fec0f --- /dev/null +++ b/google_cloud_automlops/orchestration/kfp/KFPComponent.py @@ -0,0 +1,133 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Creates a KFP component object.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=C0103 +# pylint: disable=line-too-long + +from typing import Callable, List, Optional +from google_cloud_automlops.orchestration.Component import Component + +from google_cloud_automlops.utils.constants import ( + PLACEHOLDER_IMAGE, + CACHE_DIR +) +from google_cloud_automlops.utils.utils import ( + make_dirs, + write_yaml_file +) + + +class KFPComponent(Component): + def __init__(self, + func: Optional[Callable] = None, + packages_to_install: Optional[List[str]] = None): + """Initiates a KFP component object created out of a function holding + all necessary code. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + packages_to_install: A list of optional packages to install before + executing func. These will always be installed at component runtime. 
+ """ + super().__init__(func, packages_to_install) + self.parameters = update_params(self.parameters) + self.return_types = update_params(self.return_types) + self.packages_to_install_command = self._get_packages_to_install_command() + self.component_spec = self._create_component_spec() + + def build(self): + """Constructs files for running and managing Kubeflow pipelines. + """ + # Write component yaml + filename = CACHE_DIR + f'/{self.name}.yaml' + make_dirs([CACHE_DIR]) + write_yaml_file(filename, self.component_spec, 'w') + + def _get_packages_to_install_command(self): + """Returns a list of formatted list of commands, including code for tmp storage. + """ + newline = '\n' + concat_package_list = ' '.join([repr(str(package)) for package in self.packages_to_install]) + install_python_packages_script = ( + f'''if ! [ -x "$(command -v pip)" ]; then{newline}''' + f''' python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip{newline}''' + f'''fi{newline}''' + f'''PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \{newline}''' + f''' --no-warn-script-location {concat_package_list} && "$0" "$@"{newline}''' + f'''{newline}''') + return ['sh', '-c', install_python_packages_script, self.src_code] + + def _create_component_spec(self): + """Creates a tmp component scaffold which will be used by the formalize function. + Code is temporarily stored in component_spec['implementation']['container']['command']. + + Returns: + _type_: _description_ #TODO: FILL OUT + """ + # Instantiate component yaml attributes + component_spec = {} + component_spec['name'] = self.name + if self.description: + component_spec['description'] = self.description + outputs = self.return_types + if outputs: + component_spec['outputs'] = outputs + component_spec['inputs'] = self.parameters + component_spec['implementation'] = {} + component_spec['implementation']['container'] = {} + component_spec['implementation']['container']['image'] = PLACEHOLDER_IMAGE + component_spec['implementation']['container']['command'] = self.packages_to_install_command + component_spec['implementation']['container']['args'] = ['--executor_input', + {'executorInput': None}, + '--function_to_execute', + self.name] + return component_spec + +def update_params(params: list) -> list: + """Converts the parameter types from Python types + to Kubeflow types. Currently only supports + Python primitive types. + + Args: + params: Pipeline parameters. A list of dictionaries, + each param is a dict containing keys: + 'name': required, str param name. + 'type': required, python primitive type. + 'description': optional, str param desc. + Returns: + list: Params list with converted types. + Raises: + Exception: If an inputted type is not a primitive. + """ + python_kfp_types_mapper = { + int: 'Integer', + str: 'String', + float: 'Float', + bool: 'Bool', + list: 'JsonArray', + dict: 'JsonObject' + } + for param in params: + try: + param['type'] = python_kfp_types_mapper[param['type']] + except KeyError as err: + raise ValueError(f'Unsupported python type - we only support ' + f'primitive types at this time. {err}') from err + return params diff --git a/google_cloud_automlops/orchestration/kfp/KFPPipeline.py b/google_cloud_automlops/orchestration/kfp/KFPPipeline.py new file mode 100644 index 0000000..5ea2e12 --- /dev/null +++ b/google_cloud_automlops/orchestration/kfp/KFPPipeline.py @@ -0,0 +1,92 @@ +# Copyright 2023 Google LLC. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Creates a KFP pipeline object.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=C0103 +# pylint: disable=line-too-long + +from typing import Callable, Optional +from google_cloud_automlops.orchestration.Pipeline import Pipeline + +from google_cloud_automlops.utils.constants import ( + CACHE_DIR, + PIPELINE_CACHE_FILE +) +from google_cloud_automlops.utils.utils import ( + make_dirs, + write_file +) + + +class KFPPipeline(Pipeline): + def __init__(self, + func: Optional[Callable] = None, + *, + name: Optional[str] = None, + description: Optional[str] = None) -> None: + """Initiates a KFP pipeline object created out of a function holding + all necessary code. + + Args: + func: The python function to create a pipeline from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + name: The name of the pipeline. + description: Short description of what the pipeline does. + """ + super().__init__(func, name, description) + self.pipeline_scaffold = (self._get_pipeline_decorator() + + self.src_code + + self._get_compile_step()) + + def build(self): + """Constructs files for running and managing Kubeflow pipelines. + """ + make_dirs([CACHE_DIR]) # if it doesn't already exist + write_file(PIPELINE_CACHE_FILE, self.pipeline_scaffold, 'w') + + def _get_pipeline_decorator(self): + """Creates the kfp pipeline decorator. + + Args: + name: The name of the pipeline. + description: Short description of what the pipeline does. + + Returns: + str: Python compile function call. + """ + name_str = f'''(\n name='{self.name}',\n''' + desc_str = f''' description='{self.description}',\n''' if self.description else '' + ending_str = ')\n' + return '@dsl.pipeline' + name_str + desc_str + ending_str + + def _get_compile_step(self): + """Creates the compile function call. + + Args: + func_name: The name of the pipeline function. + + Returns: + str: Python compile function call. + """ + return ( + f'\n' + f'compiler.Compiler().compile(\n' + f' pipeline_func={self.func_name},\n' + f' package_path=pipeline_job_spec_path)\n' + f'\n' + ) From 27ad8edcafc36798616f906ac5c3d386fa8414af Mon Sep 17 00:00:00 2001 From: Allegra Noto Date: Wed, 14 Feb 2024 17:12:36 -0500 Subject: [PATCH 02/11] First pass at orchestration component, pipeline, and services objects. 
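
Rough usage sketch of the new subclasses (illustrative only; the function names,
docstrings, package list, and pipeline name below are made up for this note and
are not part of the patch, and build() is assumed to be invoked later by the
top-level generate flow once the generated defaults file exists):

    from typing import NamedTuple

    from google_cloud_automlops.orchestration.kfp.KFPComponent import KFPComponent
    from google_cloud_automlops.orchestration.kfp.KFPPipeline import KFPPipeline

    def train_model(data_path: str, learning_rate: float) -> NamedTuple(
            'Outputs', [('accuracy', float)]):
        """Trains a model and reports its accuracy.

        Args:
            data_path: GCS path to the training data.
            learning_rate: Optimizer learning rate.
        """
        ...

    def training_pipeline(data_path: str, learning_rate: float):
        """Calls the training component."""
        train_model(data_path=data_path, learning_rate=learning_rate)

    # Each wrapper parses the function's signature and docstring on init;
    # calling build() afterwards writes the component.yaml / pipeline
    # scaffold files (build() also reads the generated defaults file, so
    # it is only called from the main AutoMLOps generate flow).
    component = KFPComponent(func=train_model, packages_to_install=['pandas'])
    pipeline = KFPPipeline(func=training_pipeline,
                           name='example-training-pipeline',
                           description='Trains and evaluates an example model.')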
--- .../orchestration/Component.py | 40 ++- .../orchestration/Pipeline.py | 57 +++- .../orchestration/Services.py | 74 +++++ .../orchestration/configs.py | 39 --- google_cloud_automlops/orchestration/enums.py | 36 --- .../orchestration/kfp/KFPComponent.py | 83 +++++- .../orchestration/kfp/KFPPipeline.py | 281 +++++++++++++++++- .../orchestration/kfp/KFPServices.py | 85 ++++++ .../orchestration/kfp/scaffold.py | 253 ---------------- 9 files changed, 583 insertions(+), 365 deletions(-) create mode 100644 google_cloud_automlops/orchestration/Services.py delete mode 100644 google_cloud_automlops/orchestration/configs.py delete mode 100644 google_cloud_automlops/orchestration/enums.py create mode 100644 google_cloud_automlops/orchestration/kfp/KFPServices.py delete mode 100644 google_cloud_automlops/orchestration/kfp/scaffold.py diff --git a/google_cloud_automlops/orchestration/Component.py b/google_cloud_automlops/orchestration/Component.py index bf7fd55..bf91204 100644 --- a/google_cloud_automlops/orchestration/Component.py +++ b/google_cloud_automlops/orchestration/Component.py @@ -18,22 +18,31 @@ # pylint: disable=C0103 # pylint: disable=line-too-long +from abc import ABC, abstractmethod import docstring_parser import inspect -import itertools -import textwrap from typing import Callable, List, Optional, TypeVar, Union -from google_cloud_automlops.utils.utils import get_function_source_definition + +from google_cloud_automlops.utils.constants import GENERATED_DEFAULTS_FILE +from google_cloud_automlops.utils.utils import ( + get_function_source_definition, + read_yaml_file +) T = TypeVar('T') -class Component: +class Component(ABC): + """The Component object represents a component defined by the user. - def __init__(self, - func: Optional[Callable] = None, + Args: + ABC: Abstract class + """ + + def __init__(self, + func: Optional[Callable] = None, packages_to_install: Optional[List[str]] = None): - """Initiates a component object created out of a function holding + """Initiates a generic Component object created out of a function holding all necessary code. Args: @@ -52,18 +61,31 @@ def __init__(self, if not inspect.isfunction(func): raise ValueError(f"{func} must be of type function.") - # Set attributes of the component function + # Set simple attributes of the component function self.func = func self.name = func.__name__ + self.packages_to_install = [] if not packages_to_install else packages_to_install + + # Parse the docstring for description self.parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) self.description = self.parsed_docstring.short_description - self.packages_to_install = [] if not packages_to_install else packages_to_install # Process and extract details from passed function self.parameters = self._get_function_parameters() self.return_types = self._get_function_return_types() self.src_code = get_function_source_definition(self.func) + @abstractmethod + def build(self): + """Instantiates an abstract built method to create and write task files. Also + reads in defaults file to save default arguments to attributes. + """ + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.artifact_repo_location = defaults['gcp']['artifact_repo_location'] + self.artifact_repo_name = defaults['gcp']['artifact_repo_name'] + self.project_id = defaults['gcp']['project_id'] + self.naming_prefix = defaults['gcp']['naming_prefix'] + def _get_function_return_types(self) -> list: """Returns a formatted list of function return types. 
diff --git a/google_cloud_automlops/orchestration/Pipeline.py b/google_cloud_automlops/orchestration/Pipeline.py index 272150e..112c0bb 100644 --- a/google_cloud_automlops/orchestration/Pipeline.py +++ b/google_cloud_automlops/orchestration/Pipeline.py @@ -18,14 +18,27 @@ # pylint: disable=C0103 # pylint: disable=line-too-long +from abc import ABC, abstractmethod from typing import Callable, Optional -from google_cloud_automlops.utils.constants import DEFAULT_PIPELINE_NAME -from google_cloud_automlops.utils.utils import get_function_source_definition +from google_cloud_automlops.utils.constants import ( + DEFAULT_PIPELINE_NAME, + GENERATED_DEFAULTS_FILE +) +from google_cloud_automlops.utils.utils import ( + get_function_source_definition, + read_yaml_file +) -class Pipeline(): - def __init__(self, +class Pipeline(ABC): + """The Pipeline object represents a component defined by the user. + + Args: + ABC: Abstract class + """ + + def __init__(self, func: Optional[Callable] = None, *, name: Optional[str] = None, @@ -47,7 +60,43 @@ def __init__(self, self.description = description self.src_code = get_function_source_definition(self.func) + @abstractmethod + def build(self, + base_image, + custom_training_job_specs, + pipeline_params, + pubsub_topic_name, + use_ci): + """Instantiates an abstract built method to create and write pipeline files. Also + reads in defaults file to save default arguments to attributes. + + Files created must include: + 1. README.md + 2. Dockerfile + 3. Requirements.txt + + Args: + base_image (_type_): _description_ + custom_training_job_specs (_type_): _description_ + pipeline_params (_type_): _description_ + pubsub_topic_name (_type_): _description_ + use_ci (_type_): _description_ + """ + # Save parameters as attributes + self.base_image = base_image + self.custom_training_job_specs = custom_training_job_specs + self.pipeline_params = pipeline_params + self.pubsub_topic_name = pubsub_topic_name + self.use_ci = use_ci + + # Extract additional attributes from defaults file + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.project_id = defaults['gcp']['project_id'] + self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path'] + class FuturePipeline(): + """Placeholder for future pipeline object that will be created out of a list of components. + """ def __init__(self, comps: list) -> None: self.comps = comps self.names = [comp.name for comp in self.comps] diff --git a/google_cloud_automlops/orchestration/Services.py b/google_cloud_automlops/orchestration/Services.py new file mode 100644 index 0000000..e59fca8 --- /dev/null +++ b/google_cloud_automlops/orchestration/Services.py @@ -0,0 +1,74 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Creates a generic services object.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=C0103 +# pylint: disable=line-too-long + +from abc import ABC, abstractmethod + +from google_cloud_automlops.utils.utils import read_yaml_file +from google_cloud_automlops.utils.constants import ( + BASE_DIR, + GENERATED_DEFAULTS_FILE +) + + +class Services(ABC): + """The Services object will contain TODO: fill out what this does + + Args: + ABC: Abstract class + """ + + def __init__(self) -> None: + """Instantiates a generic Services object. + """ + + def build(self): + """Constructs and writes a Dockerfile, requirements.txt, and + main.py to the services/submission_service directory. + """ + + # Read in defaults params + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.pipeline_storage_path = defaults['pipelines']['pipeline_storage_path'] + self.pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'] + self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + self.project_id = defaults['gcp']['project_id'] + self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + + # Set directory for files to be written to + self.submission_service_base_dir = BASE_DIR + 'services/submission_service' + + self._build_main() + self._build_dockerfile() + self._build_requirements() + + @abstractmethod + def _build_dockerfile(self): + """Abstract method to create the Dockerfile file of the services/submission_service directory. + """ + + @abstractmethod + def _build_requirements(self): + """Abstract method to create the requirements.txt file of the services/submission_service directory. + """ + + @abstractmethod + def _build_main(self): + """Abstract method to create the main.py file of the services/submission_service directory. + """ diff --git a/google_cloud_automlops/orchestration/configs.py b/google_cloud_automlops/orchestration/configs.py deleted file mode 100644 index 27c674b..0000000 --- a/google_cloud_automlops/orchestration/configs.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Model classes for AutoMLOps Orchestration Frameworks.""" - -# pylint: disable=C0103 -# pylint: disable=line-too-long - -from typing import Dict, List, Optional - -from pydantic import BaseModel - - -class KfpConfig(BaseModel): - """Model representing the KFP config. - - Args: - base_image: The image to use in the component base dockerfile. - custom_training_job_specs: Specifies the specs to run the training job with. - pipeline_params: Dictionary containing runtime pipeline parameters. - pubsub_topic_name: The name of the pubsub topic to publish to. - use_ci: Flag that determines whether to use Cloud Run CI/CD. 
- """ - base_image: str - custom_training_job_specs: Optional[List] - pipeline_params: Dict - pubsub_topic_name: str - use_ci: bool diff --git a/google_cloud_automlops/orchestration/enums.py b/google_cloud_automlops/orchestration/enums.py deleted file mode 100644 index 7894af0..0000000 --- a/google_cloud_automlops/orchestration/enums.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Sets global enums.""" - -# pylint: disable=C0103 -# pylint: disable=line-too-long - -from enum import Enum - - -class Orchestrator(Enum): - """Enum representing the available options for orchestration management.""" - - KFP = 'kfp' - # ARGO_WORKFLOWS = 'argo-workflows' # roadmap item - # TFX = 'tfx' # roadmap item - # AIRFLOW = 'airflow' # roadmap item - # RAY = 'ray' # roadmap item - -class PipelineJobSubmitter(Enum): - """Enum representing the available options for the Pipeline Job submission service.""" - - CLOUD_FUNCTIONS = 'cloud-functions' - CLOUD_RUN = 'cloud-run' diff --git a/google_cloud_automlops/orchestration/kfp/KFPComponent.py b/google_cloud_automlops/orchestration/kfp/KFPComponent.py index e1fec0f..7dbf874 100644 --- a/google_cloud_automlops/orchestration/kfp/KFPComponent.py +++ b/google_cloud_automlops/orchestration/kfp/KFPComponent.py @@ -12,30 +12,46 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Creates a KFP component object.""" +"""Creates a KFP component subclass.""" # pylint: disable=anomalous-backslash-in-string # pylint: disable=C0103 # pylint: disable=line-too-long from typing import Callable, List, Optional -from google_cloud_automlops.orchestration.Component import Component +try: + from importlib.resources import files as import_files +except ImportError: + # Try backported to PY<37 `importlib_resources` + from importlib_resources import files as import_files + +from google_cloud_automlops.orchestration.Component import Component from google_cloud_automlops.utils.constants import ( + BASE_DIR, + GENERATED_LICENSE, + KFP_TEMPLATES_PATH, PLACEHOLDER_IMAGE, - CACHE_DIR ) from google_cloud_automlops.utils.utils import ( make_dirs, + render_jinja, + write_file, write_yaml_file ) class KFPComponent(Component): - def __init__(self, + """Creates a KFP specific Component object for #TODO: add more + + Args: + Component (object): Generic Component object. + """ + + def __init__(self, func: Optional[Callable] = None, packages_to_install: Optional[List[str]] = None): - """Initiates a KFP component object created out of a function holding + """Initiates a KFP Component object created out of a function holding all necessary code. Args: @@ -47,18 +63,67 @@ def __init__(self, executing func. These will always be installed at component runtime. 
""" super().__init__(func, packages_to_install) + + # Update parameters and return types to reflect KFP data types self.parameters = update_params(self.parameters) self.return_types = update_params(self.return_types) + + # Set packages to install and component spec attributes self.packages_to_install_command = self._get_packages_to_install_command() self.component_spec = self._create_component_spec() def build(self): """Constructs files for running and managing Kubeflow pipelines. """ - # Write component yaml - filename = CACHE_DIR + f'/{self.name}.yaml' - make_dirs([CACHE_DIR]) - write_yaml_file(filename, self.component_spec, 'w') + super().build() + + # TODO: can this be removed? + kfp_spec_bool = self.component_spec['implementation']['container']['image'] != PLACEHOLDER_IMAGE + + # Read in component specs + custom_code_contents = self.component_spec['implementation']['container']['command'][-1] + compspec_image = ( + f'''{self.artifact_repo_location}-docker.pkg.dev/''' + f'''{self.project_id}/''' + f'''{self.artifact_repo_name}/''' + f'''{self.naming_prefix}/''' + f'''components/component_base:latest''') + + # If using kfp, remove spaces in name and convert to lowercase + if kfp_spec_bool: + self.component_spec['name'] = self.component_spec['name'].replace(' ', '_').lower() + + # Set and create directory for components if it does not already exist + # TODO: make this only happen for the first component? or pull into automlops.py + component_dir = BASE_DIR + 'components/' + self.component_spec['name'] + make_dirs([component_dir]) + + # Write task script to component base + write_file( + filepath=BASE_DIR + 'components/component_base/src/' + self.component_spec['name'] + '.py', + text=render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.components.component_base.src') / 'task.py.j2', + generated_license=GENERATED_LICENSE, + kfp_spec_bool=kfp_spec_bool, + custom_code_content=custom_code_contents), + mode='w') + + # Update component_spec to include correct image and startup command + self.component_spec['implementation']['container']['image'] = compspec_image + self.component_spec['implementation']['container']['command'] = [ + 'python3', + f'''/pipelines/component/src/{self.component_spec['name']+'.py'}'''] + + # Write license and component spec to the appropriate component.yaml file + comp_yaml_path = component_dir + '/component.yaml' + write_file( + filepath=comp_yaml_path, + text=GENERATED_LICENSE, + mode='w') + write_yaml_file( + filepath=comp_yaml_path, + contents=self.component_spec, + mode='a') def _get_packages_to_install_command(self): """Returns a list of formatted list of commands, including code for tmp storage. diff --git a/google_cloud_automlops/orchestration/kfp/KFPPipeline.py b/google_cloud_automlops/orchestration/kfp/KFPPipeline.py index 5ea2e12..f50e7a1 100644 --- a/google_cloud_automlops/orchestration/kfp/KFPPipeline.py +++ b/google_cloud_automlops/orchestration/kfp/KFPPipeline.py @@ -12,27 +12,60 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Creates a KFP pipeline object.""" +"""Creates a KFP pipeline subclass.""" # pylint: disable=anomalous-backslash-in-string # pylint: disable=C0103 # pylint: disable=line-too-long +import json +import re +import textwrap from typing import Callable, Optional -from google_cloud_automlops.orchestration.Pipeline import Pipeline +try: + from importlib.resources import files as import_files +except ImportError: + # Try backported to PY<37 `importlib_resources` + from importlib_resources import files as import_files + +from google_cloud_automlops.orchestration.Pipeline import Pipeline +from google_cloud_automlops.utils.utils import ( + execute_process, + get_components_list, + read_file, + read_yaml_file, + render_jinja, + write_and_chmod, + write_file, +) from google_cloud_automlops.utils.constants import ( - CACHE_DIR, + BASE_DIR, + GENERATED_BUILD_COMPONENTS_SH_FILE, + GENERATED_COMPONENT_BASE, + GENERATED_LICENSE, + GENERATED_PARAMETER_VALUES_PATH, + GENERATED_PIPELINE_FILE, + GENERATED_PIPELINE_REQUIREMENTS_FILE, + GENERATED_PIPELINE_RUNNER_FILE, + GENERATED_PIPELINE_SPEC_SH_FILE, + GENERATED_PUBLISH_TO_TOPIC_FILE, + GENERATED_RUN_PIPELINE_SH_FILE, + GENERATED_RUN_ALL_SH_FILE, + KFP_TEMPLATES_PATH, + PINNED_KFP_VERSION, PIPELINE_CACHE_FILE ) -from google_cloud_automlops.utils.utils import ( - make_dirs, - write_file -) class KFPPipeline(Pipeline): - def __init__(self, + """Creates a KFP specific Pipeline object for #TODO: add more + + Args: + Pipeline (object): Generic Pipeline object. + """ + + def __init__(self, func: Optional[Callable] = None, *, name: Optional[str] = None, @@ -48,16 +81,159 @@ def __init__(self, name: The name of the pipeline. description: Short description of what the pipeline does. """ - super().__init__(func, name, description) - self.pipeline_scaffold = (self._get_pipeline_decorator() + - self.src_code + - self._get_compile_step()) + super().__init__( + func=func, + name=name, + description=description) + + # Create pipeline scaffold attribute # TODO: more descriptive + self.pipeline_scaffold = ( + self._get_pipeline_decorator() + + self.src_code + + self._get_compile_step()) - def build(self): + def build(self, + base_image, + custom_training_job_specs, + pipeline_params, + pubsub_topic_name, + use_ci): """Constructs files for running and managing Kubeflow pipelines. 
+ + Files created under AutoMLOps/: + README.md + scripts/ + pipeline_spec/.gitkeep + build_components.sh + build_pipeline_spec.sh + run_pipeline.sh + publish_to_topic.sh + run_all.sh + components/ + component_base/Dockerfile + component_base/requirements.txt + pipelines/ + pipeline.py + pipeline_runner.py + requirements.txt + runtime_parameters/pipeline_parameter_values.json """ - make_dirs([CACHE_DIR]) # if it doesn't already exist - write_file(PIPELINE_CACHE_FILE, self.pipeline_scaffold, 'w') + super().build(base_image, + custom_training_job_specs, + pipeline_params, + pubsub_topic_name, + use_ci) + + # README.md: Write description of the contents of the directory + write_file( + filepath=f'{BASE_DIR}README.md', + text=render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH) / 'README.md.j2', + use_ci=self.use_ci), + mode='w') + + # components/component_base/dockerfile: Write the component base Dockerfile + write_file( + filepath=f'{GENERATED_COMPONENT_BASE}/Dockerfile', + text=render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.components.component_base') / 'Dockerfile.j2', + base_image=self.base_image, + generated_license=GENERATED_LICENSE), + mode='w') + + # components/component_base/requirements.txt: Write the component base requirements file + write_file( + filepath=f'{GENERATED_COMPONENT_BASE}/requirements.txt', + text=self._create_component_base_requirements(), + mode='w') + + # Save scripts template path + scripts_template_path = import_files(KFP_TEMPLATES_PATH + '.scripts') + + # scripts/pipeline_spec/.gitkeep: Write gitkeep to pipeline_spec directory + write_file( + filepath=f'{BASE_DIR}scripts/pipeline_spec/.gitkeep', + text='', + mode='w') + + # scripts/build_components.sh: Write script for building components + write_and_chmod( + filepath=GENERATED_BUILD_COMPONENTS_SH_FILE, + text=render_jinja( + template_path=scripts_template_path / 'build_components.sh.j2', + generated_license=GENERATED_LICENSE, + base_dir=BASE_DIR)) + + # scripts/build_pipeline_spec.sh: Write script for building pipeline specs + write_and_chmod( + filepath=GENERATED_PIPELINE_SPEC_SH_FILE, + text=render_jinja( + template_path=scripts_template_path / 'build_pipeline_spec.sh.j2', + generated_license=GENERATED_LICENSE, + base_dir=BASE_DIR)) + + # scripts/run_pipline: Write script for running pipeline + write_and_chmod( + filepath=GENERATED_RUN_PIPELINE_SH_FILE, + text=render_jinja( + template_path=scripts_template_path / 'run_pipeline.sh.j2', + generated_license=GENERATED_LICENSE, + base_dir=BASE_DIR)) + + # scripts/run_all.sh: Write script for running all files + write_and_chmod( + filepath=GENERATED_RUN_ALL_SH_FILE, + text=render_jinja( + template_path=scripts_template_path / 'run_all.sh.j2', + generated_license=GENERATED_LICENSE, + base_dir=BASE_DIR)) + + # scripts/publish_to_topic.sh: If using CI, write script for publishing to pubsub topic + if self.use_ci: + write_and_chmod( + filepath=GENERATED_PUBLISH_TO_TOPIC_FILE, + text=render_jinja( + template_path=scripts_template_path / 'publish_to_topic.sh.j2', + base_dir=BASE_DIR, + generated_license=GENERATED_LICENSE, + generated_parameter_values_path=GENERATED_PARAMETER_VALUES_PATH, + pubsub_topic_name=self.pubsub_topic_name)) + + # pipelines/pipeline.py: Generates a Kubeflow pipeline spec from custom components. 
+ components_list = get_components_list(full_path=False) + pipeline_scaffold_contents = read_file(PIPELINE_CACHE_FILE) + pipeline_scaffold_contents = textwrap.indent(pipeline_scaffold_contents, 4 * ' ') + write_file( + filepath=GENERATED_PIPELINE_FILE, + text=render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'pipeline.py.j2', + components_list=components_list, + custom_training_job_specs=self.custom_training_job_specs, + generated_license=GENERATED_LICENSE, + pipeline_scaffold_contents=pipeline_scaffold_contents, + project_id=self.project_id), + mode='w') + + # pipelines/pipeline_runner.py: Sends a PipelineJob to Vertex AI using pipeline spec. + write_file( + filepath=GENERATED_PIPELINE_RUNNER_FILE, + text=render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'pipeline_runner.py.j2', + generated_license=GENERATED_LICENSE), + mode='w') + + # pipelines/requirements.txt + write_file( + filepath=GENERATED_PIPELINE_REQUIREMENTS_FILE, + text=render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'requirements.txt.j2', + pinned_kfp_version=PINNED_KFP_VERSION), + mode='w') + + # pipelines/runtime_parameters/pipeline_parameter_values.json: Provides runtime parameters for the PipelineJob. + self.pipeline_params['gs_pipeline_spec_path'] = self.gs_pipeline_job_spec_path + serialized_params = json.dumps(self.pipeline_params, indent=4) + write_file(BASE_DIR + GENERATED_PARAMETER_VALUES_PATH, serialized_params, 'w') def _get_pipeline_decorator(self): """Creates the kfp pipeline decorator. @@ -90,3 +266,78 @@ def _get_compile_step(self): f' package_path=pipeline_job_spec_path)\n' f'\n' ) + + def _create_component_base_requirements(self): + """Writes a requirements.txt to the component_base directory. + Infers pip requirements from the python srcfiles using + pipreqs. Takes user-inputted requirements, and addes some + default gcp packages as well as packages that are often missing + in setup.py files (e.g db_types, pyarrow, gcsfs, fsspec). 
+ """ + reqs_filename = f'{GENERATED_COMPONENT_BASE}/requirements.txt' + default_gcp_reqs = [ + 'google-cloud-aiplatform', + 'google-cloud-appengine-logging', + 'google-cloud-audit-log', + 'google-cloud-bigquery', + 'google-cloud-bigquery-storage', + 'google-cloud-bigtable', + 'google-cloud-core', + 'google-cloud-dataproc', + 'google-cloud-datastore', + 'google-cloud-dlp', + 'google-cloud-firestore', + 'google-cloud-kms', + 'google-cloud-language', + 'google-cloud-logging', + 'google-cloud-monitoring', + 'google-cloud-notebooks', + 'google-cloud-pipeline-components', + 'google-cloud-pubsub', + 'google-cloud-pubsublite', + 'google-cloud-recommendations-ai', + 'google-cloud-resource-manager', + 'google-cloud-scheduler', + 'google-cloud-spanner', + 'google-cloud-speech', + 'google-cloud-storage', + 'google-cloud-tasks', + 'google-cloud-translate', + 'google-cloud-videointelligence', + 'google-cloud-vision', + 'db_dtypes', + 'pyarrow', + 'gcsfs', + 'fsspec'] + + # Get user-inputted requirements from the cache dir + user_inp_reqs = [] + components_path_list = get_components_list() + for component_path in components_path_list: + component_spec = read_yaml_file(component_path) + reqs = component_spec['implementation']['container']['command'][2] + formatted_reqs = re.findall('\'([^\']*)\'', reqs) + user_inp_reqs.extend(formatted_reqs) + + # Check if user inputted requirements + if user_inp_reqs: + # Remove duplicates + set_of_requirements = set(user_inp_reqs) + else: + # If user did not input requirements, then infer reqs using pipreqs + execute_process(f'python3 -m pipreqs.pipreqs {GENERATED_COMPONENT_BASE} --mode no-pin --force', to_null=True) + pipreqs = read_file(reqs_filename).splitlines() + set_of_requirements = set(pipreqs + default_gcp_reqs) + + # Remove empty string + if '' in set_of_requirements: + set_of_requirements.remove('') + + # Pin kfp version + if 'kfp' in set_of_requirements: + set_of_requirements.remove('kfp') + set_of_requirements.add(PINNED_KFP_VERSION) + + # Stringify and sort + reqs_str = ''.join(r+'\n' for r in sorted(set_of_requirements)) + return reqs_str diff --git a/google_cloud_automlops/orchestration/kfp/KFPServices.py b/google_cloud_automlops/orchestration/kfp/KFPServices.py new file mode 100644 index 0000000..0de439e --- /dev/null +++ b/google_cloud_automlops/orchestration/kfp/KFPServices.py @@ -0,0 +1,85 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Creates a KFP services subclass.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=C0103 +# pylint: disable=line-too-long + +try: + from importlib.resources import files as import_files +except ImportError: + # Try backported to PY<37 `importlib_resources` + from importlib_resources import files as import_files + +from google_cloud_automlops.orchestration.Services import Services +from google_cloud_automlops.utils.utils import ( + render_jinja, + write_file +) +from google_cloud_automlops.utils.constants import ( + BASE_DIR, + GENERATED_LICENSE, + KFP_TEMPLATES_PATH, + PINNED_KFP_VERSION +) + + +class KFPServices(Services): + """Creates a KFP specific Services object for #TODO: add more + + Args: + Services (object): Generic Services object. + """ + + def __init__(self) -> None: + """Initializes KFPServices Object. + """ + + def _build_dockerfile(self): + """Writes the services/submission_service/Dockerfile #TODO add more + """ + write_file( + f'{self.submission_service_base_dir}/Dockerfile', + render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'Dockerfile.j2', + base_dir=BASE_DIR, + generated_license=GENERATED_LICENSE), + 'w') + + def _build_requirements(self): + """Writes the services/submission_service/requirements.txt #TODO add more + """ + write_file( + f'{self.submission_service_base_dir}/requirements.txt', + render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'requirements.txt.j2', + pinned_kfp_version=PINNED_KFP_VERSION, + pipeline_job_submission_service_type=self.pipeline_job_submission_service_type), + 'w') + + def _build_main(self): + """Writes the services/submission_service/main.py file to #TODO add more + """ + write_file( + f'{self.submission_service_base_dir}/main.py', + render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'main.py.j2', + generated_license=GENERATED_LICENSE, + pipeline_root=self.pipeline_storage_path, + pipeline_job_runner_service_account=self.pipeline_job_runner_service_account, + pipeline_job_submission_service_type=self.pipeline_job_submission_service_type, + project_id=self.project_id), + 'w') diff --git a/google_cloud_automlops/orchestration/kfp/scaffold.py b/google_cloud_automlops/orchestration/kfp/scaffold.py deleted file mode 100644 index cf2b7a7..0000000 --- a/google_cloud_automlops/orchestration/kfp/scaffold.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Builds temporary component scaffold yaml files.""" - -# pylint: disable=anomalous-backslash-in-string -# pylint: disable=C0103 -# pylint: disable=line-too-long - -import inspect -from typing import Callable, List, Optional, TypeVar, Union - -import docstring_parser - -from google_cloud_automlops.utils.constants import ( - DEFAULT_PIPELINE_NAME, - PLACEHOLDER_IMAGE, - PIPELINE_CACHE_FILE, - CACHE_DIR -) -from google_cloud_automlops.utils.utils import ( - get_function_source_definition, - make_dirs, - update_params, - write_file, - write_yaml_file -) - -T = TypeVar('T') - - -def create_component_scaffold(func: Optional[Callable] = None, - *, - packages_to_install: Optional[List[str]] = None): - """Creates a tmp component scaffold which will be used by the formalize function. - Code is temporarily stored in component_spec['implementation']['container']['command']. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. - """ - # Extract name, docstring, and component description - name = func.__name__ - parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) - description = parsed_docstring.short_description - - # Instantiate component yaml attributes - component_spec = {} - component_spec['name'] = name - if description: - component_spec['description'] = description - outputs = get_function_return_types(func) - if outputs: - component_spec['outputs'] = outputs - component_spec['inputs'] = get_function_parameters(func) - component_spec['implementation'] = {} - component_spec['implementation']['container'] = {} - component_spec['implementation']['container']['image'] = PLACEHOLDER_IMAGE - component_spec['implementation']['container']['command'] = get_packages_to_install_command(func, packages_to_install) - component_spec['implementation']['container']['args'] = ['--executor_input', - {'executorInput': None}, - '--function_to_execute', - name] - # Write component yaml - filename = CACHE_DIR + f'/{name}.yaml' - make_dirs([CACHE_DIR]) - write_yaml_file(filename, component_spec, 'w') - - -def get_packages_to_install_command(func: Optional[Callable] = None, - packages_to_install: Optional[List[str]] = None): - """Returns a list of formatted list of commands, including code for tmp storage. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. - """ - newline = '\n' - if not packages_to_install: - packages_to_install = [] - concat_package_list = ' '.join([repr(str(package)) for package in packages_to_install]) - install_python_packages_script = ( - f'''if ! 
[ -x "$(command -v pip)" ]; then{newline}''' - f''' python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip{newline}''' - f'''fi{newline}''' - f'''PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \{newline}''' - f''' --no-warn-script-location {concat_package_list} && "$0" "$@"{newline}''' - f'''{newline}''') - src_code = get_function_source_definition(func) - return ['sh', '-c', install_python_packages_script, src_code] - - -def get_function_return_types(func: Callable) -> list: - """Returns a formatted list of function return types. - - Args: - func: The python function to create a component from. The function - can optionally have type annotations for its return values. - Returns: - list: return value list with types converted to kubeflow spec. - Raises: - Exception: If return type is provided and not a NamedTuple. - """ - annotation = inspect.signature(func).return_annotation - if maybe_strip_optional_from_annotation(annotation) is not annotation: - raise TypeError('Return type cannot be Optional.') - - # No annotations provided - # pylint: disable=protected-access - if annotation == inspect._empty: - return None - - if not (hasattr(annotation,'__annotations__') and isinstance(annotation.__annotations__, dict)): - raise TypeError(f'''Return type hint for function "{func.__name__}" must be a NamedTuple.''') - - outputs = [] - for name, type_ in annotation.__annotations__.items(): - metadata = {} - metadata['name'] = name - metadata['type'] = type_ - metadata['description'] = None - outputs.append(metadata) - return update_params(outputs) - - -def get_function_parameters(func: Callable) -> list: - """Returns a formatted list of parameters. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - Returns: - list: Params list with types converted to kubeflow spec. - Raises: - Exception: If parameter type hints are not provided. - """ - signature = inspect.signature(func) - parameters = list(signature.parameters.values()) - parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) - doc_dict = {p.arg_name: p.description for p in parsed_docstring.params} - - # Extract parameter metadata - parameter_holder = [] - for param in parameters: - metadata = {} - metadata['name'] = param.name - metadata['description'] = doc_dict.get(param.name) - metadata['type'] = maybe_strip_optional_from_annotation( - param.annotation) - parameter_holder.append(metadata) - # pylint: disable=protected-access - if metadata['type'] == inspect._empty: - raise TypeError( - f'''Missing type hint for parameter "{metadata['name']}". ''' - f'''Please specify the type for this parameter.''') - return update_params(parameter_holder) - - -def maybe_strip_optional_from_annotation(annotation: T) -> T: - """Strips 'Optional' from 'Optional[]' if applicable. - For example:: - Optional[str] -> str - str -> str - List[int] -> List[int] - Args: - annotation: The original type annotation which may or may not has `Optional`. - Returns: - The type inside Optional[] if Optional exists, otherwise the original type. 
- """ - if getattr(annotation, '__origin__', None) is Union and annotation.__args__[1] is type(None): - return annotation.__args__[0] - else: - return annotation - - -def create_pipeline_scaffold(func: Optional[Callable] = None, - *, - name: Optional[str] = None, - description: Optional[str] = None): - """Creates a temporary pipeline scaffold which will - be used by the formalize function. - - Args: - func: The python function to create a pipeline from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - name: The name of the pipeline. - description: Short description of what the pipeline does. - """ - pipeline_scaffold = (get_pipeline_decorator(name, description) + - get_function_source_definition(func) + - get_compile_step(func.__name__)) - make_dirs([CACHE_DIR]) # if it doesn't already exist - write_file(PIPELINE_CACHE_FILE, pipeline_scaffold, 'w') - - -def get_pipeline_decorator(name: Optional[str] = None, - description: Optional[str] = None): - """Creates the kfp pipeline decorator. - - Args: - name: The name of the pipeline. - description: Short description of what the pipeline does. - - Returns: - str: Python compile function call. - """ - default_name = DEFAULT_PIPELINE_NAME if not name else name - name_str = f'''(\n name='{default_name}',\n''' - desc_str = f''' description='{description}',\n''' if description else '' - ending_str = ')\n' - return '@dsl.pipeline' + name_str + desc_str + ending_str - - -def get_compile_step(func_name: str): - """Creates the compile function call. - - Args: - func_name: The name of the pipeline function. - - Returns: - str: Python compile function call. - """ - return ( - f'\n' - f'compiler.Compiler().compile(\n' - f' pipeline_func={func_name},\n' - f' package_path=pipeline_job_spec_path)\n' - f'\n' - ) - From 4a36b33ee88e0bb1f0c924d75e83d412049d7d42 Mon Sep 17 00:00:00 2001 From: Allegra Noto Date: Thu, 15 Feb 2024 12:13:17 -0500 Subject: [PATCH 03/11] Restored some files to allow it to run, tested and cleaned code --- .../orchestration/builder.py | 522 ++++++++++++++++++ .../orchestration/configs.py | 39 ++ google_cloud_automlops/orchestration/enums.py | 36 ++ .../orchestration/kfp/KFPComponent.py | 20 +- .../orchestration/kfp/KFPPipeline.py | 13 +- .../orchestration/kfp/scaffold.py | 253 +++++++++ .../orchestration/scaffold.py | 253 +++++++++ 7 files changed, 1126 insertions(+), 10 deletions(-) create mode 100644 google_cloud_automlops/orchestration/builder.py create mode 100644 google_cloud_automlops/orchestration/configs.py create mode 100644 google_cloud_automlops/orchestration/enums.py create mode 100644 google_cloud_automlops/orchestration/kfp/scaffold.py create mode 100644 google_cloud_automlops/orchestration/scaffold.py diff --git a/google_cloud_automlops/orchestration/builder.py b/google_cloud_automlops/orchestration/builder.py new file mode 100644 index 0000000..c121954 --- /dev/null +++ b/google_cloud_automlops/orchestration/builder.py @@ -0,0 +1,522 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Builds KFP components and pipeline.""" + +# pylint: disable=line-too-long + +import json +try: + from importlib.resources import files as import_files +except ImportError: + # Try backported to PY<37 `importlib_resources` + from importlib_resources import files as import_files +import re +import textwrap + +from jinja2 import Template + +from google_cloud_automlops.utils.utils import ( + execute_process, + get_components_list, + make_dirs, + read_file, + read_yaml_file, + is_using_kfp_spec, + write_and_chmod, + write_file, + write_yaml_file +) +from google_cloud_automlops.utils.constants import ( + BASE_DIR, + GENERATED_BUILD_COMPONENTS_SH_FILE, + GENERATED_DEFAULTS_FILE, + GENERATED_COMPONENT_BASE, + GENERATED_LICENSE, + GENERATED_PARAMETER_VALUES_PATH, + GENERATED_PIPELINE_FILE, + GENERATED_PIPELINE_REQUIREMENTS_FILE, + GENERATED_PIPELINE_RUNNER_FILE, + GENERATED_PIPELINE_SPEC_SH_FILE, + GENERATED_PUBLISH_TO_TOPIC_FILE, + GENERATED_RUN_PIPELINE_SH_FILE, + GENERATED_RUN_ALL_SH_FILE, + KFP_TEMPLATES_PATH, + PINNED_KFP_VERSION, + PIPELINE_CACHE_FILE +) +from google_cloud_automlops.orchestration.configs import KfpConfig + +def build(config: KfpConfig): + """Constructs files for running and managing Kubeflow pipelines. + + Args: + config.base_image: The image to use in the component base dockerfile. + config.custom_training_job_specs: Specifies the specs to run the training job with. + config.pipeline_params: Dictionary containing runtime pipeline parameters. + config.pubsub_topic_name: The name of the pubsub topic to publish to. + config.use_ci: Flag that determines whether to use Cloud Run CI/CD. 
+ """ + + # Write scripts for building pipeline, building components, running pipeline, and running all files + write_and_chmod(GENERATED_PIPELINE_SPEC_SH_FILE, build_pipeline_spec_jinja()) + write_and_chmod(GENERATED_BUILD_COMPONENTS_SH_FILE, build_components_jinja()) + write_and_chmod(GENERATED_RUN_PIPELINE_SH_FILE, run_pipeline_jinja()) + write_and_chmod(GENERATED_RUN_ALL_SH_FILE, run_all_jinja()) + if config.use_ci: + write_and_chmod(GENERATED_PUBLISH_TO_TOPIC_FILE, publish_to_topic_jinja(pubsub_topic_name=config.pubsub_topic_name)) + + # Create components and pipelines + components_path_list = get_components_list(full_path=True) + for path in components_path_list: + build_component(path) + build_pipeline(config.custom_training_job_specs, config.pipeline_params) + + # Write empty .gitkeep to pipeline_spec directory + write_file(f'{BASE_DIR}scripts/pipeline_spec/.gitkeep', '', 'w') + + # Write readme.md to description the contents of the directory + write_file(f'{BASE_DIR}README.md', readme_jinja(config.use_ci), 'w') + + # Write dockerfile to the component base directory + write_file(f'{GENERATED_COMPONENT_BASE}/Dockerfile', component_base_dockerfile_jinja(config.base_image), 'w') + + # Write requirements.txt to the component base directory + write_file(f'{GENERATED_COMPONENT_BASE}/requirements.txt', create_component_base_requirements(), 'w') + + # Build the submission service files + if config.use_ci: + build_services() + + +def build_component(component_path: str): + """Constructs and writes component.yaml and {component_name}.py files. + component.yaml: Contains the Kubeflow custom component definition. + {component_name}.py: Contains the python code from the Jupyter cell. + + Args: + component_path: Path to the temporary component yaml. This file + is used to create the permanent component.yaml, and deleted + after calling AutoMLOps.generate(). 
+ """ + # Retrieve defaults vars + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + + # Read in component specs + component_spec = read_yaml_file(component_path) + kfp_spec_bool = is_using_kfp_spec(component_spec['implementation']['container']['image']) + custom_code_contents = component_spec['implementation']['container']['command'][-1] + compspec_image = ( + f'''{defaults['gcp']['artifact_repo_location']}-docker.pkg.dev/''' + f'''{defaults['gcp']['project_id']}/''' + f'''{defaults['gcp']['artifact_repo_name']}/''' + f'''{defaults['gcp']['naming_prefix']}/''' + f'''components/component_base:latest''') + + # If using kfp, remove spaces in name and convert to lowercase + if kfp_spec_bool: + component_spec['name'] = component_spec['name'].replace(' ', '_').lower() + + # Set and create directory for component, and set directory for task + component_dir = BASE_DIR + 'components/' + component_spec['name'] + make_dirs([component_dir]) + task_filepath = (BASE_DIR + + 'components/component_base/src/' + + component_spec['name'] + + '.py') + + # Write task script to component base + write_file(task_filepath, component_base_task_file_jinja(custom_code_contents, kfp_spec_bool), 'w') + + # Update component_spec to include correct image and startup command + component_spec['implementation']['container']['image'] = compspec_image + component_spec['implementation']['container']['command'] = [ + 'python3', + f'''/pipelines/component/src/{component_spec['name']+'.py'}'''] + + # Write license and component spec to the appropriate component.yaml file + filename = component_dir + '/component.yaml' + write_file(filename, GENERATED_LICENSE, 'w') + write_yaml_file(filename, component_spec, 'a') + + +def build_pipeline(custom_training_job_specs: list, + pipeline_parameter_values: dict): + """Constructs and writes pipeline.py, pipeline_runner.py, and pipeline_parameter_values.json files. + pipeline.py: Generates a Kubeflow pipeline spec from custom components. + pipeline_runner.py: Sends a PipelineJob to Vertex AI using pipeline spec. + pipeline_parameter_values.json: Provides runtime parameters for the PipelineJob. + + Args: + custom_training_job_specs: Specifies the specs to run the training job with. + pipeline_parameter_values: Dictionary of runtime parameters for the PipelineJob. 
+ """ + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + # Get the names of the components + components_list = get_components_list(full_path=False) + # Read pipeline definition + pipeline_scaffold_contents = read_file(PIPELINE_CACHE_FILE) + # Add indentation + pipeline_scaffold_contents = textwrap.indent(pipeline_scaffold_contents, 4 * ' ') + # Construct pipeline.py + project_id = defaults['gcp']['project_id'] + write_file(GENERATED_PIPELINE_FILE, pipeline_jinja( + components_list, + custom_training_job_specs, + pipeline_scaffold_contents, + project_id=project_id), 'w') + # Construct pipeline_runner.py + write_file(GENERATED_PIPELINE_RUNNER_FILE, pipeline_runner_jinja(), 'w') + # Construct requirements.txt + write_file(GENERATED_PIPELINE_REQUIREMENTS_FILE, pipeline_requirements_jinja(), 'w') + # Add pipeline_spec_path to dict + pipeline_parameter_values['gs_pipeline_spec_path'] = defaults['pipelines']['gs_pipeline_job_spec_path'] + # Construct pipeline_parameter_values.json + serialized_params = json.dumps(pipeline_parameter_values, indent=4) + write_file(BASE_DIR + GENERATED_PARAMETER_VALUES_PATH, serialized_params, 'w') + + +def build_services(): + """Constructs and writes a Dockerfile, requirements.txt, and + main.py to the services/submission_service directory. + """ + # Retrieve defaults vars + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + + # Set new folders as variables + submission_service_base = BASE_DIR + 'services/submission_service' + + # Write cloud run dockerfile + write_file(f'{submission_service_base}/Dockerfile', submission_service_dockerfile_jinja(), 'w') + + # Write requirements files for cloud run base and queueing svc + write_file(f'{submission_service_base}/requirements.txt', submission_service_requirements_jinja( + pipeline_job_submission_service_type=defaults['gcp']['pipeline_job_submission_service_type']), 'w') + + # Write main code files for cloud run base and queueing svc + write_file(f'{submission_service_base}/main.py', submission_service_main_jinja( + pipeline_root=defaults['pipelines']['pipeline_storage_path'], + pipeline_job_runner_service_account=defaults['gcp']['pipeline_job_runner_service_account'], + pipeline_job_submission_service_type=defaults['gcp']['pipeline_job_submission_service_type'], + project_id=defaults['gcp']['project_id']), 'w') + + +def create_component_base_requirements(): + """Writes a requirements.txt to the component_base directory. + Infers pip requirements from the python srcfiles using + pipreqs. Takes user-inputted requirements, and addes some + default gcp packages as well as packages that are often missing + in setup.py files (e.g db_types, pyarrow, gcsfs, fsspec). 
+ """ + reqs_filename = f'{GENERATED_COMPONENT_BASE}/requirements.txt' + default_gcp_reqs = [ + 'google-cloud-aiplatform', + 'google-cloud-appengine-logging', + 'google-cloud-audit-log', + 'google-cloud-bigquery', + 'google-cloud-bigquery-storage', + 'google-cloud-bigtable', + 'google-cloud-core', + 'google-cloud-dataproc', + 'google-cloud-datastore', + 'google-cloud-dlp', + 'google-cloud-firestore', + 'google-cloud-kms', + 'google-cloud-language', + 'google-cloud-logging', + 'google-cloud-monitoring', + 'google-cloud-notebooks', + 'google-cloud-pipeline-components', + 'google-cloud-pubsub', + 'google-cloud-pubsublite', + 'google-cloud-recommendations-ai', + 'google-cloud-resource-manager', + 'google-cloud-scheduler', + 'google-cloud-spanner', + 'google-cloud-speech', + 'google-cloud-storage', + 'google-cloud-tasks', + 'google-cloud-translate', + 'google-cloud-videointelligence', + 'google-cloud-vision', + 'db_dtypes', + 'pyarrow', + 'gcsfs', + 'fsspec'] + # Get user-inputted requirements from the cache dir + user_inp_reqs = [] + components_path_list = get_components_list() + for component_path in components_path_list: + component_spec = read_yaml_file(component_path) + reqs = component_spec['implementation']['container']['command'][2] + formatted_reqs = re.findall('\'([^\']*)\'', reqs) + user_inp_reqs.extend(formatted_reqs) + # Check if user inputted requirements + if user_inp_reqs: + # Remove duplicates + set_of_requirements = set(user_inp_reqs) + else: + # If user did not input requirements, then infer reqs using pipreqs + execute_process(f'python3 -m pipreqs.pipreqs {GENERATED_COMPONENT_BASE} --mode no-pin --force', to_null=True) + pipreqs = read_file(reqs_filename).splitlines() + set_of_requirements = set(pipreqs + default_gcp_reqs) + # Remove empty string + if '' in set_of_requirements: + set_of_requirements.remove('') + # Pin kfp version + if 'kfp' in set_of_requirements: + set_of_requirements.remove('kfp') + set_of_requirements.add(PINNED_KFP_VERSION) + # Stringify and sort + reqs_str = ''.join(r+'\n' for r in sorted(set_of_requirements)) + return reqs_str + + +def build_pipeline_spec_jinja() -> str: + """Generates code for build_pipeline_spec.sh which builds the pipeline specs. + + Returns: + str: build_pipeline_spec.sh script. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.scripts') / 'build_pipeline_spec.sh.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + generated_license=GENERATED_LICENSE, + base_dir=BASE_DIR) + + +def build_components_jinja() -> str: + """Generates code for build_components.sh which builds the components. + + Returns: + str: build_components.sh script. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.scripts') / 'build_components.sh.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + generated_license=GENERATED_LICENSE, + base_dir=BASE_DIR) + + +def run_pipeline_jinja() -> str: + """Generates code for run_pipeline.sh which runs the pipeline locally. + + Returns: + str: run_pipeline.sh script. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.scripts') / 'run_pipeline.sh.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + generated_license=GENERATED_LICENSE, + base_dir=BASE_DIR) + + +def run_all_jinja() -> str: + """Generates code for run_all.sh which builds runs all other shell scripts. + + Returns: + str: run_all.sh script. 
+ """ + template_file = import_files(KFP_TEMPLATES_PATH + '.scripts') / 'run_all.sh.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + generated_license=GENERATED_LICENSE, + base_dir=BASE_DIR) + + +def publish_to_topic_jinja(pubsub_topic_name: str) -> str: + """Generates code for publish_to_topic.sh which submits a message to the + pipeline job submission service. + + Args: + pubsub_topic_name: The name of the pubsub topic to publish to. + + Returns: + str: publish_to_topic.sh script. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.scripts') / 'publish_to_topic.sh.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + base_dir=BASE_DIR, + generated_license=GENERATED_LICENSE, + generated_parameter_values_path=GENERATED_PARAMETER_VALUES_PATH, + pubsub_topic_name=pubsub_topic_name) + + +def readme_jinja(use_ci: str) -> str: + """Generates code for readme.md which is a readme markdown file to describe the contents of the + generated AutoMLOps code repo. + + Args: + use_ci: Flag that determines whether to use Cloud CI/CD. + + Returns: + str: README.md file. + """ + template_file = import_files(KFP_TEMPLATES_PATH) / 'README.md.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render(use_ci=use_ci) + + +def component_base_dockerfile_jinja(base_image: str) -> str: + """Generates code for a Dockerfile to be written to the component_base directory. + + Args: + base_image: The image to use in the component base dockerfile. + + Returns: + str: Dockerfile file. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.components.component_base') / 'Dockerfile.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + base_image=base_image, + generated_license=GENERATED_LICENSE) + + +def component_base_task_file_jinja(custom_code_contents: str, kfp_spec_bool: str) -> str: + """Generates code for the task.py file to be written to the component_base/src directory. + + Args: + custom_code_contents: Code inside of the component, specified by the user. + kfp_spec_bool: Boolean that specifies whether components are defined using kfp. + + Returns: + str: Contents of the task.py file. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.components.component_base.src') / 'task.py.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + custom_code_contents=custom_code_contents, + generated_license=GENERATED_LICENSE, + kfp_spec_bool=kfp_spec_bool) + + +def pipeline_runner_jinja() -> str: + """Generates code for the pipeline_runner.py file to be written to the pipelines directory. + + Returns: + str: pipeline_runner.py file. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'pipeline_runner.py.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render(generated_license=GENERATED_LICENSE) + + +def pipeline_jinja( + components_list: list, + custom_training_job_specs: list, + pipeline_scaffold_contents: str, + project_id: str) -> str: + """Generates code for the pipeline.py file to be written to the pipelines directory. + + Args: + components_list: Contains the names or paths of all component yamls in the dir. + custom_training_job_specs: Specifies the specs to run the training job with. 
+ pipeline_scaffold_contents: The contents of the pipeline scaffold file, + which can be found at PIPELINE_CACHE_FILE. + project_id: The project ID. + + Returns: + str: pipeline.py file. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'pipeline.py.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + components_list=components_list, + custom_training_job_specs=custom_training_job_specs, + generated_license=GENERATED_LICENSE, + pipeline_scaffold_contents=pipeline_scaffold_contents, + project_id=project_id) + + +def pipeline_requirements_jinja() -> str: + """Generates code for a requirements.txt to be written to the pipelines directory. + + Returns: + str: requirements.txt file for pipelines. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'requirements.txt.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render(pinned_kfp_version=PINNED_KFP_VERSION) + + +def submission_service_dockerfile_jinja() -> str: + """Generates code for a Dockerfile to be written to the serivces/submission_service directory. + + Returns: + str: Dockerfile file. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'Dockerfile.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + base_dir=BASE_DIR, + generated_license=GENERATED_LICENSE) + + +def submission_service_requirements_jinja(pipeline_job_submission_service_type: str) -> str: + """Generates code for a requirements.txt to be written to the serivces/submission_service directory. + + Args: + pipeline_job_submission_service_type: The tool to host for the cloud submission service (e.g. cloud run, cloud functions). + + Returns: + str: requirements.txt file for submission_service. + """ + template_file = import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'requirements.txt.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + pinned_kfp_version=PINNED_KFP_VERSION, + pipeline_job_submission_service_type=pipeline_job_submission_service_type) + + +def submission_service_main_jinja( + pipeline_root: str, + pipeline_job_runner_service_account: str, + pipeline_job_submission_service_type: str, + project_id: str) -> str: + """Generates content for main.py to be written to the serivces/submission_service directory. + This file contains code for running a flask service that will act as a pipeline job submission service. + + Args: + pipeline_root: GS location where to store metadata from pipeline runs. + pipeline_job_runner_service_account: Service Account to runner PipelineJobs. + pipeline_job_submission_service_type: The tool to host for the cloud submission service (e.g. cloud run, cloud functions). + project_id: The project ID. + + Returns: + str: Content of serivces/submission_service main.py. 
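+
+    Illustrative example (a minimal sketch; the project, bucket, and service
+    account names are hypothetical, and 'cloud-run' is one of the values
+    defined by the PipelineJobSubmitter enum):
+
+        main_py = submission_service_main_jinja(
+            pipeline_root='gs://my-bucket/pipeline_root',
+            pipeline_job_runner_service_account='runner@my-project.iam.gserviceaccount.com',
+            pipeline_job_submission_service_type='cloud-run',
+            project_id='my-project')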
+ """ + template_file = import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'main.py.j2' + with template_file.open('r', encoding='utf-8') as f: + template = Template(f.read()) + return template.render( + generated_license=GENERATED_LICENSE, + pipeline_root=pipeline_root, + pipeline_job_runner_service_account=pipeline_job_runner_service_account, + pipeline_job_submission_service_type=pipeline_job_submission_service_type, + project_id=project_id) diff --git a/google_cloud_automlops/orchestration/configs.py b/google_cloud_automlops/orchestration/configs.py new file mode 100644 index 0000000..27c674b --- /dev/null +++ b/google_cloud_automlops/orchestration/configs.py @@ -0,0 +1,39 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Model classes for AutoMLOps Orchestration Frameworks.""" + +# pylint: disable=C0103 +# pylint: disable=line-too-long + +from typing import Dict, List, Optional + +from pydantic import BaseModel + + +class KfpConfig(BaseModel): + """Model representing the KFP config. + + Args: + base_image: The image to use in the component base dockerfile. + custom_training_job_specs: Specifies the specs to run the training job with. + pipeline_params: Dictionary containing runtime pipeline parameters. + pubsub_topic_name: The name of the pubsub topic to publish to. + use_ci: Flag that determines whether to use Cloud Run CI/CD. + """ + base_image: str + custom_training_job_specs: Optional[List] + pipeline_params: Dict + pubsub_topic_name: str + use_ci: bool diff --git a/google_cloud_automlops/orchestration/enums.py b/google_cloud_automlops/orchestration/enums.py new file mode 100644 index 0000000..7894af0 --- /dev/null +++ b/google_cloud_automlops/orchestration/enums.py @@ -0,0 +1,36 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Sets global enums.""" + +# pylint: disable=C0103 +# pylint: disable=line-too-long + +from enum import Enum + + +class Orchestrator(Enum): + """Enum representing the available options for orchestration management.""" + + KFP = 'kfp' + # ARGO_WORKFLOWS = 'argo-workflows' # roadmap item + # TFX = 'tfx' # roadmap item + # AIRFLOW = 'airflow' # roadmap item + # RAY = 'ray' # roadmap item + +class PipelineJobSubmitter(Enum): + """Enum representing the available options for the Pipeline Job submission service.""" + + CLOUD_FUNCTIONS = 'cloud-functions' + CLOUD_RUN = 'cloud-run' diff --git a/google_cloud_automlops/orchestration/kfp/KFPComponent.py b/google_cloud_automlops/orchestration/kfp/KFPComponent.py index 7dbf874..988a255 100644 --- a/google_cloud_automlops/orchestration/kfp/KFPComponent.py +++ b/google_cloud_automlops/orchestration/kfp/KFPComponent.py @@ -65,8 +65,10 @@ def __init__(self, super().__init__(func, packages_to_install) # Update parameters and return types to reflect KFP data types - self.parameters = update_params(self.parameters) - self.return_types = update_params(self.return_types) + if self.parameters: + self.parameters = update_params(self.parameters) + if self.return_types: + self.return_types = update_params(self.return_types) # Set packages to install and component spec attributes self.packages_to_install_command = self._get_packages_to_install_command() @@ -77,6 +79,15 @@ def build(self): """ super().build() + # Set and create directory for components if it does not already exist + component_dir = BASE_DIR + 'components/' + self.component_spec['name'] + + # Build necessary folders + # TODO: make this only happen for the first component? or pull into automlops.py + make_dirs([ + component_dir, + BASE_DIR + 'components/component_base/src/']) + # TODO: can this be removed? kfp_spec_bool = self.component_spec['implementation']['container']['image'] != PLACEHOLDER_IMAGE @@ -93,11 +104,6 @@ def build(self): if kfp_spec_bool: self.component_spec['name'] = self.component_spec['name'].replace(' ', '_').lower() - # Set and create directory for components if it does not already exist - # TODO: make this only happen for the first component? 
or pull into automlops.py - component_dir = BASE_DIR + 'components/' + self.component_spec['name'] - make_dirs([component_dir]) - # Write task script to component base write_file( filepath=BASE_DIR + 'components/component_base/src/' + self.component_spec['name'] + '.py', diff --git a/google_cloud_automlops/orchestration/kfp/KFPPipeline.py b/google_cloud_automlops/orchestration/kfp/KFPPipeline.py index f50e7a1..07ef25a 100644 --- a/google_cloud_automlops/orchestration/kfp/KFPPipeline.py +++ b/google_cloud_automlops/orchestration/kfp/KFPPipeline.py @@ -33,6 +33,7 @@ from google_cloud_automlops.utils.utils import ( execute_process, get_components_list, + make_dirs, read_file, read_yaml_file, render_jinja, @@ -124,6 +125,13 @@ def build(self, pubsub_topic_name, use_ci) + # Build necessary folders + make_dirs([ + f'{BASE_DIR}scripts/pipeline_spec/', + f'{BASE_DIR}pipelines', + f'{BASE_DIR}pipelines/runtime_parameters/' + ]) + # README.md: Write description of the contents of the directory write_file( filepath=f'{BASE_DIR}README.md', @@ -152,7 +160,7 @@ def build(self, # scripts/pipeline_spec/.gitkeep: Write gitkeep to pipeline_spec directory write_file( - filepath=f'{BASE_DIR}scripts/pipeline_spec/.gitkeep', + filepath=f'{BASE_DIR}scripts/pipeline_spec/.gitkeep', text='', mode='w') @@ -201,8 +209,7 @@ def build(self, # pipelines/pipeline.py: Generates a Kubeflow pipeline spec from custom components. components_list = get_components_list(full_path=False) - pipeline_scaffold_contents = read_file(PIPELINE_CACHE_FILE) - pipeline_scaffold_contents = textwrap.indent(pipeline_scaffold_contents, 4 * ' ') + pipeline_scaffold_contents = textwrap.indent(self.pipeline_scaffold, 4 * ' ') write_file( filepath=GENERATED_PIPELINE_FILE, text=render_jinja( diff --git a/google_cloud_automlops/orchestration/kfp/scaffold.py b/google_cloud_automlops/orchestration/kfp/scaffold.py new file mode 100644 index 0000000..cf2b7a7 --- /dev/null +++ b/google_cloud_automlops/orchestration/kfp/scaffold.py @@ -0,0 +1,253 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Builds temporary component scaffold yaml files.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=C0103 +# pylint: disable=line-too-long + +import inspect +from typing import Callable, List, Optional, TypeVar, Union + +import docstring_parser + +from google_cloud_automlops.utils.constants import ( + DEFAULT_PIPELINE_NAME, + PLACEHOLDER_IMAGE, + PIPELINE_CACHE_FILE, + CACHE_DIR +) +from google_cloud_automlops.utils.utils import ( + get_function_source_definition, + make_dirs, + update_params, + write_file, + write_yaml_file +) + +T = TypeVar('T') + + +def create_component_scaffold(func: Optional[Callable] = None, + *, + packages_to_install: Optional[List[str]] = None): + """Creates a tmp component scaffold which will be used by the formalize function. + Code is temporarily stored in component_spec['implementation']['container']['command']. 
+ + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + packages_to_install: A list of optional packages to install before + executing func. These will always be installed at component runtime. + """ + # Extract name, docstring, and component description + name = func.__name__ + parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) + description = parsed_docstring.short_description + + # Instantiate component yaml attributes + component_spec = {} + component_spec['name'] = name + if description: + component_spec['description'] = description + outputs = get_function_return_types(func) + if outputs: + component_spec['outputs'] = outputs + component_spec['inputs'] = get_function_parameters(func) + component_spec['implementation'] = {} + component_spec['implementation']['container'] = {} + component_spec['implementation']['container']['image'] = PLACEHOLDER_IMAGE + component_spec['implementation']['container']['command'] = get_packages_to_install_command(func, packages_to_install) + component_spec['implementation']['container']['args'] = ['--executor_input', + {'executorInput': None}, + '--function_to_execute', + name] + # Write component yaml + filename = CACHE_DIR + f'/{name}.yaml' + make_dirs([CACHE_DIR]) + write_yaml_file(filename, component_spec, 'w') + + +def get_packages_to_install_command(func: Optional[Callable] = None, + packages_to_install: Optional[List[str]] = None): + """Returns a list of formatted list of commands, including code for tmp storage. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + packages_to_install: A list of optional packages to install before + executing func. These will always be installed at component runtime. + """ + newline = '\n' + if not packages_to_install: + packages_to_install = [] + concat_package_list = ' '.join([repr(str(package)) for package in packages_to_install]) + install_python_packages_script = ( + f'''if ! [ -x "$(command -v pip)" ]; then{newline}''' + f''' python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip{newline}''' + f'''fi{newline}''' + f'''PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \{newline}''' + f''' --no-warn-script-location {concat_package_list} && "$0" "$@"{newline}''' + f'''{newline}''') + src_code = get_function_source_definition(func) + return ['sh', '-c', install_python_packages_script, src_code] + + +def get_function_return_types(func: Callable) -> list: + """Returns a formatted list of function return types. + + Args: + func: The python function to create a component from. The function + can optionally have type annotations for its return values. + Returns: + list: return value list with types converted to kubeflow spec. + Raises: + Exception: If return type is provided and not a NamedTuple. 
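+
+    Illustrative example (a minimal sketch; the NamedTuple below is hypothetical):
+
+        from typing import NamedTuple
+
+        def evaluate() -> NamedTuple('Outputs', [('accuracy', float)]):
+            ...
+
+        # get_function_return_types(evaluate) returns one metadata entry for
+        # 'accuracy', with its type converted to the KFP spec by update_params.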
+ """ + annotation = inspect.signature(func).return_annotation + if maybe_strip_optional_from_annotation(annotation) is not annotation: + raise TypeError('Return type cannot be Optional.') + + # No annotations provided + # pylint: disable=protected-access + if annotation == inspect._empty: + return None + + if not (hasattr(annotation,'__annotations__') and isinstance(annotation.__annotations__, dict)): + raise TypeError(f'''Return type hint for function "{func.__name__}" must be a NamedTuple.''') + + outputs = [] + for name, type_ in annotation.__annotations__.items(): + metadata = {} + metadata['name'] = name + metadata['type'] = type_ + metadata['description'] = None + outputs.append(metadata) + return update_params(outputs) + + +def get_function_parameters(func: Callable) -> list: + """Returns a formatted list of parameters. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + Returns: + list: Params list with types converted to kubeflow spec. + Raises: + Exception: If parameter type hints are not provided. + """ + signature = inspect.signature(func) + parameters = list(signature.parameters.values()) + parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) + doc_dict = {p.arg_name: p.description for p in parsed_docstring.params} + + # Extract parameter metadata + parameter_holder = [] + for param in parameters: + metadata = {} + metadata['name'] = param.name + metadata['description'] = doc_dict.get(param.name) + metadata['type'] = maybe_strip_optional_from_annotation( + param.annotation) + parameter_holder.append(metadata) + # pylint: disable=protected-access + if metadata['type'] == inspect._empty: + raise TypeError( + f'''Missing type hint for parameter "{metadata['name']}". ''' + f'''Please specify the type for this parameter.''') + return update_params(parameter_holder) + + +def maybe_strip_optional_from_annotation(annotation: T) -> T: + """Strips 'Optional' from 'Optional[]' if applicable. + For example:: + Optional[str] -> str + str -> str + List[int] -> List[int] + Args: + annotation: The original type annotation which may or may not has `Optional`. + Returns: + The type inside Optional[] if Optional exists, otherwise the original type. + """ + if getattr(annotation, '__origin__', None) is Union and annotation.__args__[1] is type(None): + return annotation.__args__[0] + else: + return annotation + + +def create_pipeline_scaffold(func: Optional[Callable] = None, + *, + name: Optional[str] = None, + description: Optional[str] = None): + """Creates a temporary pipeline scaffold which will + be used by the formalize function. + + Args: + func: The python function to create a pipeline from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + name: The name of the pipeline. + description: Short description of what the pipeline does. + """ + pipeline_scaffold = (get_pipeline_decorator(name, description) + + get_function_source_definition(func) + + get_compile_step(func.__name__)) + make_dirs([CACHE_DIR]) # if it doesn't already exist + write_file(PIPELINE_CACHE_FILE, pipeline_scaffold, 'w') + + +def get_pipeline_decorator(name: Optional[str] = None, + description: Optional[str] = None): + """Creates the kfp pipeline decorator. 
+ + Args: + name: The name of the pipeline. + description: Short description of what the pipeline does. + + Returns: + str: Python compile function call. + """ + default_name = DEFAULT_PIPELINE_NAME if not name else name + name_str = f'''(\n name='{default_name}',\n''' + desc_str = f''' description='{description}',\n''' if description else '' + ending_str = ')\n' + return '@dsl.pipeline' + name_str + desc_str + ending_str + + +def get_compile_step(func_name: str): + """Creates the compile function call. + + Args: + func_name: The name of the pipeline function. + + Returns: + str: Python compile function call. + """ + return ( + f'\n' + f'compiler.Compiler().compile(\n' + f' pipeline_func={func_name},\n' + f' package_path=pipeline_job_spec_path)\n' + f'\n' + ) + diff --git a/google_cloud_automlops/orchestration/scaffold.py b/google_cloud_automlops/orchestration/scaffold.py new file mode 100644 index 0000000..cf2b7a7 --- /dev/null +++ b/google_cloud_automlops/orchestration/scaffold.py @@ -0,0 +1,253 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Builds temporary component scaffold yaml files.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=C0103 +# pylint: disable=line-too-long + +import inspect +from typing import Callable, List, Optional, TypeVar, Union + +import docstring_parser + +from google_cloud_automlops.utils.constants import ( + DEFAULT_PIPELINE_NAME, + PLACEHOLDER_IMAGE, + PIPELINE_CACHE_FILE, + CACHE_DIR +) +from google_cloud_automlops.utils.utils import ( + get_function_source_definition, + make_dirs, + update_params, + write_file, + write_yaml_file +) + +T = TypeVar('T') + + +def create_component_scaffold(func: Optional[Callable] = None, + *, + packages_to_install: Optional[List[str]] = None): + """Creates a tmp component scaffold which will be used by the formalize function. + Code is temporarily stored in component_spec['implementation']['container']['command']. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + packages_to_install: A list of optional packages to install before + executing func. These will always be installed at component runtime. 
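+
+    Illustrative example (a minimal sketch; the component function and the
+    package shown are hypothetical, and all arguments carry the required
+    type hints):
+
+        def create_dataset(bq_table: str, output_data_path: str):
+            ...
+
+        create_component_scaffold(func=create_dataset,
+                                  packages_to_install=['google-cloud-bigquery'])
+        # Writes <CACHE_DIR>/create_dataset.yaml with the placeholder image and
+        # the function's source code stored in the container command.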
+ """ + # Extract name, docstring, and component description + name = func.__name__ + parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) + description = parsed_docstring.short_description + + # Instantiate component yaml attributes + component_spec = {} + component_spec['name'] = name + if description: + component_spec['description'] = description + outputs = get_function_return_types(func) + if outputs: + component_spec['outputs'] = outputs + component_spec['inputs'] = get_function_parameters(func) + component_spec['implementation'] = {} + component_spec['implementation']['container'] = {} + component_spec['implementation']['container']['image'] = PLACEHOLDER_IMAGE + component_spec['implementation']['container']['command'] = get_packages_to_install_command(func, packages_to_install) + component_spec['implementation']['container']['args'] = ['--executor_input', + {'executorInput': None}, + '--function_to_execute', + name] + # Write component yaml + filename = CACHE_DIR + f'/{name}.yaml' + make_dirs([CACHE_DIR]) + write_yaml_file(filename, component_spec, 'w') + + +def get_packages_to_install_command(func: Optional[Callable] = None, + packages_to_install: Optional[List[str]] = None): + """Returns a list of formatted list of commands, including code for tmp storage. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + packages_to_install: A list of optional packages to install before + executing func. These will always be installed at component runtime. + """ + newline = '\n' + if not packages_to_install: + packages_to_install = [] + concat_package_list = ' '.join([repr(str(package)) for package in packages_to_install]) + install_python_packages_script = ( + f'''if ! [ -x "$(command -v pip)" ]; then{newline}''' + f''' python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip{newline}''' + f'''fi{newline}''' + f'''PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \{newline}''' + f''' --no-warn-script-location {concat_package_list} && "$0" "$@"{newline}''' + f'''{newline}''') + src_code = get_function_source_definition(func) + return ['sh', '-c', install_python_packages_script, src_code] + + +def get_function_return_types(func: Callable) -> list: + """Returns a formatted list of function return types. + + Args: + func: The python function to create a component from. The function + can optionally have type annotations for its return values. + Returns: + list: return value list with types converted to kubeflow spec. + Raises: + Exception: If return type is provided and not a NamedTuple. 
+ """ + annotation = inspect.signature(func).return_annotation + if maybe_strip_optional_from_annotation(annotation) is not annotation: + raise TypeError('Return type cannot be Optional.') + + # No annotations provided + # pylint: disable=protected-access + if annotation == inspect._empty: + return None + + if not (hasattr(annotation,'__annotations__') and isinstance(annotation.__annotations__, dict)): + raise TypeError(f'''Return type hint for function "{func.__name__}" must be a NamedTuple.''') + + outputs = [] + for name, type_ in annotation.__annotations__.items(): + metadata = {} + metadata['name'] = name + metadata['type'] = type_ + metadata['description'] = None + outputs.append(metadata) + return update_params(outputs) + + +def get_function_parameters(func: Callable) -> list: + """Returns a formatted list of parameters. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + Returns: + list: Params list with types converted to kubeflow spec. + Raises: + Exception: If parameter type hints are not provided. + """ + signature = inspect.signature(func) + parameters = list(signature.parameters.values()) + parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) + doc_dict = {p.arg_name: p.description for p in parsed_docstring.params} + + # Extract parameter metadata + parameter_holder = [] + for param in parameters: + metadata = {} + metadata['name'] = param.name + metadata['description'] = doc_dict.get(param.name) + metadata['type'] = maybe_strip_optional_from_annotation( + param.annotation) + parameter_holder.append(metadata) + # pylint: disable=protected-access + if metadata['type'] == inspect._empty: + raise TypeError( + f'''Missing type hint for parameter "{metadata['name']}". ''' + f'''Please specify the type for this parameter.''') + return update_params(parameter_holder) + + +def maybe_strip_optional_from_annotation(annotation: T) -> T: + """Strips 'Optional' from 'Optional[]' if applicable. + For example:: + Optional[str] -> str + str -> str + List[int] -> List[int] + Args: + annotation: The original type annotation which may or may not has `Optional`. + Returns: + The type inside Optional[] if Optional exists, otherwise the original type. + """ + if getattr(annotation, '__origin__', None) is Union and annotation.__args__[1] is type(None): + return annotation.__args__[0] + else: + return annotation + + +def create_pipeline_scaffold(func: Optional[Callable] = None, + *, + name: Optional[str] = None, + description: Optional[str] = None): + """Creates a temporary pipeline scaffold which will + be used by the formalize function. + + Args: + func: The python function to create a pipeline from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + name: The name of the pipeline. + description: Short description of what the pipeline does. + """ + pipeline_scaffold = (get_pipeline_decorator(name, description) + + get_function_source_definition(func) + + get_compile_step(func.__name__)) + make_dirs([CACHE_DIR]) # if it doesn't already exist + write_file(PIPELINE_CACHE_FILE, pipeline_scaffold, 'w') + + +def get_pipeline_decorator(name: Optional[str] = None, + description: Optional[str] = None): + """Creates the kfp pipeline decorator. 
+ + Args: + name: The name of the pipeline. + description: Short description of what the pipeline does. + + Returns: + str: Python compile function call. + """ + default_name = DEFAULT_PIPELINE_NAME if not name else name + name_str = f'''(\n name='{default_name}',\n''' + desc_str = f''' description='{description}',\n''' if description else '' + ending_str = ')\n' + return '@dsl.pipeline' + name_str + desc_str + ending_str + + +def get_compile_step(func_name: str): + """Creates the compile function call. + + Args: + func_name: The name of the pipeline function. + + Returns: + str: Python compile function call. + """ + return ( + f'\n' + f'compiler.Compiler().compile(\n' + f' pipeline_func={func_name},\n' + f' package_path=pipeline_job_spec_path)\n' + f'\n' + ) + From 2ab4bfee712d5e54867cfc18bd963dc20a90a72c Mon Sep 17 00:00:00 2001 From: Allegra Noto Date: Tue, 20 Feb 2024 11:51:49 -0500 Subject: [PATCH 04/11] Added globals for comps, pipelines --- google_cloud_automlops/AutoMLOps.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/google_cloud_automlops/AutoMLOps.py b/google_cloud_automlops/AutoMLOps.py index 50d612f..b3c02a2 100644 --- a/google_cloud_automlops/AutoMLOps.py +++ b/google_cloud_automlops/AutoMLOps.py @@ -67,6 +67,10 @@ from google_cloud_automlops.orchestration.configs import ( KfpConfig ) + +from google_cloud_automlops.orchestration import Component, Pipeline, Services +from google_cloud_automlops.orchestration.kfp import KFPComponent, KFPPipeline, KFPServices + # Provisioning imports from google_cloud_automlops.provisioning.pulumi import builder as PulumiBuilder from google_cloud_automlops.provisioning.terraform import builder as TerraformBuilder @@ -91,13 +95,20 @@ ) from google_cloud_automlops.deployments.gitops.git_utils import git_workflow +# Set up logging logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s') logging.getLogger('googleapiclient').setLevel(logging.WARNING) logger = logging.getLogger() +# Create output directory make_dirs([OUTPUT_DIR]) +# Set up global dictionaries to hold pipeline and components +global components, pipeline +components = {} +pipeline = None + def launchAll( project_id: str, pipeline_params: Dict, From d08149181fc176749e3ebfece3b326a4252ac79d Mon Sep 17 00:00:00 2001 From: Allegra Noto Date: Mon, 26 Feb 2024 13:13:52 -0500 Subject: [PATCH 05/11] Initial working code, need to add Services to AutoMLOps.py and incorporate model monitoring --- google_cloud_automlops/AutoMLOps.py | 52 +- .../orchestration/Component.py | 11 +- .../orchestration/Pipeline.py | 47 +- .../orchestration/Services.py | 19 +- .../orchestration/builder.py | 522 ------------------ .../orchestration/configs.py | 39 -- .../orchestration/kfp/KFPComponent.py | 10 +- .../orchestration/kfp/KFPPipeline.py | 26 +- .../orchestration/kfp/KFPServices.py | 13 + .../orchestration/kfp/builder.py | 377 ------------- .../orchestration/kfp/scaffold.py | 253 --------- .../orchestration/scaffold.py | 253 --------- 12 files changed, 137 insertions(+), 1485 deletions(-) delete mode 100644 google_cloud_automlops/orchestration/builder.py delete mode 100644 google_cloud_automlops/orchestration/configs.py delete mode 100644 google_cloud_automlops/orchestration/kfp/builder.py delete mode 100644 google_cloud_automlops/orchestration/kfp/scaffold.py delete mode 100644 google_cloud_automlops/orchestration/scaffold.py diff --git a/google_cloud_automlops/AutoMLOps.py b/google_cloud_automlops/AutoMLOps.py index b3c02a2..08db000 100644 --- 
a/google_cloud_automlops/AutoMLOps.py +++ b/google_cloud_automlops/AutoMLOps.py @@ -68,8 +68,12 @@ KfpConfig ) -from google_cloud_automlops.orchestration import Component, Pipeline, Services -from google_cloud_automlops.orchestration.kfp import KFPComponent, KFPPipeline, KFPServices +from google_cloud_automlops.orchestration.Component import Component +from google_cloud_automlops.orchestration.Pipeline import Pipeline +from google_cloud_automlops.orchestration.Services import Services +from google_cloud_automlops.orchestration.kfp.KFPComponent import KFPComponent +from google_cloud_automlops.orchestration.kfp.KFPPipeline import KFPPipeline +from google_cloud_automlops.orchestration.kfp.KFPServices import KFPServices # Provisioning imports from google_cloud_automlops.provisioning.pulumi import builder as PulumiBuilder @@ -105,9 +109,8 @@ make_dirs([OUTPUT_DIR]) # Set up global dictionaries to hold pipeline and components -global components, pipeline -components = {} -pipeline = None +global components_dict +components_dict = {} def launchAll( project_id: str, @@ -340,12 +343,19 @@ def generate( logging.info(f'Writing scripts to {BASE_DIR}scripts') if use_ci: logging.info(f'Writing submission service code to {BASE_DIR}services') - KfpBuilder.build(KfpConfig( - base_image=base_image, - custom_training_job_specs=derived_custom_training_job_specs, - pipeline_params=pipeline_params, - pubsub_topic_name=derived_pubsub_topic_name, - use_ci=use_ci)) + logging.info("Writing pipleine code.") + kfppipe = KFPPipeline(func=pipeline_glob.func, + name=pipeline_glob.name, + description=pipeline_glob.description, + comps_dict=components_dict) + kfppipe.build(base_image, + custom_training_job_specs, + pipeline_params, + pubsub_topic_name, + use_ci) + for comp in kfppipe.comps: + logging.info(f"Writing code for component {comp.name}") + KFPComponent(func=comp.func, packages_to_install=comp.packages_to_install).build() # Generate files required to provision resources if provisioning_framework == Provisioner.GCLOUD.value: @@ -519,7 +529,7 @@ def deploy( # Log generated resources resources_generation_manifest(defaults) - +# TODO: Replace with component object creation def component(func: Optional[Callable] = None, *, packages_to_install: Optional[List[str]] = None): @@ -543,11 +553,13 @@ def my_function_one(input: str, output: Output[Model]): component, packages_to_install=packages_to_install) else: - return KfpScaffold.create_component_scaffold( + components_dict[func.__name__] = Component( func=func, - packages_to_install=packages_to_install) - + packages_to_install=packages_to_install + ) + return +# TODO: Replace with pipeline object creation def pipeline(func: Optional[Callable] = None, *, name: Optional[str] = None, @@ -581,10 +593,12 @@ def pipeline(bq_table: str, name=name, description=description) else: - return KfpScaffold.create_pipeline_scaffold( - func=func, - name=name, - description=description) + global pipeline_glob + pipeline_glob = Pipeline(func=func, + name=name, + description=description, + comps_dict=components_dict) + return def clear_cache(): diff --git a/google_cloud_automlops/orchestration/Component.py b/google_cloud_automlops/orchestration/Component.py index bf91204..123f479 100644 --- a/google_cloud_automlops/orchestration/Component.py +++ b/google_cloud_automlops/orchestration/Component.py @@ -18,7 +18,6 @@ # pylint: disable=C0103 # pylint: disable=line-too-long -from abc import ABC, abstractmethod import docstring_parser import inspect from typing import Callable, List, Optional, 
TypeVar, Union @@ -32,7 +31,7 @@ T = TypeVar('T') -class Component(ABC): +class Component(): """The Component object represents a component defined by the user. Args: @@ -75,7 +74,12 @@ def __init__(self, self.return_types = self._get_function_return_types() self.src_code = get_function_source_definition(self.func) - @abstractmethod + # Instantiate attributes to be set during build + self.artifact_repo_location = None + self.artifact_repo_name = None + self.project_id = None + self.naming_prefix = None + def build(self): """Instantiates an abstract built method to create and write task files. Also reads in defaults file to save default arguments to attributes. @@ -85,6 +89,7 @@ def build(self): self.artifact_repo_name = defaults['gcp']['artifact_repo_name'] self.project_id = defaults['gcp']['project_id'] self.naming_prefix = defaults['gcp']['naming_prefix'] + raise NotImplementedError("Subclass needs to define this.") def _get_function_return_types(self) -> list: """Returns a formatted list of function return types. diff --git a/google_cloud_automlops/orchestration/Pipeline.py b/google_cloud_automlops/orchestration/Pipeline.py index 112c0bb..d52eaff 100644 --- a/google_cloud_automlops/orchestration/Pipeline.py +++ b/google_cloud_automlops/orchestration/Pipeline.py @@ -18,7 +18,8 @@ # pylint: disable=C0103 # pylint: disable=line-too-long -from abc import ABC, abstractmethod +import ast +import inspect from typing import Callable, Optional from google_cloud_automlops.utils.constants import ( @@ -31,7 +32,7 @@ ) -class Pipeline(ABC): +class Pipeline(): """The Pipeline object represents a component defined by the user. Args: @@ -42,7 +43,8 @@ def __init__(self, func: Optional[Callable] = None, *, name: Optional[str] = None, - description: Optional[str] = None): + description: Optional[str] = None, + comps_dict: dict): """Initiates a pipeline object created out of a function holding all necessary code. @@ -53,14 +55,26 @@ def __init__(self, a plain parameter, or a path to a file). name: The name of the pipeline. description: Short description of what the pipeline does. + comps_list: Dictionary of potential components for pipeline to utilize imported + as the global held in AutoMLOps.py. """ + # Instantiate and set key pipeline attributes self.func = func self.func_name = func.__name__ self.name = DEFAULT_PIPELINE_NAME if not name else name self.description = description self.src_code = get_function_source_definition(self.func) + self.comps = self.get_pipeline_components(func, comps_dict) + + # Instantiate attributes to be set at build process + self.base_image = None + self.custom_training_job_specs = None + self.pipeline_params = None + self.pubsub_topic_name = None + self.use_ci = None + self.project_id = None + self.gs_pipeline_job_spec_path = None - @abstractmethod def build(self, base_image, custom_training_job_specs, @@ -94,6 +108,31 @@ def build(self, self.project_id = defaults['gcp']['project_id'] self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path'] + raise NotImplementedError("Subclass needs to define this.") + + def get_pipeline_components(self, pipeline_func: Callable, comps_dict: dict): + """Returns a list of components used within a given pipeline. + + Args: + pipeline_func (Callable): Pipeline function. + comps_dict (dict): List of potential components to use within pipeline. + + Returns: + List: Components from comps_dict used within the pipeline_func. + """ + #Returns a list of components used within a given pipeline. 
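+        # Illustrative example (hypothetical pipeline and components): for
+        #     def my_pipeline(bq_table: str):
+        #         create_dataset(bq_table=bq_table)
+        #         train_model()
+        # with comps_dict == {'create_dataset': <Component>, 'train_model': <Component>},
+        # walking the parsed source below collects both Component objects.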
+ code = inspect.getsource(pipeline_func) + ast_tree = ast.parse(code) + comps_list = [] + for node in ast.walk(ast_tree): + try: + if isinstance(node, ast.Call) and node.func.id in comps_dict.keys(): + comps_list.append(comps_dict[node.func.id]) + except Exception: + pass + return comps_list + + class FuturePipeline(): """Placeholder for future pipeline object that will be created out of a list of components. """ diff --git a/google_cloud_automlops/orchestration/Services.py b/google_cloud_automlops/orchestration/Services.py index e59fca8..97701c5 100644 --- a/google_cloud_automlops/orchestration/Services.py +++ b/google_cloud_automlops/orchestration/Services.py @@ -18,8 +18,6 @@ # pylint: disable=C0103 # pylint: disable=line-too-long -from abc import ABC, abstractmethod - from google_cloud_automlops.utils.utils import read_yaml_file from google_cloud_automlops.utils.constants import ( BASE_DIR, @@ -27,7 +25,7 @@ ) -class Services(ABC): +class Services(): """The Services object will contain TODO: fill out what this does Args: @@ -37,6 +35,15 @@ class Services(ABC): def __init__(self) -> None: """Instantiates a generic Services object. """ + self.pipeline_storage_path = None + self.pipeline_job_runner_service_account = None + self.pipeline_job_submission_service_type = None + self.project_id = None + self.pipeline_job_submission_service_type = None + + # Set directory for files to be written to + self.submission_service_base_dir = BASE_DIR + 'services/submission_service' + def build(self): """Constructs and writes a Dockerfile, requirements.txt, and @@ -58,17 +65,17 @@ def build(self): self._build_dockerfile() self._build_requirements() - @abstractmethod def _build_dockerfile(self): """Abstract method to create the Dockerfile file of the services/submission_service directory. """ + raise NotImplementedError("Subclass needs to define this.") - @abstractmethod def _build_requirements(self): """Abstract method to create the requirements.txt file of the services/submission_service directory. """ + raise NotImplementedError("Subclass needs to define this.") - @abstractmethod def _build_main(self): """Abstract method to create the main.py file of the services/submission_service directory. """ + raise NotImplementedError("Subclass needs to define this.") diff --git a/google_cloud_automlops/orchestration/builder.py b/google_cloud_automlops/orchestration/builder.py deleted file mode 100644 index c121954..0000000 --- a/google_cloud_automlops/orchestration/builder.py +++ /dev/null @@ -1,522 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Builds KFP components and pipeline.""" - -# pylint: disable=line-too-long - -import json -try: - from importlib.resources import files as import_files -except ImportError: - # Try backported to PY<37 `importlib_resources` - from importlib_resources import files as import_files -import re -import textwrap - -from jinja2 import Template - -from google_cloud_automlops.utils.utils import ( - execute_process, - get_components_list, - make_dirs, - read_file, - read_yaml_file, - is_using_kfp_spec, - write_and_chmod, - write_file, - write_yaml_file -) -from google_cloud_automlops.utils.constants import ( - BASE_DIR, - GENERATED_BUILD_COMPONENTS_SH_FILE, - GENERATED_DEFAULTS_FILE, - GENERATED_COMPONENT_BASE, - GENERATED_LICENSE, - GENERATED_PARAMETER_VALUES_PATH, - GENERATED_PIPELINE_FILE, - GENERATED_PIPELINE_REQUIREMENTS_FILE, - GENERATED_PIPELINE_RUNNER_FILE, - GENERATED_PIPELINE_SPEC_SH_FILE, - GENERATED_PUBLISH_TO_TOPIC_FILE, - GENERATED_RUN_PIPELINE_SH_FILE, - GENERATED_RUN_ALL_SH_FILE, - KFP_TEMPLATES_PATH, - PINNED_KFP_VERSION, - PIPELINE_CACHE_FILE -) -from google_cloud_automlops.orchestration.configs import KfpConfig - -def build(config: KfpConfig): - """Constructs files for running and managing Kubeflow pipelines. - - Args: - config.base_image: The image to use in the component base dockerfile. - config.custom_training_job_specs: Specifies the specs to run the training job with. - config.pipeline_params: Dictionary containing runtime pipeline parameters. - config.pubsub_topic_name: The name of the pubsub topic to publish to. - config.use_ci: Flag that determines whether to use Cloud Run CI/CD. - """ - - # Write scripts for building pipeline, building components, running pipeline, and running all files - write_and_chmod(GENERATED_PIPELINE_SPEC_SH_FILE, build_pipeline_spec_jinja()) - write_and_chmod(GENERATED_BUILD_COMPONENTS_SH_FILE, build_components_jinja()) - write_and_chmod(GENERATED_RUN_PIPELINE_SH_FILE, run_pipeline_jinja()) - write_and_chmod(GENERATED_RUN_ALL_SH_FILE, run_all_jinja()) - if config.use_ci: - write_and_chmod(GENERATED_PUBLISH_TO_TOPIC_FILE, publish_to_topic_jinja(pubsub_topic_name=config.pubsub_topic_name)) - - # Create components and pipelines - components_path_list = get_components_list(full_path=True) - for path in components_path_list: - build_component(path) - build_pipeline(config.custom_training_job_specs, config.pipeline_params) - - # Write empty .gitkeep to pipeline_spec directory - write_file(f'{BASE_DIR}scripts/pipeline_spec/.gitkeep', '', 'w') - - # Write readme.md to description the contents of the directory - write_file(f'{BASE_DIR}README.md', readme_jinja(config.use_ci), 'w') - - # Write dockerfile to the component base directory - write_file(f'{GENERATED_COMPONENT_BASE}/Dockerfile', component_base_dockerfile_jinja(config.base_image), 'w') - - # Write requirements.txt to the component base directory - write_file(f'{GENERATED_COMPONENT_BASE}/requirements.txt', create_component_base_requirements(), 'w') - - # Build the submission service files - if config.use_ci: - build_services() - - -def build_component(component_path: str): - """Constructs and writes component.yaml and {component_name}.py files. - component.yaml: Contains the Kubeflow custom component definition. - {component_name}.py: Contains the python code from the Jupyter cell. - - Args: - component_path: Path to the temporary component yaml. This file - is used to create the permanent component.yaml, and deleted - after calling AutoMLOps.generate(). 
- """ - # Retrieve defaults vars - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - - # Read in component specs - component_spec = read_yaml_file(component_path) - kfp_spec_bool = is_using_kfp_spec(component_spec['implementation']['container']['image']) - custom_code_contents = component_spec['implementation']['container']['command'][-1] - compspec_image = ( - f'''{defaults['gcp']['artifact_repo_location']}-docker.pkg.dev/''' - f'''{defaults['gcp']['project_id']}/''' - f'''{defaults['gcp']['artifact_repo_name']}/''' - f'''{defaults['gcp']['naming_prefix']}/''' - f'''components/component_base:latest''') - - # If using kfp, remove spaces in name and convert to lowercase - if kfp_spec_bool: - component_spec['name'] = component_spec['name'].replace(' ', '_').lower() - - # Set and create directory for component, and set directory for task - component_dir = BASE_DIR + 'components/' + component_spec['name'] - make_dirs([component_dir]) - task_filepath = (BASE_DIR - + 'components/component_base/src/' - + component_spec['name'] - + '.py') - - # Write task script to component base - write_file(task_filepath, component_base_task_file_jinja(custom_code_contents, kfp_spec_bool), 'w') - - # Update component_spec to include correct image and startup command - component_spec['implementation']['container']['image'] = compspec_image - component_spec['implementation']['container']['command'] = [ - 'python3', - f'''/pipelines/component/src/{component_spec['name']+'.py'}'''] - - # Write license and component spec to the appropriate component.yaml file - filename = component_dir + '/component.yaml' - write_file(filename, GENERATED_LICENSE, 'w') - write_yaml_file(filename, component_spec, 'a') - - -def build_pipeline(custom_training_job_specs: list, - pipeline_parameter_values: dict): - """Constructs and writes pipeline.py, pipeline_runner.py, and pipeline_parameter_values.json files. - pipeline.py: Generates a Kubeflow pipeline spec from custom components. - pipeline_runner.py: Sends a PipelineJob to Vertex AI using pipeline spec. - pipeline_parameter_values.json: Provides runtime parameters for the PipelineJob. - - Args: - custom_training_job_specs: Specifies the specs to run the training job with. - pipeline_parameter_values: Dictionary of runtime parameters for the PipelineJob. 
- """ - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - # Get the names of the components - components_list = get_components_list(full_path=False) - # Read pipeline definition - pipeline_scaffold_contents = read_file(PIPELINE_CACHE_FILE) - # Add indentation - pipeline_scaffold_contents = textwrap.indent(pipeline_scaffold_contents, 4 * ' ') - # Construct pipeline.py - project_id = defaults['gcp']['project_id'] - write_file(GENERATED_PIPELINE_FILE, pipeline_jinja( - components_list, - custom_training_job_specs, - pipeline_scaffold_contents, - project_id=project_id), 'w') - # Construct pipeline_runner.py - write_file(GENERATED_PIPELINE_RUNNER_FILE, pipeline_runner_jinja(), 'w') - # Construct requirements.txt - write_file(GENERATED_PIPELINE_REQUIREMENTS_FILE, pipeline_requirements_jinja(), 'w') - # Add pipeline_spec_path to dict - pipeline_parameter_values['gs_pipeline_spec_path'] = defaults['pipelines']['gs_pipeline_job_spec_path'] - # Construct pipeline_parameter_values.json - serialized_params = json.dumps(pipeline_parameter_values, indent=4) - write_file(BASE_DIR + GENERATED_PARAMETER_VALUES_PATH, serialized_params, 'w') - - -def build_services(): - """Constructs and writes a Dockerfile, requirements.txt, and - main.py to the services/submission_service directory. - """ - # Retrieve defaults vars - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - - # Set new folders as variables - submission_service_base = BASE_DIR + 'services/submission_service' - - # Write cloud run dockerfile - write_file(f'{submission_service_base}/Dockerfile', submission_service_dockerfile_jinja(), 'w') - - # Write requirements files for cloud run base and queueing svc - write_file(f'{submission_service_base}/requirements.txt', submission_service_requirements_jinja( - pipeline_job_submission_service_type=defaults['gcp']['pipeline_job_submission_service_type']), 'w') - - # Write main code files for cloud run base and queueing svc - write_file(f'{submission_service_base}/main.py', submission_service_main_jinja( - pipeline_root=defaults['pipelines']['pipeline_storage_path'], - pipeline_job_runner_service_account=defaults['gcp']['pipeline_job_runner_service_account'], - pipeline_job_submission_service_type=defaults['gcp']['pipeline_job_submission_service_type'], - project_id=defaults['gcp']['project_id']), 'w') - - -def create_component_base_requirements(): - """Writes a requirements.txt to the component_base directory. - Infers pip requirements from the python srcfiles using - pipreqs. Takes user-inputted requirements, and addes some - default gcp packages as well as packages that are often missing - in setup.py files (e.g db_types, pyarrow, gcsfs, fsspec). 
- """ - reqs_filename = f'{GENERATED_COMPONENT_BASE}/requirements.txt' - default_gcp_reqs = [ - 'google-cloud-aiplatform', - 'google-cloud-appengine-logging', - 'google-cloud-audit-log', - 'google-cloud-bigquery', - 'google-cloud-bigquery-storage', - 'google-cloud-bigtable', - 'google-cloud-core', - 'google-cloud-dataproc', - 'google-cloud-datastore', - 'google-cloud-dlp', - 'google-cloud-firestore', - 'google-cloud-kms', - 'google-cloud-language', - 'google-cloud-logging', - 'google-cloud-monitoring', - 'google-cloud-notebooks', - 'google-cloud-pipeline-components', - 'google-cloud-pubsub', - 'google-cloud-pubsublite', - 'google-cloud-recommendations-ai', - 'google-cloud-resource-manager', - 'google-cloud-scheduler', - 'google-cloud-spanner', - 'google-cloud-speech', - 'google-cloud-storage', - 'google-cloud-tasks', - 'google-cloud-translate', - 'google-cloud-videointelligence', - 'google-cloud-vision', - 'db_dtypes', - 'pyarrow', - 'gcsfs', - 'fsspec'] - # Get user-inputted requirements from the cache dir - user_inp_reqs = [] - components_path_list = get_components_list() - for component_path in components_path_list: - component_spec = read_yaml_file(component_path) - reqs = component_spec['implementation']['container']['command'][2] - formatted_reqs = re.findall('\'([^\']*)\'', reqs) - user_inp_reqs.extend(formatted_reqs) - # Check if user inputted requirements - if user_inp_reqs: - # Remove duplicates - set_of_requirements = set(user_inp_reqs) - else: - # If user did not input requirements, then infer reqs using pipreqs - execute_process(f'python3 -m pipreqs.pipreqs {GENERATED_COMPONENT_BASE} --mode no-pin --force', to_null=True) - pipreqs = read_file(reqs_filename).splitlines() - set_of_requirements = set(pipreqs + default_gcp_reqs) - # Remove empty string - if '' in set_of_requirements: - set_of_requirements.remove('') - # Pin kfp version - if 'kfp' in set_of_requirements: - set_of_requirements.remove('kfp') - set_of_requirements.add(PINNED_KFP_VERSION) - # Stringify and sort - reqs_str = ''.join(r+'\n' for r in sorted(set_of_requirements)) - return reqs_str - - -def build_pipeline_spec_jinja() -> str: - """Generates code for build_pipeline_spec.sh which builds the pipeline specs. - - Returns: - str: build_pipeline_spec.sh script. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.scripts') / 'build_pipeline_spec.sh.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - generated_license=GENERATED_LICENSE, - base_dir=BASE_DIR) - - -def build_components_jinja() -> str: - """Generates code for build_components.sh which builds the components. - - Returns: - str: build_components.sh script. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.scripts') / 'build_components.sh.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - generated_license=GENERATED_LICENSE, - base_dir=BASE_DIR) - - -def run_pipeline_jinja() -> str: - """Generates code for run_pipeline.sh which runs the pipeline locally. - - Returns: - str: run_pipeline.sh script. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.scripts') / 'run_pipeline.sh.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - generated_license=GENERATED_LICENSE, - base_dir=BASE_DIR) - - -def run_all_jinja() -> str: - """Generates code for run_all.sh which builds runs all other shell scripts. - - Returns: - str: run_all.sh script. 
- """ - template_file = import_files(KFP_TEMPLATES_PATH + '.scripts') / 'run_all.sh.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - generated_license=GENERATED_LICENSE, - base_dir=BASE_DIR) - - -def publish_to_topic_jinja(pubsub_topic_name: str) -> str: - """Generates code for publish_to_topic.sh which submits a message to the - pipeline job submission service. - - Args: - pubsub_topic_name: The name of the pubsub topic to publish to. - - Returns: - str: publish_to_topic.sh script. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.scripts') / 'publish_to_topic.sh.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - base_dir=BASE_DIR, - generated_license=GENERATED_LICENSE, - generated_parameter_values_path=GENERATED_PARAMETER_VALUES_PATH, - pubsub_topic_name=pubsub_topic_name) - - -def readme_jinja(use_ci: str) -> str: - """Generates code for readme.md which is a readme markdown file to describe the contents of the - generated AutoMLOps code repo. - - Args: - use_ci: Flag that determines whether to use Cloud CI/CD. - - Returns: - str: README.md file. - """ - template_file = import_files(KFP_TEMPLATES_PATH) / 'README.md.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render(use_ci=use_ci) - - -def component_base_dockerfile_jinja(base_image: str) -> str: - """Generates code for a Dockerfile to be written to the component_base directory. - - Args: - base_image: The image to use in the component base dockerfile. - - Returns: - str: Dockerfile file. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.components.component_base') / 'Dockerfile.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - base_image=base_image, - generated_license=GENERATED_LICENSE) - - -def component_base_task_file_jinja(custom_code_contents: str, kfp_spec_bool: str) -> str: - """Generates code for the task.py file to be written to the component_base/src directory. - - Args: - custom_code_contents: Code inside of the component, specified by the user. - kfp_spec_bool: Boolean that specifies whether components are defined using kfp. - - Returns: - str: Contents of the task.py file. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.components.component_base.src') / 'task.py.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - custom_code_contents=custom_code_contents, - generated_license=GENERATED_LICENSE, - kfp_spec_bool=kfp_spec_bool) - - -def pipeline_runner_jinja() -> str: - """Generates code for the pipeline_runner.py file to be written to the pipelines directory. - - Returns: - str: pipeline_runner.py file. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'pipeline_runner.py.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render(generated_license=GENERATED_LICENSE) - - -def pipeline_jinja( - components_list: list, - custom_training_job_specs: list, - pipeline_scaffold_contents: str, - project_id: str) -> str: - """Generates code for the pipeline.py file to be written to the pipelines directory. - - Args: - components_list: Contains the names or paths of all component yamls in the dir. - custom_training_job_specs: Specifies the specs to run the training job with. 
- pipeline_scaffold_contents: The contents of the pipeline scaffold file, - which can be found at PIPELINE_CACHE_FILE. - project_id: The project ID. - - Returns: - str: pipeline.py file. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'pipeline.py.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - components_list=components_list, - custom_training_job_specs=custom_training_job_specs, - generated_license=GENERATED_LICENSE, - pipeline_scaffold_contents=pipeline_scaffold_contents, - project_id=project_id) - - -def pipeline_requirements_jinja() -> str: - """Generates code for a requirements.txt to be written to the pipelines directory. - - Returns: - str: requirements.txt file for pipelines. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'requirements.txt.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render(pinned_kfp_version=PINNED_KFP_VERSION) - - -def submission_service_dockerfile_jinja() -> str: - """Generates code for a Dockerfile to be written to the serivces/submission_service directory. - - Returns: - str: Dockerfile file. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'Dockerfile.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - base_dir=BASE_DIR, - generated_license=GENERATED_LICENSE) - - -def submission_service_requirements_jinja(pipeline_job_submission_service_type: str) -> str: - """Generates code for a requirements.txt to be written to the serivces/submission_service directory. - - Args: - pipeline_job_submission_service_type: The tool to host for the cloud submission service (e.g. cloud run, cloud functions). - - Returns: - str: requirements.txt file for submission_service. - """ - template_file = import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'requirements.txt.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - pinned_kfp_version=PINNED_KFP_VERSION, - pipeline_job_submission_service_type=pipeline_job_submission_service_type) - - -def submission_service_main_jinja( - pipeline_root: str, - pipeline_job_runner_service_account: str, - pipeline_job_submission_service_type: str, - project_id: str) -> str: - """Generates content for main.py to be written to the serivces/submission_service directory. - This file contains code for running a flask service that will act as a pipeline job submission service. - - Args: - pipeline_root: GS location where to store metadata from pipeline runs. - pipeline_job_runner_service_account: Service Account to runner PipelineJobs. - pipeline_job_submission_service_type: The tool to host for the cloud submission service (e.g. cloud run, cloud functions). - project_id: The project ID. - - Returns: - str: Content of serivces/submission_service main.py. 
- """ - template_file = import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'main.py.j2' - with template_file.open('r', encoding='utf-8') as f: - template = Template(f.read()) - return template.render( - generated_license=GENERATED_LICENSE, - pipeline_root=pipeline_root, - pipeline_job_runner_service_account=pipeline_job_runner_service_account, - pipeline_job_submission_service_type=pipeline_job_submission_service_type, - project_id=project_id) diff --git a/google_cloud_automlops/orchestration/configs.py b/google_cloud_automlops/orchestration/configs.py deleted file mode 100644 index 27c674b..0000000 --- a/google_cloud_automlops/orchestration/configs.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Model classes for AutoMLOps Orchestration Frameworks.""" - -# pylint: disable=C0103 -# pylint: disable=line-too-long - -from typing import Dict, List, Optional - -from pydantic import BaseModel - - -class KfpConfig(BaseModel): - """Model representing the KFP config. - - Args: - base_image: The image to use in the component base dockerfile. - custom_training_job_specs: Specifies the specs to run the training job with. - pipeline_params: Dictionary containing runtime pipeline parameters. - pubsub_topic_name: The name of the pubsub topic to publish to. - use_ci: Flag that determines whether to use Cloud Run CI/CD. - """ - base_image: str - custom_training_job_specs: Optional[List] - pipeline_params: Dict - pubsub_topic_name: str - use_ci: bool diff --git a/google_cloud_automlops/orchestration/kfp/KFPComponent.py b/google_cloud_automlops/orchestration/kfp/KFPComponent.py index 988a255..00ed7ff 100644 --- a/google_cloud_automlops/orchestration/kfp/KFPComponent.py +++ b/google_cloud_automlops/orchestration/kfp/KFPComponent.py @@ -29,12 +29,14 @@ from google_cloud_automlops.orchestration.Component import Component from google_cloud_automlops.utils.constants import ( BASE_DIR, + GENERATED_DEFAULTS_FILE, GENERATED_LICENSE, KFP_TEMPLATES_PATH, PLACEHOLDER_IMAGE, ) from google_cloud_automlops.utils.utils import ( make_dirs, + read_yaml_file, render_jinja, write_file, write_yaml_file @@ -77,7 +79,11 @@ def __init__(self, def build(self): """Constructs files for running and managing Kubeflow pipelines. 
""" - super().build() + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.artifact_repo_location = defaults['gcp']['artifact_repo_location'] + self.artifact_repo_name = defaults['gcp']['artifact_repo_name'] + self.project_id = defaults['gcp']['project_id'] + self.naming_prefix = defaults['gcp']['naming_prefix'] # Set and create directory for components if it does not already exist component_dir = BASE_DIR + 'components/' + self.component_spec['name'] @@ -111,7 +117,7 @@ def build(self): template_path=import_files(KFP_TEMPLATES_PATH + '.components.component_base.src') / 'task.py.j2', generated_license=GENERATED_LICENSE, kfp_spec_bool=kfp_spec_bool, - custom_code_content=custom_code_contents), + custom_code_contents=custom_code_contents), mode='w') # Update component_spec to include correct image and startup command diff --git a/google_cloud_automlops/orchestration/kfp/KFPPipeline.py b/google_cloud_automlops/orchestration/kfp/KFPPipeline.py index 07ef25a..323e423 100644 --- a/google_cloud_automlops/orchestration/kfp/KFPPipeline.py +++ b/google_cloud_automlops/orchestration/kfp/KFPPipeline.py @@ -44,6 +44,7 @@ BASE_DIR, GENERATED_BUILD_COMPONENTS_SH_FILE, GENERATED_COMPONENT_BASE, + GENERATED_DEFAULTS_FILE, GENERATED_LICENSE, GENERATED_PARAMETER_VALUES_PATH, GENERATED_PIPELINE_FILE, @@ -70,7 +71,8 @@ def __init__(self, func: Optional[Callable] = None, *, name: Optional[str] = None, - description: Optional[str] = None) -> None: + description: Optional[str] = None, + comps_dict: dict) -> None: """Initiates a KFP pipeline object created out of a function holding all necessary code. @@ -81,11 +83,14 @@ def __init__(self, a plain parameter, or a path to a file). name: The name of the pipeline. description: Short description of what the pipeline does. + comps_list: Dictionary of potential components for pipeline to utilize imported + as the global held in AutoMLOps.py. 
""" super().__init__( func=func, name=name, - description=description) + description=description, + comps_dict=comps_dict) # Create pipeline scaffold attribute # TODO: more descriptive self.pipeline_scaffold = ( @@ -119,11 +124,18 @@ def build(self, requirements.txt runtime_parameters/pipeline_parameter_values.json """ - super().build(base_image, - custom_training_job_specs, - pipeline_params, - pubsub_topic_name, - use_ci) + # Save parameters as attributes + self.base_image = base_image + self.custom_training_job_specs = custom_training_job_specs + self.pipeline_params = pipeline_params + self.pubsub_topic_name = pubsub_topic_name + self.use_ci = use_ci + + # Extract additional attributes from defaults file + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.project_id = defaults['gcp']['project_id'] + self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path'] + # Build necessary folders make_dirs([ diff --git a/google_cloud_automlops/orchestration/kfp/KFPServices.py b/google_cloud_automlops/orchestration/kfp/KFPServices.py index 0de439e..6b447d4 100644 --- a/google_cloud_automlops/orchestration/kfp/KFPServices.py +++ b/google_cloud_automlops/orchestration/kfp/KFPServices.py @@ -26,11 +26,13 @@ from google_cloud_automlops.orchestration.Services import Services from google_cloud_automlops.utils.utils import ( + read_yaml_file, render_jinja, write_file ) from google_cloud_automlops.utils.constants import ( BASE_DIR, + GENERATED_DEFAULTS_FILE, GENERATED_LICENSE, KFP_TEMPLATES_PATH, PINNED_KFP_VERSION @@ -51,6 +53,17 @@ def __init__(self) -> None: def _build_dockerfile(self): """Writes the services/submission_service/Dockerfile #TODO add more """ + # Read in defaults params + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.pipeline_storage_path = defaults['pipelines']['pipeline_storage_path'] + self.pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'] + self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + self.project_id = defaults['gcp']['project_id'] + self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + + # Set directory for files to be written to + self.submission_service_base_dir = BASE_DIR + 'services/submission_service' + write_file( f'{self.submission_service_base_dir}/Dockerfile', render_jinja( diff --git a/google_cloud_automlops/orchestration/kfp/builder.py b/google_cloud_automlops/orchestration/kfp/builder.py deleted file mode 100644 index 019eb59..0000000 --- a/google_cloud_automlops/orchestration/kfp/builder.py +++ /dev/null @@ -1,377 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Builds KFP components and pipeline.""" - -# pylint: disable=line-too-long - -import json -try: - from importlib.resources import files as import_files -except ImportError: - # Try backported to PY<37 `importlib_resources` - from importlib_resources import files as import_files -import re -import textwrap - -from google_cloud_automlops.utils.utils import ( - execute_process, - get_components_list, - make_dirs, - read_file, - read_yaml_file, - render_jinja, - is_using_kfp_spec, - write_and_chmod, - write_file, - write_yaml_file -) -from google_cloud_automlops.utils.constants import ( - BASE_DIR, - GENERATED_BUILD_COMPONENTS_SH_FILE, - GENERATED_DEFAULTS_FILE, - GENERATED_COMPONENT_BASE, - GENERATED_LICENSE, - GENERATED_PARAMETER_VALUES_PATH, - GENERATED_PIPELINE_FILE, - GENERATED_PIPELINE_REQUIREMENTS_FILE, - GENERATED_PIPELINE_RUNNER_FILE, - GENERATED_PIPELINE_SPEC_SH_FILE, - GENERATED_PUBLISH_TO_TOPIC_FILE, - GENERATED_RUN_PIPELINE_SH_FILE, - GENERATED_RUN_ALL_SH_FILE, - KFP_TEMPLATES_PATH, - PINNED_KFP_VERSION, - PIPELINE_CACHE_FILE -) -from google_cloud_automlops.orchestration.configs import KfpConfig - -def build(config: KfpConfig): - """Constructs files for running and managing Kubeflow pipelines. - - Args: - config.base_image: The image to use in the component base dockerfile. - config.custom_training_job_specs: Specifies the specs to run the training job with. - config.pipeline_params: Dictionary containing runtime pipeline parameters. - config.pubsub_topic_name: The name of the pubsub topic to publish to. - config.use_ci: Flag that determines whether to use Cloud Run CI/CD. - """ - - # Write scripts for building pipeline, building components, running pipeline, and running all files - scripts_path = import_files(KFP_TEMPLATES_PATH + '.scripts') - - # Write script for building pipeline - write_and_chmod( - GENERATED_PIPELINE_SPEC_SH_FILE, - render_jinja( - template_path=scripts_path / 'build_pipeline_spec.sh.j2', - generated_license=GENERATED_LICENSE, - base_dir=BASE_DIR)) - - # Write script for building components - write_and_chmod( - GENERATED_BUILD_COMPONENTS_SH_FILE, - render_jinja( - template_path=scripts_path / 'build_components.sh.j2', - generated_license=GENERATED_LICENSE, - base_dir=BASE_DIR)) - - # Write script for running pipeline - write_and_chmod( - GENERATED_RUN_PIPELINE_SH_FILE, - render_jinja( - template_path=scripts_path / 'run_pipeline.sh.j2', - generated_license=GENERATED_LICENSE, - base_dir=BASE_DIR)) - - # Write script for running all files - write_and_chmod( - GENERATED_RUN_ALL_SH_FILE, - render_jinja( - template_path=scripts_path / 'run_all.sh.j2', - generated_license=GENERATED_LICENSE, - base_dir=BASE_DIR)) - - # If using CI, write script for publishing to pubsub topic - if config.use_ci: - write_and_chmod( - GENERATED_PUBLISH_TO_TOPIC_FILE, - render_jinja( - template_path=scripts_path / 'publish_to_topic.sh.j2', - base_dir=BASE_DIR, - generated_license=GENERATED_LICENSE, - generated_parameter_values_path=GENERATED_PARAMETER_VALUES_PATH, - pubsub_topic_name=config.pubsub_topic_name)) - - # Create components and pipelines - components_path_list = get_components_list(full_path=True) - for path in components_path_list: - build_component(path) - build_pipeline(config.custom_training_job_specs, config.pipeline_params) - - # Write empty .gitkeep to pipeline_spec directory - write_file(f'{BASE_DIR}scripts/pipeline_spec/.gitkeep', '', 'w') - - # Write readme.md to description the contents of the directory - write_file( - f'{BASE_DIR}README.md', - render_jinja( - 
template_path=import_files(KFP_TEMPLATES_PATH) / 'README.md.j2', - use_ci=config.use_ci), - 'w') - - # Write dockerfile to the component base directory - write_file( - f'{GENERATED_COMPONENT_BASE}/Dockerfile', - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.components.component_base') / 'Dockerfile.j2', - base_image=config.base_image, - generated_license=GENERATED_LICENSE), - 'w') - - # Write requirements.txt to the component base directory - write_file(f'{GENERATED_COMPONENT_BASE}/requirements.txt', create_component_base_requirements(), 'w') - - # Build the submission service files - if config.use_ci: - build_services() - - -def build_component(component_path: str): - """Constructs and writes component.yaml and {component_name}.py files. - component.yaml: Contains the Kubeflow custom component definition. - {component_name}.py: Contains the python code from the Jupyter cell. - - Args: - component_path: Path to the temporary component yaml. This file - is used to create the permanent component.yaml, and deleted - after calling AutoMLOps.generate(). - """ - # Retrieve defaults vars - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - - # Read in component specs - component_spec = read_yaml_file(component_path) - kfp_spec_bool = is_using_kfp_spec(component_spec['implementation']['container']['image']) - custom_code_contents = component_spec['implementation']['container']['command'][-1] - compspec_image = ( - f'''{defaults['gcp']['artifact_repo_location']}-docker.pkg.dev/''' - f'''{defaults['gcp']['project_id']}/''' - f'''{defaults['gcp']['artifact_repo_name']}/''' - f'''{defaults['gcp']['naming_prefix']}/''' - f'''components/component_base:latest''') - - # If using kfp, remove spaces in name and convert to lowercase - if kfp_spec_bool: - component_spec['name'] = component_spec['name'].replace(' ', '_').lower() - - # Set and create directory for component, and set directory for task - component_dir = BASE_DIR + 'components/' + component_spec['name'] - make_dirs([component_dir]) - task_filepath = (BASE_DIR - + 'components/component_base/src/' - + component_spec['name'] - + '.py') - - # Write task script to component base - write_file( - task_filepath, - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.components.component_base.src') / 'task.py.j2', - custom_code_contents=custom_code_contents, - generated_license=GENERATED_LICENSE, - kfp_spec_bool=kfp_spec_bool), - 'w') - - # Update component_spec to include correct image and startup command - component_spec['implementation']['container']['image'] = compspec_image - component_spec['implementation']['container']['command'] = [ - 'python3', - f'''/pipelines/component/src/{component_spec['name']+'.py'}'''] - - # Write license and component spec to the appropriate component.yaml file - filename = component_dir + '/component.yaml' - write_file(filename, GENERATED_LICENSE, 'w') - write_yaml_file(filename, component_spec, 'a') - - -def build_pipeline(custom_training_job_specs: list, - pipeline_parameter_values: dict): - """Constructs and writes pipeline.py, pipeline_runner.py, and pipeline_parameter_values.json files. - pipeline.py: Generates a Kubeflow pipeline spec from custom components. - pipeline_runner.py: Sends a PipelineJob to Vertex AI using pipeline spec. - pipeline_parameter_values.json: Provides runtime parameters for the PipelineJob. - - Args: - custom_training_job_specs: Specifies the specs to run the training job with. 
- pipeline_parameter_values: Dictionary of runtime parameters for the PipelineJob. - """ - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - - # Get the names of the components - components_list = get_components_list(full_path=False) - - # Read pipeline definition - pipeline_scaffold_contents = read_file(PIPELINE_CACHE_FILE) - - # Add indentation - pipeline_scaffold_contents = textwrap.indent(pipeline_scaffold_contents, 4 * ' ') - - # Construct pipeline.py - project_id = defaults['gcp']['project_id'] - write_file( - GENERATED_PIPELINE_FILE, - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'pipeline.py.j2', - components_list=components_list, - custom_training_job_specs=custom_training_job_specs, - generated_license=GENERATED_LICENSE, - pipeline_scaffold_contents=pipeline_scaffold_contents, - project_id=project_id), - 'w') - - # Construct pipeline_runner.py - write_file( - GENERATED_PIPELINE_RUNNER_FILE, - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'pipeline_runner.py.j2', - generated_license=GENERATED_LICENSE), - 'w') - - # Construct requirements.txt - write_file( - GENERATED_PIPELINE_REQUIREMENTS_FILE, - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.pipelines') / 'requirements.txt.j2', - pinned_kfp_version=PINNED_KFP_VERSION), - 'w') - - # Add pipeline_spec_path to dict - pipeline_parameter_values['gs_pipeline_spec_path'] = defaults['pipelines']['gs_pipeline_job_spec_path'] - - # Construct pipeline_parameter_values.json - serialized_params = json.dumps(pipeline_parameter_values, indent=4) - write_file(BASE_DIR + GENERATED_PARAMETER_VALUES_PATH, serialized_params, 'w') - - -def build_services(): - """Constructs and writes a Dockerfile, requirements.txt, and - main.py to the services/submission_service directory. - """ - # Retrieve defaults vars - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - - # Set new folders as variables - submission_service_base = BASE_DIR + 'services/submission_service' - - # Write cloud run dockerfile - write_file( - f'{submission_service_base}/Dockerfile', - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'Dockerfile.j2', - base_dir=BASE_DIR, - generated_license=GENERATED_LICENSE), - 'w') - - # Write requirements files for cloud run base and queueing svc - write_file( - f'{submission_service_base}/requirements.txt', - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'requirements.txt.j2', - pinned_kfp_version=PINNED_KFP_VERSION, - pipeline_job_submission_service_type=defaults['gcp']['pipeline_job_submission_service_type']), - 'w') - - # Write main code files for cloud run base and queueing svc - write_file( - f'{submission_service_base}/main.py', - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'main.py.j2', - generated_license=GENERATED_LICENSE, - pipeline_root=defaults['pipelines']['pipeline_storage_path'], - pipeline_job_runner_service_account=defaults['gcp']['pipeline_job_runner_service_account'], - pipeline_job_submission_service_type=defaults['gcp']['pipeline_job_submission_service_type'], - project_id=defaults['gcp']['project_id']), - 'w') - - -def create_component_base_requirements(): - """Writes a requirements.txt to the component_base directory. - Infers pip requirements from the python srcfiles using - pipreqs. 
Takes user-inputted requirements, and addes some - default gcp packages as well as packages that are often missing - in setup.py files (e.g db_types, pyarrow, gcsfs, fsspec). - """ - reqs_filename = f'{GENERATED_COMPONENT_BASE}/requirements.txt' - default_gcp_reqs = [ - 'google-cloud-aiplatform', - 'google-cloud-appengine-logging', - 'google-cloud-audit-log', - 'google-cloud-bigquery', - 'google-cloud-bigquery-storage', - 'google-cloud-bigtable', - 'google-cloud-core', - 'google-cloud-dataproc', - 'google-cloud-datastore', - 'google-cloud-dlp', - 'google-cloud-firestore', - 'google-cloud-kms', - 'google-cloud-language', - 'google-cloud-logging', - 'google-cloud-monitoring', - 'google-cloud-notebooks', - 'google-cloud-pipeline-components', - 'google-cloud-pubsub', - 'google-cloud-pubsublite', - 'google-cloud-recommendations-ai', - 'google-cloud-resource-manager', - 'google-cloud-scheduler', - 'google-cloud-spanner', - 'google-cloud-speech', - 'google-cloud-storage', - 'google-cloud-tasks', - 'google-cloud-translate', - 'google-cloud-videointelligence', - 'google-cloud-vision', - 'db_dtypes', - 'pyarrow', - 'gcsfs', - 'fsspec'] - # Get user-inputted requirements from the cache dir - user_inp_reqs = [] - components_path_list = get_components_list() - for component_path in components_path_list: - component_spec = read_yaml_file(component_path) - reqs = component_spec['implementation']['container']['command'][2] - formatted_reqs = re.findall('\'([^\']*)\'', reqs) - user_inp_reqs.extend(formatted_reqs) - # Check if user inputted requirements - if user_inp_reqs: - # Remove duplicates - set_of_requirements = set(user_inp_reqs) - else: - # If user did not input requirements, then infer reqs using pipreqs - execute_process(f'python3 -m pipreqs.pipreqs {GENERATED_COMPONENT_BASE} --mode no-pin --force', to_null=True) - pipreqs = read_file(reqs_filename).splitlines() - set_of_requirements = set(pipreqs + default_gcp_reqs) - # Remove empty string - if '' in set_of_requirements: - set_of_requirements.remove('') - # Pin kfp version - if 'kfp' in set_of_requirements: - set_of_requirements.remove('kfp') - set_of_requirements.add(PINNED_KFP_VERSION) - # Stringify and sort - reqs_str = ''.join(r+'\n' for r in sorted(set_of_requirements)) - return reqs_str diff --git a/google_cloud_automlops/orchestration/kfp/scaffold.py b/google_cloud_automlops/orchestration/kfp/scaffold.py deleted file mode 100644 index cf2b7a7..0000000 --- a/google_cloud_automlops/orchestration/kfp/scaffold.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
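To make the deleted scaffold module below easier to follow: create_component_scaffold cached a temporary component spec per decorated function, roughly shaped like the sketch here. The example function, the 'String' type names, and the placeholder values are assumptions for illustration, not output from a real run; the args list mirrors the one constructed in the code below.

def create_dataset(bq_table: str, data_path: str):
    """Loads data from BigQuery and writes it to a CSV."""
    ...

# Approximate shape of the cached component spec YAML (values assumed):
component_spec = {
    'name': 'create_dataset',
    'description': 'Loads data from BigQuery and writes it to a CSV.',
    'inputs': [
        {'name': 'bq_table', 'description': None, 'type': 'String'},
        {'name': 'data_path', 'description': None, 'type': 'String'},
    ],
    'implementation': {
        'container': {
            'image': 'placeholder-image',  # swapped for the component_base image at build time
            'command': ['sh', '-c', '<pip-install script>', '<function source code>'],
            'args': ['--executor_input', {'executorInput': None},
                     '--function_to_execute', 'create_dataset'],
        },
    },
}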
- -"""Builds temporary component scaffold yaml files.""" - -# pylint: disable=anomalous-backslash-in-string -# pylint: disable=C0103 -# pylint: disable=line-too-long - -import inspect -from typing import Callable, List, Optional, TypeVar, Union - -import docstring_parser - -from google_cloud_automlops.utils.constants import ( - DEFAULT_PIPELINE_NAME, - PLACEHOLDER_IMAGE, - PIPELINE_CACHE_FILE, - CACHE_DIR -) -from google_cloud_automlops.utils.utils import ( - get_function_source_definition, - make_dirs, - update_params, - write_file, - write_yaml_file -) - -T = TypeVar('T') - - -def create_component_scaffold(func: Optional[Callable] = None, - *, - packages_to_install: Optional[List[str]] = None): - """Creates a tmp component scaffold which will be used by the formalize function. - Code is temporarily stored in component_spec['implementation']['container']['command']. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. - """ - # Extract name, docstring, and component description - name = func.__name__ - parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) - description = parsed_docstring.short_description - - # Instantiate component yaml attributes - component_spec = {} - component_spec['name'] = name - if description: - component_spec['description'] = description - outputs = get_function_return_types(func) - if outputs: - component_spec['outputs'] = outputs - component_spec['inputs'] = get_function_parameters(func) - component_spec['implementation'] = {} - component_spec['implementation']['container'] = {} - component_spec['implementation']['container']['image'] = PLACEHOLDER_IMAGE - component_spec['implementation']['container']['command'] = get_packages_to_install_command(func, packages_to_install) - component_spec['implementation']['container']['args'] = ['--executor_input', - {'executorInput': None}, - '--function_to_execute', - name] - # Write component yaml - filename = CACHE_DIR + f'/{name}.yaml' - make_dirs([CACHE_DIR]) - write_yaml_file(filename, component_spec, 'w') - - -def get_packages_to_install_command(func: Optional[Callable] = None, - packages_to_install: Optional[List[str]] = None): - """Returns a list of formatted list of commands, including code for tmp storage. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. - """ - newline = '\n' - if not packages_to_install: - packages_to_install = [] - concat_package_list = ' '.join([repr(str(package)) for package in packages_to_install]) - install_python_packages_script = ( - f'''if ! 
[ -x "$(command -v pip)" ]; then{newline}''' - f''' python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip{newline}''' - f'''fi{newline}''' - f'''PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \{newline}''' - f''' --no-warn-script-location {concat_package_list} && "$0" "$@"{newline}''' - f'''{newline}''') - src_code = get_function_source_definition(func) - return ['sh', '-c', install_python_packages_script, src_code] - - -def get_function_return_types(func: Callable) -> list: - """Returns a formatted list of function return types. - - Args: - func: The python function to create a component from. The function - can optionally have type annotations for its return values. - Returns: - list: return value list with types converted to kubeflow spec. - Raises: - Exception: If return type is provided and not a NamedTuple. - """ - annotation = inspect.signature(func).return_annotation - if maybe_strip_optional_from_annotation(annotation) is not annotation: - raise TypeError('Return type cannot be Optional.') - - # No annotations provided - # pylint: disable=protected-access - if annotation == inspect._empty: - return None - - if not (hasattr(annotation,'__annotations__') and isinstance(annotation.__annotations__, dict)): - raise TypeError(f'''Return type hint for function "{func.__name__}" must be a NamedTuple.''') - - outputs = [] - for name, type_ in annotation.__annotations__.items(): - metadata = {} - metadata['name'] = name - metadata['type'] = type_ - metadata['description'] = None - outputs.append(metadata) - return update_params(outputs) - - -def get_function_parameters(func: Callable) -> list: - """Returns a formatted list of parameters. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - Returns: - list: Params list with types converted to kubeflow spec. - Raises: - Exception: If parameter type hints are not provided. - """ - signature = inspect.signature(func) - parameters = list(signature.parameters.values()) - parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) - doc_dict = {p.arg_name: p.description for p in parsed_docstring.params} - - # Extract parameter metadata - parameter_holder = [] - for param in parameters: - metadata = {} - metadata['name'] = param.name - metadata['description'] = doc_dict.get(param.name) - metadata['type'] = maybe_strip_optional_from_annotation( - param.annotation) - parameter_holder.append(metadata) - # pylint: disable=protected-access - if metadata['type'] == inspect._empty: - raise TypeError( - f'''Missing type hint for parameter "{metadata['name']}". ''' - f'''Please specify the type for this parameter.''') - return update_params(parameter_holder) - - -def maybe_strip_optional_from_annotation(annotation: T) -> T: - """Strips 'Optional' from 'Optional[]' if applicable. - For example:: - Optional[str] -> str - str -> str - List[int] -> List[int] - Args: - annotation: The original type annotation which may or may not has `Optional`. - Returns: - The type inside Optional[] if Optional exists, otherwise the original type. 
- """ - if getattr(annotation, '__origin__', None) is Union and annotation.__args__[1] is type(None): - return annotation.__args__[0] - else: - return annotation - - -def create_pipeline_scaffold(func: Optional[Callable] = None, - *, - name: Optional[str] = None, - description: Optional[str] = None): - """Creates a temporary pipeline scaffold which will - be used by the formalize function. - - Args: - func: The python function to create a pipeline from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - name: The name of the pipeline. - description: Short description of what the pipeline does. - """ - pipeline_scaffold = (get_pipeline_decorator(name, description) + - get_function_source_definition(func) + - get_compile_step(func.__name__)) - make_dirs([CACHE_DIR]) # if it doesn't already exist - write_file(PIPELINE_CACHE_FILE, pipeline_scaffold, 'w') - - -def get_pipeline_decorator(name: Optional[str] = None, - description: Optional[str] = None): - """Creates the kfp pipeline decorator. - - Args: - name: The name of the pipeline. - description: Short description of what the pipeline does. - - Returns: - str: Python compile function call. - """ - default_name = DEFAULT_PIPELINE_NAME if not name else name - name_str = f'''(\n name='{default_name}',\n''' - desc_str = f''' description='{description}',\n''' if description else '' - ending_str = ')\n' - return '@dsl.pipeline' + name_str + desc_str + ending_str - - -def get_compile_step(func_name: str): - """Creates the compile function call. - - Args: - func_name: The name of the pipeline function. - - Returns: - str: Python compile function call. - """ - return ( - f'\n' - f'compiler.Compiler().compile(\n' - f' pipeline_func={func_name},\n' - f' package_path=pipeline_job_spec_path)\n' - f'\n' - ) - diff --git a/google_cloud_automlops/orchestration/scaffold.py b/google_cloud_automlops/orchestration/scaffold.py deleted file mode 100644 index cf2b7a7..0000000 --- a/google_cloud_automlops/orchestration/scaffold.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Builds temporary component scaffold yaml files.""" - -# pylint: disable=anomalous-backslash-in-string -# pylint: disable=C0103 -# pylint: disable=line-too-long - -import inspect -from typing import Callable, List, Optional, TypeVar, Union - -import docstring_parser - -from google_cloud_automlops.utils.constants import ( - DEFAULT_PIPELINE_NAME, - PLACEHOLDER_IMAGE, - PIPELINE_CACHE_FILE, - CACHE_DIR -) -from google_cloud_automlops.utils.utils import ( - get_function_source_definition, - make_dirs, - update_params, - write_file, - write_yaml_file -) - -T = TypeVar('T') - - -def create_component_scaffold(func: Optional[Callable] = None, - *, - packages_to_install: Optional[List[str]] = None): - """Creates a tmp component scaffold which will be used by the formalize function. - Code is temporarily stored in component_spec['implementation']['container']['command']. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. - """ - # Extract name, docstring, and component description - name = func.__name__ - parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) - description = parsed_docstring.short_description - - # Instantiate component yaml attributes - component_spec = {} - component_spec['name'] = name - if description: - component_spec['description'] = description - outputs = get_function_return_types(func) - if outputs: - component_spec['outputs'] = outputs - component_spec['inputs'] = get_function_parameters(func) - component_spec['implementation'] = {} - component_spec['implementation']['container'] = {} - component_spec['implementation']['container']['image'] = PLACEHOLDER_IMAGE - component_spec['implementation']['container']['command'] = get_packages_to_install_command(func, packages_to_install) - component_spec['implementation']['container']['args'] = ['--executor_input', - {'executorInput': None}, - '--function_to_execute', - name] - # Write component yaml - filename = CACHE_DIR + f'/{name}.yaml' - make_dirs([CACHE_DIR]) - write_yaml_file(filename, component_spec, 'w') - - -def get_packages_to_install_command(func: Optional[Callable] = None, - packages_to_install: Optional[List[str]] = None): - """Returns a list of formatted list of commands, including code for tmp storage. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. - """ - newline = '\n' - if not packages_to_install: - packages_to_install = [] - concat_package_list = ' '.join([repr(str(package)) for package in packages_to_install]) - install_python_packages_script = ( - f'''if ! 
[ -x "$(command -v pip)" ]; then{newline}''' - f''' python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip{newline}''' - f'''fi{newline}''' - f'''PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \{newline}''' - f''' --no-warn-script-location {concat_package_list} && "$0" "$@"{newline}''' - f'''{newline}''') - src_code = get_function_source_definition(func) - return ['sh', '-c', install_python_packages_script, src_code] - - -def get_function_return_types(func: Callable) -> list: - """Returns a formatted list of function return types. - - Args: - func: The python function to create a component from. The function - can optionally have type annotations for its return values. - Returns: - list: return value list with types converted to kubeflow spec. - Raises: - Exception: If return type is provided and not a NamedTuple. - """ - annotation = inspect.signature(func).return_annotation - if maybe_strip_optional_from_annotation(annotation) is not annotation: - raise TypeError('Return type cannot be Optional.') - - # No annotations provided - # pylint: disable=protected-access - if annotation == inspect._empty: - return None - - if not (hasattr(annotation,'__annotations__') and isinstance(annotation.__annotations__, dict)): - raise TypeError(f'''Return type hint for function "{func.__name__}" must be a NamedTuple.''') - - outputs = [] - for name, type_ in annotation.__annotations__.items(): - metadata = {} - metadata['name'] = name - metadata['type'] = type_ - metadata['description'] = None - outputs.append(metadata) - return update_params(outputs) - - -def get_function_parameters(func: Callable) -> list: - """Returns a formatted list of parameters. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - Returns: - list: Params list with types converted to kubeflow spec. - Raises: - Exception: If parameter type hints are not provided. - """ - signature = inspect.signature(func) - parameters = list(signature.parameters.values()) - parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) - doc_dict = {p.arg_name: p.description for p in parsed_docstring.params} - - # Extract parameter metadata - parameter_holder = [] - for param in parameters: - metadata = {} - metadata['name'] = param.name - metadata['description'] = doc_dict.get(param.name) - metadata['type'] = maybe_strip_optional_from_annotation( - param.annotation) - parameter_holder.append(metadata) - # pylint: disable=protected-access - if metadata['type'] == inspect._empty: - raise TypeError( - f'''Missing type hint for parameter "{metadata['name']}". ''' - f'''Please specify the type for this parameter.''') - return update_params(parameter_holder) - - -def maybe_strip_optional_from_annotation(annotation: T) -> T: - """Strips 'Optional' from 'Optional[]' if applicable. - For example:: - Optional[str] -> str - str -> str - List[int] -> List[int] - Args: - annotation: The original type annotation which may or may not has `Optional`. - Returns: - The type inside Optional[] if Optional exists, otherwise the original type. 
- """ - if getattr(annotation, '__origin__', None) is Union and annotation.__args__[1] is type(None): - return annotation.__args__[0] - else: - return annotation - - -def create_pipeline_scaffold(func: Optional[Callable] = None, - *, - name: Optional[str] = None, - description: Optional[str] = None): - """Creates a temporary pipeline scaffold which will - be used by the formalize function. - - Args: - func: The python function to create a pipeline from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - name: The name of the pipeline. - description: Short description of what the pipeline does. - """ - pipeline_scaffold = (get_pipeline_decorator(name, description) + - get_function_source_definition(func) + - get_compile_step(func.__name__)) - make_dirs([CACHE_DIR]) # if it doesn't already exist - write_file(PIPELINE_CACHE_FILE, pipeline_scaffold, 'w') - - -def get_pipeline_decorator(name: Optional[str] = None, - description: Optional[str] = None): - """Creates the kfp pipeline decorator. - - Args: - name: The name of the pipeline. - description: Short description of what the pipeline does. - - Returns: - str: Python compile function call. - """ - default_name = DEFAULT_PIPELINE_NAME if not name else name - name_str = f'''(\n name='{default_name}',\n''' - desc_str = f''' description='{description}',\n''' if description else '' - ending_str = ')\n' - return '@dsl.pipeline' + name_str + desc_str + ending_str - - -def get_compile_step(func_name: str): - """Creates the compile function call. - - Args: - func_name: The name of the pipeline function. - - Returns: - str: Python compile function call. - """ - return ( - f'\n' - f'compiler.Compiler().compile(\n' - f' pipeline_func={func_name},\n' - f' package_path=pipeline_job_spec_path)\n' - f'\n' - ) - From a568f310f6c50bb5e96f1129e86c4269d2eca904 Mon Sep 17 00:00:00 2001 From: Allegra Noto Date: Mon, 26 Feb 2024 13:14:24 -0500 Subject: [PATCH 06/11] Removed old imports --- google_cloud_automlops/AutoMLOps.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/google_cloud_automlops/AutoMLOps.py b/google_cloud_automlops/AutoMLOps.py index 08db000..3ccac0e 100644 --- a/google_cloud_automlops/AutoMLOps.py +++ b/google_cloud_automlops/AutoMLOps.py @@ -58,15 +58,10 @@ write_file ) # Orchestration imports -from google_cloud_automlops.orchestration.kfp import builder as KfpBuilder -from google_cloud_automlops.orchestration.kfp import scaffold as KfpScaffold from google_cloud_automlops.orchestration.enums import ( Orchestrator, PipelineJobSubmitter ) -from google_cloud_automlops.orchestration.configs import ( - KfpConfig -) from google_cloud_automlops.orchestration.Component import Component from google_cloud_automlops.orchestration.Pipeline import Pipeline From 54cc7be275b08b22f01fb88e9c2ce779178aaad2 Mon Sep 17 00:00:00 2001 From: Allegra Noto Date: Tue, 12 Mar 2024 16:39:17 -0400 Subject: [PATCH 07/11] Merged changes into automlops.py --- google_cloud_automlops/AutoMLOps.py | 46 ++++++++++++------- google_cloud_automlops/orchestration/enums.py | 1 - google_cloud_automlops/utils/utils.py | 13 ++++++ 3 files changed, 43 insertions(+), 17 deletions(-) diff --git a/google_cloud_automlops/AutoMLOps.py b/google_cloud_automlops/AutoMLOps.py index 3ccac0e..bb36e96 100644 --- a/google_cloud_automlops/AutoMLOps.py +++ b/google_cloud_automlops/AutoMLOps.py @@ -18,6 +18,8 @@ # pylint: disable=C0103 # pylint: 
disable=line-too-long # pylint: disable=unused-import +# pylint: disable=logging-fstring-interpolation +# pylint: disable=global-at-module-level import functools import logging @@ -47,6 +49,7 @@ from google_cloud_automlops.utils.utils import ( account_permissions_warning, check_installation_versions, + coalesce, create_default_config, execute_process, make_dirs, @@ -279,8 +282,10 @@ def generate( raise ValueError(f'Unsupported deployment framework: {deployment_framework}') logging.info(f'Writing directories under {BASE_DIR}') + # Make standard directories make_dirs(GENERATED_DIRS) + # Make optional directories if use_ci: make_dirs(GENERATED_SERVICES_DIRS) @@ -290,15 +295,15 @@ def generate( make_dirs(GENERATED_GITHUB_DIRS) # Set derived vars if none were given for certain variables - derived_artifact_repo_name = f'{naming_prefix}-artifact-registry' if artifact_repo_name is None else artifact_repo_name - derived_build_trigger_name = f'{naming_prefix}-build-trigger' if build_trigger_name is None else build_trigger_name - derived_custom_training_job_specs = stringify_job_spec_list(custom_training_job_specs) if custom_training_job_specs is not None else custom_training_job_specs - derived_pipeline_job_runner_service_account = f'vertex-pipelines@{project_id}.iam.gserviceaccount.com' if pipeline_job_runner_service_account is None else pipeline_job_runner_service_account - derived_pipeline_job_submission_service_name = f'{naming_prefix}-job-submission-svc' if pipeline_job_submission_service_name is None else pipeline_job_submission_service_name - derived_pubsub_topic_name = f'{naming_prefix}-queueing-svc' if pubsub_topic_name is None else pubsub_topic_name - derived_schedule_name = f'{naming_prefix}-schedule' if schedule_name is None else schedule_name - derived_source_repo_name = f'{naming_prefix}-repository' if source_repo_name is None else source_repo_name - derived_storage_bucket_name = f'{project_id}-{naming_prefix}-bucket' if storage_bucket_name is None else storage_bucket_name + derived_artifact_repo_name = coalesce(artifact_repo_name, f'{naming_prefix}-artifact-registry') + derived_build_trigger_name = coalesce(build_trigger_name, f'{naming_prefix}-build-trigger') + derived_custom_training_job_specs = stringify_job_spec_list(custom_training_job_specs) + derived_pipeline_job_runner_service_account = coalesce(pipeline_job_runner_service_account, f'vertex-pipelines@{project_id}.iam.gserviceaccount.com') + derived_pipeline_job_submission_service_name = coalesce(pipeline_job_submission_service_name, f'{naming_prefix}-job-submission-svc') + derived_pubsub_topic_name = coalesce(pubsub_topic_name, f'{naming_prefix}-queueing-svc') + derived_schedule_name = coalesce(schedule_name, f'{naming_prefix}-schedule') + derived_source_repo_name = coalesce(source_repo_name, f'{naming_prefix}-repository') + derived_storage_bucket_name = coalesce(storage_bucket_name, f'{project_id}-{naming_prefix}-bucket') # Write defaults.yaml defaults = create_default_config( @@ -333,12 +338,13 @@ def generate( # Generate files required to run a Kubeflow pipeline if orchestration_framework == Orchestrator.KFP.value: + + # Log what files will be created logging.info(f'Writing README.md to {BASE_DIR}README.md') - logging.info(f'Writing kubeflow pipelines code to {BASE_DIR}pipelines, {BASE_DIR}components') logging.info(f'Writing scripts to {BASE_DIR}scripts') - if use_ci: - logging.info(f'Writing submission service code to {BASE_DIR}services') - logging.info("Writing pipleine code.") + + # Write kubeflow pipeline code + 
logging.info(f'Writing kubeflow pipelines code to {BASE_DIR}pipelines') kfppipe = KFPPipeline(func=pipeline_glob.func, name=pipeline_glob.name, description=pipeline_glob.description, @@ -348,10 +354,18 @@ def generate( pipeline_params, pubsub_topic_name, use_ci) + + # Write kubeflow components code + logging.info(f'Writing kubeflow components code to {BASE_DIR}components') for comp in kfppipe.comps: - logging.info(f"Writing code for component {comp.name}") + logging.info(f' -- Writing {comp.name}') KFPComponent(func=comp.func, packages_to_install=comp.packages_to_install).build() + # If user specified services, write services scripts + if use_ci: + logging.info(f'Writing submission service code to {BASE_DIR}services') + KFPServices().build() + # Generate files required to provision resources if provisioning_framework == Provisioner.GCLOUD.value: logging.info(f'Writing gcloud provisioning code to {BASE_DIR}provision') @@ -524,7 +538,7 @@ def deploy( # Log generated resources resources_generation_manifest(defaults) -# TODO: Replace with component object creation + def component(func: Optional[Callable] = None, *, packages_to_install: Optional[List[str]] = None): @@ -554,7 +568,7 @@ def my_function_one(input: str, output: Output[Model]): ) return -# TODO: Replace with pipeline object creation + def pipeline(func: Optional[Callable] = None, *, name: Optional[str] = None, diff --git a/google_cloud_automlops/orchestration/enums.py b/google_cloud_automlops/orchestration/enums.py index 7894af0..f63af5d 100644 --- a/google_cloud_automlops/orchestration/enums.py +++ b/google_cloud_automlops/orchestration/enums.py @@ -19,7 +19,6 @@ from enum import Enum - class Orchestrator(Enum): """Enum representing the available options for orchestration management.""" diff --git a/google_cloud_automlops/utils/utils.py b/google_cloud_automlops/utils/utils.py index a4ac109..99d0868 100644 --- a/google_cloud_automlops/utils/utils.py +++ b/google_cloud_automlops/utils/utils.py @@ -323,6 +323,8 @@ def stringify_job_spec_list(job_spec_list: list) -> list: Returns: list[str]: Python formatted dictionary code. """ + if not job_spec_list: + return None output = [] for spec in job_spec_list: mapping = {} @@ -956,3 +958,14 @@ def render_jinja(template_path, **template_vars): with open(template_path, 'r', encoding='utf-8') as f: template = Template(f.read()) return template.render(**template_vars) + +def coalesce(*arg): + """Returns the first non-None value from a sequence of arguments. + + Returns: + The first non-None argument, or None if all arguments are None. 
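To illustrate the behaviour described above, a minimal usage sketch of the helper being added to utils.py here; the values are made up purely for illustration, and this mirrors how generate() derives its default resource names:

from google_cloud_automlops.utils.utils import coalesce  # available once this patch is applied

naming_prefix = 'demo'            # illustrative value
artifact_repo_name = None         # user did not supply a name
assert coalesce(artifact_repo_name, f'{naming_prefix}-artifact-registry') == 'demo-artifact-registry'
assert coalesce('my-repo', f'{naming_prefix}-artifact-registry') == 'my-repo'
assert coalesce(None, None) is None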
+ """ + for el in arg: + if el is not None: + return el + return None From e6460910f5e969ed3588f56c947a15071b9c8491 Mon Sep 17 00:00:00 2001 From: Allegra Noto Date: Sat, 30 Mar 2024 20:46:03 -0400 Subject: [PATCH 08/11] Restructured folder structure --- google_cloud_automlops/AutoMLOps.py | 15 +- .../orchestration/Component.py | 169 --------- .../orchestration/Pipeline.py | 141 ------- .../orchestration/Services.py | 81 ---- .../orchestration/airflow/.gitkeep | 0 .../orchestration/argo/.gitkeep | 0 google_cloud_automlops/orchestration/base.py | 354 ++++++++++++++++++ google_cloud_automlops/orchestration/enums.py | 35 -- .../{kfp/KFPPipeline.py => kfp.py} | 242 +++++++++++- .../orchestration/kfp/KFPComponent.py | 210 ----------- .../orchestration/kfp/KFPServices.py | 98 ----- .../orchestration/ray/.gitkeep | 0 .../{kfp => templates}/__init__.py | 0 .../templates => templates/kfp}/README.md.j2 | 0 .../templates => templates/kfp}/__init__.py | 0 .../kfp}/components/__init__.py | 0 .../components/component_base/Dockerfile.j2 | 0 .../components/component_base/__init__.py | 0 .../components/component_base/src/__init__.py | 0 .../components/component_base/src/task.py.j2 | 0 .../kfp}/pipelines/__init__.py | 0 .../kfp}/pipelines/pipeline.py.j2 | 0 .../kfp}/pipelines/pipeline_runner.py.j2 | 0 .../kfp}/pipelines/requirements.txt.j2 | 0 .../kfp}/scripts/__init__.py | 0 .../kfp}/scripts/build_components.sh.j2 | 0 .../kfp}/scripts/build_pipeline_spec.sh.j2 | 0 .../kfp}/scripts/publish_to_topic.sh.j2 | 0 .../kfp}/scripts/run_all.sh.j2 | 0 .../kfp}/scripts/run_pipeline.sh.j2 | 0 .../kfp}/services/__init__.py | 0 .../services/submission_service/Dockerfile.j2 | 0 .../services/submission_service/__init__.py | 0 .../services/submission_service/main.py.j2 | 0 .../submission_service/requirements.txt.j2 | 0 .../orchestration/tfx/.gitkeep | 0 google_cloud_automlops/utils/constants.py | 2 +- 37 files changed, 596 insertions(+), 751 deletions(-) delete mode 100644 google_cloud_automlops/orchestration/Component.py delete mode 100644 google_cloud_automlops/orchestration/Pipeline.py delete mode 100644 google_cloud_automlops/orchestration/Services.py delete mode 100644 google_cloud_automlops/orchestration/airflow/.gitkeep delete mode 100644 google_cloud_automlops/orchestration/argo/.gitkeep create mode 100644 google_cloud_automlops/orchestration/base.py delete mode 100644 google_cloud_automlops/orchestration/enums.py rename google_cloud_automlops/orchestration/{kfp/KFPPipeline.py => kfp.py} (56%) delete mode 100644 google_cloud_automlops/orchestration/kfp/KFPComponent.py delete mode 100644 google_cloud_automlops/orchestration/kfp/KFPServices.py delete mode 100644 google_cloud_automlops/orchestration/ray/.gitkeep rename google_cloud_automlops/orchestration/{kfp => templates}/__init__.py (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/README.md.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/__init__.py (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/components/__init__.py (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/components/component_base/Dockerfile.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/components/component_base/__init__.py (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/components/component_base/src/__init__.py (100%) rename google_cloud_automlops/orchestration/{kfp/templates => 
templates/kfp}/components/component_base/src/task.py.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/pipelines/__init__.py (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/pipelines/pipeline.py.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/pipelines/pipeline_runner.py.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/pipelines/requirements.txt.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/scripts/__init__.py (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/scripts/build_components.sh.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/scripts/build_pipeline_spec.sh.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/scripts/publish_to_topic.sh.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/scripts/run_all.sh.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/scripts/run_pipeline.sh.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/services/__init__.py (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/services/submission_service/Dockerfile.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/services/submission_service/__init__.py (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/services/submission_service/main.py.j2 (100%) rename google_cloud_automlops/orchestration/{kfp/templates => templates/kfp}/services/submission_service/requirements.txt.j2 (100%) delete mode 100644 google_cloud_automlops/orchestration/tfx/.gitkeep diff --git a/google_cloud_automlops/AutoMLOps.py b/google_cloud_automlops/AutoMLOps.py index bb36e96..e9a93bd 100644 --- a/google_cloud_automlops/AutoMLOps.py +++ b/google_cloud_automlops/AutoMLOps.py @@ -61,17 +61,12 @@ write_file ) # Orchestration imports -from google_cloud_automlops.orchestration.enums import ( +from google_cloud_automlops.orchestration.base import ( Orchestrator, PipelineJobSubmitter ) - -from google_cloud_automlops.orchestration.Component import Component -from google_cloud_automlops.orchestration.Pipeline import Pipeline -from google_cloud_automlops.orchestration.Services import Services -from google_cloud_automlops.orchestration.kfp.KFPComponent import KFPComponent -from google_cloud_automlops.orchestration.kfp.KFPPipeline import KFPPipeline -from google_cloud_automlops.orchestration.kfp.KFPServices import KFPServices +from google_cloud_automlops.orchestration.base import BaseComponent, BasePipeline, BaseServices +from google_cloud_automlops.orchestration.kfp import KFPComponent, KFPPipeline, KFPServices # Provisioning imports from google_cloud_automlops.provisioning.pulumi import builder as PulumiBuilder @@ -562,7 +557,7 @@ def my_function_one(input: str, output: Output[Model]): component, packages_to_install=packages_to_install) else: - components_dict[func.__name__] = Component( + components_dict[func.__name__] = BaseComponent( func=func, packages_to_install=packages_to_install ) @@ -603,7 +598,7 @@ def pipeline(bq_table: str, description=description) else: global pipeline_glob - pipeline_glob = Pipeline(func=func, + pipeline_glob = BasePipeline(func=func, name=name, description=description, comps_dict=components_dict) diff --git 
a/google_cloud_automlops/orchestration/Component.py b/google_cloud_automlops/orchestration/Component.py deleted file mode 100644 index 123f479..0000000 --- a/google_cloud_automlops/orchestration/Component.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Creates a generic component object.""" - -# pylint: disable=anomalous-backslash-in-string -# pylint: disable=C0103 -# pylint: disable=line-too-long - -import docstring_parser -import inspect -from typing import Callable, List, Optional, TypeVar, Union - -from google_cloud_automlops.utils.constants import GENERATED_DEFAULTS_FILE -from google_cloud_automlops.utils.utils import ( - get_function_source_definition, - read_yaml_file -) - -T = TypeVar('T') - - -class Component(): - """The Component object represents a component defined by the user. - - Args: - ABC: Abstract class - """ - - def __init__(self, - func: Optional[Callable] = None, - packages_to_install: Optional[List[str]] = None): - """Initiates a generic Component object created out of a function holding - all necessary code. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. - - Raises: - ValueError: Confirms that the input is an existing function. - """ - - # Confirm the input is an existing function - if not inspect.isfunction(func): - raise ValueError(f"{func} must be of type function.") - - # Set simple attributes of the component function - self.func = func - self.name = func.__name__ - self.packages_to_install = [] if not packages_to_install else packages_to_install - - # Parse the docstring for description - self.parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) - self.description = self.parsed_docstring.short_description - - # Process and extract details from passed function - self.parameters = self._get_function_parameters() - self.return_types = self._get_function_return_types() - self.src_code = get_function_source_definition(self.func) - - # Instantiate attributes to be set during build - self.artifact_repo_location = None - self.artifact_repo_name = None - self.project_id = None - self.naming_prefix = None - - def build(self): - """Instantiates an abstract built method to create and write task files. Also - reads in defaults file to save default arguments to attributes. 
- """ - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - self.artifact_repo_location = defaults['gcp']['artifact_repo_location'] - self.artifact_repo_name = defaults['gcp']['artifact_repo_name'] - self.project_id = defaults['gcp']['project_id'] - self.naming_prefix = defaults['gcp']['naming_prefix'] - raise NotImplementedError("Subclass needs to define this.") - - def _get_function_return_types(self) -> list: - """Returns a formatted list of function return types. - - Returns: - list: return value list with types converted to kubeflow spec. - Raises: - Exception: If return type is provided and not a NamedTuple. - """ - # TODO: COMMENT - annotation = inspect.signature(self.func).return_annotation - if maybe_strip_optional_from_annotation(annotation) is not annotation: - raise TypeError('Return type cannot be Optional.') - - # No annotations provided - # pylint: disable=protected-access - if annotation == inspect._empty: - return None - - if not (hasattr(annotation,'__annotations__') and isinstance(annotation.__annotations__, dict)): - raise TypeError(f'''Return type hint for function "{self.name}" must be a NamedTuple.''') - - # TODO: COMMENT - outputs = [] - for name, type_ in annotation.__annotations__.items(): - metadata = {} - metadata['name'] = name - metadata['type'] = type_ - metadata['description'] = None - outputs.append(metadata) - return outputs - - def _get_function_parameters(self) -> list: - """Returns a formatted list of parameters. - - Returns: - list: Params list with types converted to kubeflow spec. - Raises: - Exception: If parameter type hints are not provided. - """ - #TODO: COMMENT? - signature = inspect.signature(self.func) - parameters = list(signature.parameters.values()) - parsed_docstring = docstring_parser.parse(inspect.getdoc(self.func)) - doc_dict = {p.arg_name: p.description for p in parsed_docstring.params} - - # Extract parameter metadata - parameter_holder = [] - for param in parameters: - metadata = {} - metadata['name'] = param.name - metadata['description'] = doc_dict.get(param.name) - metadata['type'] = maybe_strip_optional_from_annotation( - param.annotation) - parameter_holder.append(metadata) - # pylint: disable=protected-access - if metadata['type'] == inspect._empty: - raise TypeError( - f'''Missing type hint for parameter "{metadata['name']}". ''' - f'''Please specify the type for this parameter.''') - return parameter_holder - -def maybe_strip_optional_from_annotation(annotation: T) -> T: - """Strips 'Optional' from 'Optional[]' if applicable. - For example:: - Optional[str] -> str - str -> str - List[int] -> List[int] - Args: - annotation: The original type annotation which may or may not has `Optional`. - Returns: - The type inside Optional[] if Optional exists, otherwise the original type. - """ - if getattr(annotation, '__origin__', None) is Union and annotation.__args__[1] is type(None): - return annotation.__args__[0] - else: - return annotation diff --git a/google_cloud_automlops/orchestration/Pipeline.py b/google_cloud_automlops/orchestration/Pipeline.py deleted file mode 100644 index d52eaff..0000000 --- a/google_cloud_automlops/orchestration/Pipeline.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Creates a generic pipeline object.""" - -# pylint: disable=anomalous-backslash-in-string -# pylint: disable=C0103 -# pylint: disable=line-too-long - -import ast -import inspect -from typing import Callable, Optional - -from google_cloud_automlops.utils.constants import ( - DEFAULT_PIPELINE_NAME, - GENERATED_DEFAULTS_FILE -) -from google_cloud_automlops.utils.utils import ( - get_function_source_definition, - read_yaml_file -) - - -class Pipeline(): - """The Pipeline object represents a component defined by the user. - - Args: - ABC: Abstract class - """ - - def __init__(self, - func: Optional[Callable] = None, - *, - name: Optional[str] = None, - description: Optional[str] = None, - comps_dict: dict): - """Initiates a pipeline object created out of a function holding - all necessary code. - - Args: - func: The python function to create a pipeline from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - name: The name of the pipeline. - description: Short description of what the pipeline does. - comps_list: Dictionary of potential components for pipeline to utilize imported - as the global held in AutoMLOps.py. - """ - # Instantiate and set key pipeline attributes - self.func = func - self.func_name = func.__name__ - self.name = DEFAULT_PIPELINE_NAME if not name else name - self.description = description - self.src_code = get_function_source_definition(self.func) - self.comps = self.get_pipeline_components(func, comps_dict) - - # Instantiate attributes to be set at build process - self.base_image = None - self.custom_training_job_specs = None - self.pipeline_params = None - self.pubsub_topic_name = None - self.use_ci = None - self.project_id = None - self.gs_pipeline_job_spec_path = None - - def build(self, - base_image, - custom_training_job_specs, - pipeline_params, - pubsub_topic_name, - use_ci): - """Instantiates an abstract built method to create and write pipeline files. Also - reads in defaults file to save default arguments to attributes. - - Files created must include: - 1. README.md - 2. Dockerfile - 3. Requirements.txt - - Args: - base_image (_type_): _description_ - custom_training_job_specs (_type_): _description_ - pipeline_params (_type_): _description_ - pubsub_topic_name (_type_): _description_ - use_ci (_type_): _description_ - """ - # Save parameters as attributes - self.base_image = base_image - self.custom_training_job_specs = custom_training_job_specs - self.pipeline_params = pipeline_params - self.pubsub_topic_name = pubsub_topic_name - self.use_ci = use_ci - - # Extract additional attributes from defaults file - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - self.project_id = defaults['gcp']['project_id'] - self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path'] - - raise NotImplementedError("Subclass needs to define this.") - - def get_pipeline_components(self, pipeline_func: Callable, comps_dict: dict): - """Returns a list of components used within a given pipeline. 
- - Args: - pipeline_func (Callable): Pipeline function. - comps_dict (dict): List of potential components to use within pipeline. - - Returns: - List: Components from comps_dict used within the pipeline_func. - """ - #Returns a list of components used within a given pipeline. - code = inspect.getsource(pipeline_func) - ast_tree = ast.parse(code) - comps_list = [] - for node in ast.walk(ast_tree): - try: - if isinstance(node, ast.Call) and node.func.id in comps_dict.keys(): - comps_list.append(comps_dict[node.func.id]) - except Exception: - pass - return comps_list - - -class FuturePipeline(): - """Placeholder for future pipeline object that will be created out of a list of components. - """ - def __init__(self, comps: list) -> None: - self.comps = comps - self.names = [comp.name for comp in self.comps] diff --git a/google_cloud_automlops/orchestration/Services.py b/google_cloud_automlops/orchestration/Services.py deleted file mode 100644 index 97701c5..0000000 --- a/google_cloud_automlops/orchestration/Services.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Creates a generic services object.""" - -# pylint: disable=anomalous-backslash-in-string -# pylint: disable=C0103 -# pylint: disable=line-too-long - -from google_cloud_automlops.utils.utils import read_yaml_file -from google_cloud_automlops.utils.constants import ( - BASE_DIR, - GENERATED_DEFAULTS_FILE -) - - -class Services(): - """The Services object will contain TODO: fill out what this does - - Args: - ABC: Abstract class - """ - - def __init__(self) -> None: - """Instantiates a generic Services object. - """ - self.pipeline_storage_path = None - self.pipeline_job_runner_service_account = None - self.pipeline_job_submission_service_type = None - self.project_id = None - self.pipeline_job_submission_service_type = None - - # Set directory for files to be written to - self.submission_service_base_dir = BASE_DIR + 'services/submission_service' - - - def build(self): - """Constructs and writes a Dockerfile, requirements.txt, and - main.py to the services/submission_service directory. - """ - - # Read in defaults params - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - self.pipeline_storage_path = defaults['pipelines']['pipeline_storage_path'] - self.pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'] - self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] - self.project_id = defaults['gcp']['project_id'] - self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] - - # Set directory for files to be written to - self.submission_service_base_dir = BASE_DIR + 'services/submission_service' - - self._build_main() - self._build_dockerfile() - self._build_requirements() - - def _build_dockerfile(self): - """Abstract method to create the Dockerfile file of the services/submission_service directory. 
- """ - raise NotImplementedError("Subclass needs to define this.") - - def _build_requirements(self): - """Abstract method to create the requirements.txt file of the services/submission_service directory. - """ - raise NotImplementedError("Subclass needs to define this.") - - def _build_main(self): - """Abstract method to create the main.py file of the services/submission_service directory. - """ - raise NotImplementedError("Subclass needs to define this.") diff --git a/google_cloud_automlops/orchestration/airflow/.gitkeep b/google_cloud_automlops/orchestration/airflow/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/google_cloud_automlops/orchestration/argo/.gitkeep b/google_cloud_automlops/orchestration/argo/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/google_cloud_automlops/orchestration/base.py b/google_cloud_automlops/orchestration/base.py new file mode 100644 index 0000000..d529b48 --- /dev/null +++ b/google_cloud_automlops/orchestration/base.py @@ -0,0 +1,354 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Creates enums for orchestrator and submission service options as well as generic component, pipeline, and services objects.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=C0103 +# pylint: disable=line-too-long + +import ast +import docstring_parser +from enum import Enum +import inspect +from typing import Callable, List, Optional, TypeVar, Union + +from google_cloud_automlops.utils.utils import ( + get_function_source_definition, + read_yaml_file +) +from google_cloud_automlops.utils.constants import ( + BASE_DIR, + DEFAULT_PIPELINE_NAME, + GENERATED_DEFAULTS_FILE +) + +T = TypeVar('T') + + +class Orchestrator(Enum): + """Enum representing the available options for orchestration management.""" + + KFP = 'kfp' + # ARGO_WORKFLOWS = 'argo-workflows' # roadmap item + # TFX = 'tfx' # roadmap item + # AIRFLOW = 'airflow' # roadmap item + # RAY = 'ray' # roadmap item + + +class PipelineJobSubmitter(Enum): + """Enum representing the available options for the Pipeline Job submission service.""" + + CLOUD_FUNCTIONS = 'cloud-functions' + CLOUD_RUN = 'cloud-run' + + +class BaseComponent(): + """The Component object represents a component defined by the user. + """ + + def __init__(self, + func: Optional[Callable] = None, + packages_to_install: Optional[List[str]] = None): + """Initiates a generic Component object created out of a function holding + all necessary code. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + packages_to_install: A list of optional packages to install before + executing func. These will always be installed at component runtime. + + Raises: + ValueError: Confirms that the input is an existing function. 
+ """ + + # Confirm the input is an existing function + if not inspect.isfunction(func): + raise ValueError(f"{func} must be of type function.") + + # Set simple attributes of the component function + self.func = func + self.name = func.__name__ + self.packages_to_install = [] if not packages_to_install else packages_to_install + + # Parse the docstring for description + self.parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) + self.description = self.parsed_docstring.short_description + + # Process and extract details from passed function + self.parameters = self._get_function_parameters() + self.return_types = self._get_function_return_types() + self.src_code = get_function_source_definition(self.func) + + # Instantiate attributes to be set during build + self.artifact_repo_location = None + self.artifact_repo_name = None + self.project_id = None + self.naming_prefix = None + + def build(self): + """Instantiates an abstract built method to create and write task files. Also + reads in defaults file to save default arguments to attributes. + """ + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.artifact_repo_location = defaults['gcp']['artifact_repo_location'] + self.artifact_repo_name = defaults['gcp']['artifact_repo_name'] + self.project_id = defaults['gcp']['project_id'] + self.naming_prefix = defaults['gcp']['naming_prefix'] + + raise NotImplementedError("Subclass needs to define this.") + + def _get_function_return_types(self) -> list: + """Returns a formatted list of function return types. + + Returns: + list: return value list with types converted to kubeflow spec. + Raises: + Exception: If return type is provided and not a NamedTuple. + """ + # Extract return type annotation of function + annotation = inspect.signature(self.func).return_annotation + + # Ensures return type is not optional + if self.maybe_strip_optional_from_annotation(annotation) is not annotation: + raise TypeError('Return type cannot be Optional.') + + # No annotations provided, return none + # pylint: disable=protected-access + if annotation == inspect._empty: + return None + + # Checks if the function's return type annotation is a valid NamedTuple + if not (hasattr(annotation,'__annotations__') and isinstance(annotation.__annotations__, dict)): + raise TypeError(f'''Return type hint for function "{self.name}" must be a NamedTuple.''') + + # Creates a dictionary of metadata for each object returned by component + outputs = [] + for name, type_ in annotation.__annotations__.items(): + metadata = {} + metadata['name'] = name + metadata['type'] = type_ + metadata['description'] = None + outputs.append(metadata) + return outputs + + def _get_function_parameters(self) -> list: + """Returns a formatted list of parameters. + + Returns: + list: Params list with types converted to kubeflow spec. + Raises: + Exception: If parameter type hints are not provided. 
+        """
+        # Extract function parameter names and their descriptions from the function's docstring
+        signature = inspect.signature(self.func)
+        parameters = list(signature.parameters.values())
+        parsed_docstring = docstring_parser.parse(inspect.getdoc(self.func))
+        doc_dict = {p.arg_name: p.description for p in parsed_docstring.params}
+
+        # Extract parameter metadata
+        parameter_holder = []
+        for param in parameters:
+            metadata = {}
+            metadata['name'] = param.name
+            metadata['description'] = doc_dict.get(param.name)
+            metadata['type'] = self.maybe_strip_optional_from_annotation(
+                param.annotation)
+            parameter_holder.append(metadata)
+            # pylint: disable=protected-access
+            if metadata['type'] == inspect._empty:
+                raise TypeError(
+                    f'''Missing type hint for parameter "{metadata['name']}". '''
+                    f'''Please specify the type for this parameter.''')
+        return parameter_holder
+
+    def maybe_strip_optional_from_annotation(self, annotation: T) -> T:
+        """Strips 'Optional' from 'Optional[]' if applicable.
+        For example::
+            Optional[str] -> str
+            str -> str
+            List[int] -> List[int]
+        Args:
+            annotation: The original type annotation which may or may not have `Optional`.
+        Returns:
+            The type inside Optional[] if Optional exists, otherwise the original type.
+        """
+        if getattr(annotation, '__origin__', None) is Union and annotation.__args__[1] is type(None):
+            return annotation.__args__[0]
+        else:
+            return annotation
+
+
+class BasePipeline():
+    """The Pipeline object represents a pipeline defined by the user.
+    """
+
+    def __init__(self,
+                 func: Optional[Callable] = None,
+                 *,
+                 name: Optional[str] = None,
+                 description: Optional[str] = None,
+                 comps_dict: dict):
+        """Initiates a pipeline object created out of a function holding
+        all necessary code.
+
+        Args:
+            func: The python function to create a pipeline from. The function
+                should have type annotations for all its arguments, indicating how
+                it is intended to be used (e.g. as an input/output Artifact object,
+                a plain parameter, or a path to a file).
+            name: The name of the pipeline.
+            description: Short description of what the pipeline does.
+            comps_dict: Dictionary of potential components for pipeline to utilize imported
+                as the global held in AutoMLOps.py.
+        """
+        # Instantiate and set key pipeline attributes
+        self.func = func
+        self.func_name = func.__name__
+        self.name = DEFAULT_PIPELINE_NAME if not name else name
+        self.description = description
+        self.src_code = get_function_source_definition(self.func)
+        self.comps = self.get_pipeline_components(func, comps_dict)
+
+        # Instantiate attributes to be set at build process
+        self.base_image = None
+        self.custom_training_job_specs = None
+        self.pipeline_params = None
+        self.pubsub_topic_name = None
+        self.use_ci = None
+        self.project_id = None
+        self.gs_pipeline_job_spec_path = None
+
+    def build(self,
+              base_image,
+              custom_training_job_specs,
+              pipeline_params,
+              pubsub_topic_name,
+              use_ci):
+        """Instantiates an abstract build method to create and write pipeline files. Also
+        reads in defaults file to save default arguments to attributes.
+
+        Files created must include:
+            1. README.md
+            2. Dockerfile
+            3. 
Requirements.txt + + Args: + base_image (_type_): _description_ + custom_training_job_specs (_type_): _description_ + pipeline_params (_type_): _description_ + pubsub_topic_name (_type_): _description_ + use_ci (_type_): _description_ + """ + # Save parameters as attributes + self.base_image = base_image + self.custom_training_job_specs = custom_training_job_specs + self.pipeline_params = pipeline_params + self.pubsub_topic_name = pubsub_topic_name + self.use_ci = use_ci + + # Extract additional attributes from defaults file + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.project_id = defaults['gcp']['project_id'] + self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path'] + + raise NotImplementedError("Subclass needs to define this.") + + def get_pipeline_components(self, pipeline_func: Callable, comps_dict: dict): + """Returns a list of components used within a given pipeline. + + Args: + pipeline_func (Callable): Pipeline function. + comps_dict (dict): List of potential components to use within pipeline. + + Returns: + List: Components from comps_dict used within the pipeline_func. + """ + # Retrieves pipeline source code and parses it into an Abstract Syntax Tree (AST) + code = inspect.getsource(pipeline_func) + ast_tree = ast.parse(code) + + # Iterates through AST, finds function calls to components that are in comps_dict + comps_list = [] + for node in ast.walk(ast_tree): + try: + if isinstance(node, ast.Call) and node.func.id in comps_dict.keys(): + comps_list.append(comps_dict[node.func.id]) + except Exception: + pass + return comps_list + + +class BaseFuturePipeline(): + """Placeholder for future pipeline object that will be created out of a list of components. + """ + def __init__(self, comps: list) -> None: + self.comps = comps + self.names = [comp.name for comp in self.comps] + + +class BaseServices(): + """The Services object will contain TODO: fill out what this does + """ + + def __init__(self) -> None: + """Instantiates a generic Services object. + """ + self.pipeline_storage_path = None + self.pipeline_job_runner_service_account = None + self.pipeline_job_submission_service_type = None + self.project_id = None + self.pipeline_job_submission_service_type = None + + # Set directory for files to be written to + self.submission_service_base_dir = BASE_DIR + 'services/submission_service' + + def build(self): + """Constructs and writes a Dockerfile, requirements.txt, and + main.py to the services/submission_service directory. + """ + + # Read in defaults params + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.pipeline_storage_path = defaults['pipelines']['pipeline_storage_path'] + self.pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'] + self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + self.project_id = defaults['gcp']['project_id'] + self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + + # Set directory for files to be written to + self.submission_service_base_dir = BASE_DIR + 'services/submission_service' + + # Build services files + self._build_main() + self._build_dockerfile() + self._build_requirements() + + def _build_dockerfile(self): + """Abstract method to create the Dockerfile file of the services/submission_service directory. 
+ """ + raise NotImplementedError("Subclass needs to define this.") + + def _build_requirements(self): + """Abstract method to create the requirements.txt file of the services/submission_service directory. + """ + raise NotImplementedError("Subclass needs to define this.") + + def _build_main(self): + """Abstract method to create the main.py file of the services/submission_service directory. + """ + raise NotImplementedError("Subclass needs to define this.") diff --git a/google_cloud_automlops/orchestration/enums.py b/google_cloud_automlops/orchestration/enums.py deleted file mode 100644 index f63af5d..0000000 --- a/google_cloud_automlops/orchestration/enums.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Sets global enums.""" - -# pylint: disable=C0103 -# pylint: disable=line-too-long - -from enum import Enum - -class Orchestrator(Enum): - """Enum representing the available options for orchestration management.""" - - KFP = 'kfp' - # ARGO_WORKFLOWS = 'argo-workflows' # roadmap item - # TFX = 'tfx' # roadmap item - # AIRFLOW = 'airflow' # roadmap item - # RAY = 'ray' # roadmap item - -class PipelineJobSubmitter(Enum): - """Enum representing the available options for the Pipeline Job submission service.""" - - CLOUD_FUNCTIONS = 'cloud-functions' - CLOUD_RUN = 'cloud-run' diff --git a/google_cloud_automlops/orchestration/kfp/KFPPipeline.py b/google_cloud_automlops/orchestration/kfp.py similarity index 56% rename from google_cloud_automlops/orchestration/kfp/KFPPipeline.py rename to google_cloud_automlops/orchestration/kfp.py index 323e423..a208ce3 100644 --- a/google_cloud_automlops/orchestration/kfp/KFPPipeline.py +++ b/google_cloud_automlops/orchestration/kfp.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
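The base.py module added above deliberately leaves the writer methods abstract: BaseServices.build() reads defaults.yaml and then calls _build_main(), _build_dockerfile(), and _build_requirements(), each of which raises NotImplementedError until a backend overrides it. A rough sketch of what a hypothetical non-KFP backend would have to supply (class name and file contents are invented for illustration; the real KFP implementation appears later in this patch series):

from google_cloud_automlops.orchestration.base import BaseServices
from google_cloud_automlops.utils.utils import write_file


class ArgoServices(BaseServices):
    """Hypothetical Argo Workflows backend: only the three writer methods need overriding."""

    def _build_dockerfile(self):
        # Placeholder content; a real backend would render a Jinja template here
        write_file(f'{self.submission_service_base_dir}/Dockerfile', '# TODO: Argo Dockerfile\n', 'w')

    def _build_requirements(self):
        write_file(f'{self.submission_service_base_dir}/requirements.txt', 'google-cloud-aiplatform\n', 'w')

    def _build_main(self):
        write_file(f'{self.submission_service_base_dir}/main.py', '# TODO: Argo submission entrypoint\n', 'w')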
-"""Creates a KFP pipeline subclass.""" +"""Creates a KFP component, pipeline, and services subclass.""" # pylint: disable=anomalous-backslash-in-string # pylint: disable=C0103 @@ -21,7 +21,7 @@ import json import re import textwrap -from typing import Callable, Optional +from typing import Callable, List, Optional try: from importlib.resources import files as import_files @@ -29,7 +29,7 @@ # Try backported to PY<37 `importlib_resources` from importlib_resources import files as import_files -from google_cloud_automlops.orchestration.Pipeline import Pipeline +from google_cloud_automlops.orchestration.base import BaseComponent, BasePipeline, BaseServices from google_cloud_automlops.utils.utils import ( execute_process, get_components_list, @@ -39,6 +39,7 @@ render_jinja, write_and_chmod, write_file, + write_yaml_file ) from google_cloud_automlops.utils.constants import ( BASE_DIR, @@ -56,11 +57,182 @@ GENERATED_RUN_ALL_SH_FILE, KFP_TEMPLATES_PATH, PINNED_KFP_VERSION, - PIPELINE_CACHE_FILE + PLACEHOLDER_IMAGE ) -class KFPPipeline(Pipeline): +class KFPComponent(BaseComponent): + """Creates a KFP specific Component object for #TODO: add more + + Args: + Component (object): Generic Component object. + """ + + def __init__(self, + func: Optional[Callable] = None, + packages_to_install: Optional[List[str]] = None): + """Initiates a KFP Component object created out of a function holding + all necessary code. + + Args: + func: The python function to create a component from. The function + should have type annotations for all its arguments, indicating how + it is intended to be used (e.g. as an input/output Artifact object, + a plain parameter, or a path to a file). + packages_to_install: A list of optional packages to install before + executing func. These will always be installed at component runtime. + """ + super().__init__(func, packages_to_install) + + # Update parameters and return types to reflect KFP data types + if self.parameters: + self.parameters = self._update_params(self.parameters) + if self.return_types: + self.return_types = self._update_params(self.return_types) + + # Set packages to install and component spec attributes + self.packages_to_install_command = self._get_packages_to_install_command() + self.component_spec = self._create_component_spec() + + def build(self): + """Constructs files for running and managing Kubeflow pipelines. + """ + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.artifact_repo_location = defaults['gcp']['artifact_repo_location'] + self.artifact_repo_name = defaults['gcp']['artifact_repo_name'] + self.project_id = defaults['gcp']['project_id'] + self.naming_prefix = defaults['gcp']['naming_prefix'] + + # Set and create directory for components if it does not already exist + component_dir = BASE_DIR + 'components/' + self.component_spec['name'] + + # Build necessary folders + # TODO: make this only happen for the first component? or pull into automlops.py + make_dirs([ + component_dir, + BASE_DIR + 'components/component_base/src/']) + + # TODO: can this be removed? 
+ kfp_spec_bool = self.component_spec['implementation']['container']['image'] != PLACEHOLDER_IMAGE + + # Read in component specs + custom_code_contents = self.component_spec['implementation']['container']['command'][-1] + compspec_image = ( + f'''{self.artifact_repo_location}-docker.pkg.dev/''' + f'''{self.project_id}/''' + f'''{self.artifact_repo_name}/''' + f'''{self.naming_prefix}/''' + f'''components/component_base:latest''') + + # If using kfp, remove spaces in name and convert to lowercase + if kfp_spec_bool: + self.component_spec['name'] = self.component_spec['name'].replace(' ', '_').lower() + + # Write task script to component base + write_file( + filepath=BASE_DIR + 'components/component_base/src/' + self.component_spec['name'] + '.py', + text=render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.components.component_base.src') / 'task.py.j2', + generated_license=GENERATED_LICENSE, + kfp_spec_bool=kfp_spec_bool, + custom_code_contents=custom_code_contents), + mode='w') + + # Update component_spec to include correct image and startup command + self.component_spec['implementation']['container']['image'] = compspec_image + self.component_spec['implementation']['container']['command'] = [ + 'python3', + f'''/pipelines/component/src/{self.component_spec['name']+'.py'}'''] + + # Write license and component spec to the appropriate component.yaml file + comp_yaml_path = component_dir + '/component.yaml' + write_file( + filepath=comp_yaml_path, + text=GENERATED_LICENSE, + mode='w') + write_yaml_file( + filepath=comp_yaml_path, + contents=self.component_spec, + mode='a') + + def _get_packages_to_install_command(self): + """Returns a list of formatted list of commands, including code for tmp storage. + """ + newline = '\n' + concat_package_list = ' '.join([repr(str(package)) for package in self.packages_to_install]) + install_python_packages_script = ( + f'''if ! [ -x "$(command -v pip)" ]; then{newline}''' + f''' python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip{newline}''' + f'''fi{newline}''' + f'''PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \{newline}''' + f''' --no-warn-script-location {concat_package_list} && "$0" "$@"{newline}''' + f'''{newline}''') + return ['sh', '-c', install_python_packages_script, self.src_code] + + def _create_component_spec(self): + """Creates a tmp component scaffold which will be used by the formalize function. + Code is temporarily stored in component_spec['implementation']['container']['command']. + + Returns: + _type_: _description_ #TODO: FILL OUT + """ + # Instantiate component yaml attributes + component_spec = {} + + # Save component name, description, outputs, and parameters + component_spec['name'] = self.name + if self.description: + component_spec['description'] = self.description + outputs = self.return_types + if outputs: + component_spec['outputs'] = outputs + component_spec['inputs'] = self.parameters + + # TODO: comment + component_spec['implementation'] = {} + component_spec['implementation']['container'] = {} + component_spec['implementation']['container']['image'] = PLACEHOLDER_IMAGE + component_spec['implementation']['container']['command'] = self.packages_to_install_command + component_spec['implementation']['container']['args'] = ['--executor_input', + {'executorInput': None}, + '--function_to_execute', + self.name] + return component_spec + + def _update_params(self, params: list) -> list: + """Converts the parameter types from Python types + to Kubeflow types. 
Currently only supports + Python primitive types. + + Args: + params: Pipeline parameters. A list of dictionaries, + each param is a dict containing keys: + 'name': required, str param name. + 'type': required, python primitive type. + 'description': optional, str param desc. + Returns: + list: Params list with converted types. + Raises: + Exception: If an inputted type is not a primitive. + """ + python_kfp_types_mapper = { + int: 'Integer', + str: 'String', + float: 'Float', + bool: 'Bool', + list: 'JsonArray', + dict: 'JsonObject' + } + for param in params: + try: + param['type'] = python_kfp_types_mapper[param['type']] + except KeyError as err: + raise ValueError(f'Unsupported python type - we only support ' + f'primitive types at this time. {err}') from err + return params + + +class KFPPipeline(BasePipeline): """Creates a KFP specific Pipeline object for #TODO: add more Args: @@ -136,7 +308,6 @@ def build(self, self.project_id = defaults['gcp']['project_id'] self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path'] - # Build necessary folders make_dirs([ f'{BASE_DIR}scripts/pipeline_spec/', @@ -360,3 +531,62 @@ def _create_component_base_requirements(self): # Stringify and sort reqs_str = ''.join(r+'\n' for r in sorted(set_of_requirements)) return reqs_str + + +class KFPServices(BaseServices): + """Creates a KFP specific Services object for #TODO: add more + + Args: + Services (object): Generic Services object. + """ + + def __init__(self) -> None: + """Initializes KFPServices Object. + """ + + def _build_dockerfile(self): + """Writes the services/submission_service/Dockerfile #TODO add more + """ + # Read in defaults params + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.pipeline_storage_path = defaults['pipelines']['pipeline_storage_path'] + self.pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'] + self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + self.project_id = defaults['gcp']['project_id'] + self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + + # Set directory for files to be written to + self.submission_service_base_dir = BASE_DIR + 'services/submission_service' + + write_file( + f'{self.submission_service_base_dir}/Dockerfile', + render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'Dockerfile.j2', + base_dir=BASE_DIR, + generated_license=GENERATED_LICENSE), + 'w') + + def _build_requirements(self): + """Writes the services/submission_service/requirements.txt #TODO add more + """ + write_file( + f'{self.submission_service_base_dir}/requirements.txt', + render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'requirements.txt.j2', + pinned_kfp_version=PINNED_KFP_VERSION, + pipeline_job_submission_service_type=self.pipeline_job_submission_service_type), + 'w') + + def _build_main(self): + """Writes the services/submission_service/main.py file to #TODO add more + """ + write_file( + f'{self.submission_service_base_dir}/main.py', + render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'main.py.j2', + generated_license=GENERATED_LICENSE, + pipeline_root=self.pipeline_storage_path, + pipeline_job_runner_service_account=self.pipeline_job_runner_service_account, + pipeline_job_submission_service_type=self.pipeline_job_submission_service_type, + project_id=self.project_id), + 
'w') diff --git a/google_cloud_automlops/orchestration/kfp/KFPComponent.py b/google_cloud_automlops/orchestration/kfp/KFPComponent.py deleted file mode 100644 index 00ed7ff..0000000 --- a/google_cloud_automlops/orchestration/kfp/KFPComponent.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Creates a KFP component subclass.""" - -# pylint: disable=anomalous-backslash-in-string -# pylint: disable=C0103 -# pylint: disable=line-too-long - -from typing import Callable, List, Optional - -try: - from importlib.resources import files as import_files -except ImportError: - # Try backported to PY<37 `importlib_resources` - from importlib_resources import files as import_files - -from google_cloud_automlops.orchestration.Component import Component -from google_cloud_automlops.utils.constants import ( - BASE_DIR, - GENERATED_DEFAULTS_FILE, - GENERATED_LICENSE, - KFP_TEMPLATES_PATH, - PLACEHOLDER_IMAGE, -) -from google_cloud_automlops.utils.utils import ( - make_dirs, - read_yaml_file, - render_jinja, - write_file, - write_yaml_file -) - - -class KFPComponent(Component): - """Creates a KFP specific Component object for #TODO: add more - - Args: - Component (object): Generic Component object. - """ - - def __init__(self, - func: Optional[Callable] = None, - packages_to_install: Optional[List[str]] = None): - """Initiates a KFP Component object created out of a function holding - all necessary code. - - Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. - """ - super().__init__(func, packages_to_install) - - # Update parameters and return types to reflect KFP data types - if self.parameters: - self.parameters = update_params(self.parameters) - if self.return_types: - self.return_types = update_params(self.return_types) - - # Set packages to install and component spec attributes - self.packages_to_install_command = self._get_packages_to_install_command() - self.component_spec = self._create_component_spec() - - def build(self): - """Constructs files for running and managing Kubeflow pipelines. - """ - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - self.artifact_repo_location = defaults['gcp']['artifact_repo_location'] - self.artifact_repo_name = defaults['gcp']['artifact_repo_name'] - self.project_id = defaults['gcp']['project_id'] - self.naming_prefix = defaults['gcp']['naming_prefix'] - - # Set and create directory for components if it does not already exist - component_dir = BASE_DIR + 'components/' + self.component_spec['name'] - - # Build necessary folders - # TODO: make this only happen for the first component? 
or pull into automlops.py - make_dirs([ - component_dir, - BASE_DIR + 'components/component_base/src/']) - - # TODO: can this be removed? - kfp_spec_bool = self.component_spec['implementation']['container']['image'] != PLACEHOLDER_IMAGE - - # Read in component specs - custom_code_contents = self.component_spec['implementation']['container']['command'][-1] - compspec_image = ( - f'''{self.artifact_repo_location}-docker.pkg.dev/''' - f'''{self.project_id}/''' - f'''{self.artifact_repo_name}/''' - f'''{self.naming_prefix}/''' - f'''components/component_base:latest''') - - # If using kfp, remove spaces in name and convert to lowercase - if kfp_spec_bool: - self.component_spec['name'] = self.component_spec['name'].replace(' ', '_').lower() - - # Write task script to component base - write_file( - filepath=BASE_DIR + 'components/component_base/src/' + self.component_spec['name'] + '.py', - text=render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.components.component_base.src') / 'task.py.j2', - generated_license=GENERATED_LICENSE, - kfp_spec_bool=kfp_spec_bool, - custom_code_contents=custom_code_contents), - mode='w') - - # Update component_spec to include correct image and startup command - self.component_spec['implementation']['container']['image'] = compspec_image - self.component_spec['implementation']['container']['command'] = [ - 'python3', - f'''/pipelines/component/src/{self.component_spec['name']+'.py'}'''] - - # Write license and component spec to the appropriate component.yaml file - comp_yaml_path = component_dir + '/component.yaml' - write_file( - filepath=comp_yaml_path, - text=GENERATED_LICENSE, - mode='w') - write_yaml_file( - filepath=comp_yaml_path, - contents=self.component_spec, - mode='a') - - def _get_packages_to_install_command(self): - """Returns a list of formatted list of commands, including code for tmp storage. - """ - newline = '\n' - concat_package_list = ' '.join([repr(str(package)) for package in self.packages_to_install]) - install_python_packages_script = ( - f'''if ! [ -x "$(command -v pip)" ]; then{newline}''' - f''' python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip{newline}''' - f'''fi{newline}''' - f'''PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet \{newline}''' - f''' --no-warn-script-location {concat_package_list} && "$0" "$@"{newline}''' - f'''{newline}''') - return ['sh', '-c', install_python_packages_script, self.src_code] - - def _create_component_spec(self): - """Creates a tmp component scaffold which will be used by the formalize function. - Code is temporarily stored in component_spec['implementation']['container']['command']. 
- - Returns: - _type_: _description_ #TODO: FILL OUT - """ - # Instantiate component yaml attributes - component_spec = {} - component_spec['name'] = self.name - if self.description: - component_spec['description'] = self.description - outputs = self.return_types - if outputs: - component_spec['outputs'] = outputs - component_spec['inputs'] = self.parameters - component_spec['implementation'] = {} - component_spec['implementation']['container'] = {} - component_spec['implementation']['container']['image'] = PLACEHOLDER_IMAGE - component_spec['implementation']['container']['command'] = self.packages_to_install_command - component_spec['implementation']['container']['args'] = ['--executor_input', - {'executorInput': None}, - '--function_to_execute', - self.name] - return component_spec - -def update_params(params: list) -> list: - """Converts the parameter types from Python types - to Kubeflow types. Currently only supports - Python primitive types. - - Args: - params: Pipeline parameters. A list of dictionaries, - each param is a dict containing keys: - 'name': required, str param name. - 'type': required, python primitive type. - 'description': optional, str param desc. - Returns: - list: Params list with converted types. - Raises: - Exception: If an inputted type is not a primitive. - """ - python_kfp_types_mapper = { - int: 'Integer', - str: 'String', - float: 'Float', - bool: 'Bool', - list: 'JsonArray', - dict: 'JsonObject' - } - for param in params: - try: - param['type'] = python_kfp_types_mapper[param['type']] - except KeyError as err: - raise ValueError(f'Unsupported python type - we only support ' - f'primitive types at this time. {err}') from err - return params diff --git a/google_cloud_automlops/orchestration/kfp/KFPServices.py b/google_cloud_automlops/orchestration/kfp/KFPServices.py deleted file mode 100644 index 6b447d4..0000000 --- a/google_cloud_automlops/orchestration/kfp/KFPServices.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2023 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Creates a KFP services subclass.""" - -# pylint: disable=anomalous-backslash-in-string -# pylint: disable=C0103 -# pylint: disable=line-too-long - -try: - from importlib.resources import files as import_files -except ImportError: - # Try backported to PY<37 `importlib_resources` - from importlib_resources import files as import_files - -from google_cloud_automlops.orchestration.Services import Services -from google_cloud_automlops.utils.utils import ( - read_yaml_file, - render_jinja, - write_file -) -from google_cloud_automlops.utils.constants import ( - BASE_DIR, - GENERATED_DEFAULTS_FILE, - GENERATED_LICENSE, - KFP_TEMPLATES_PATH, - PINNED_KFP_VERSION -) - - -class KFPServices(Services): - """Creates a KFP specific Services object for #TODO: add more - - Args: - Services (object): Generic Services object. - """ - - def __init__(self) -> None: - """Initializes KFPServices Object. 
- """ - - def _build_dockerfile(self): - """Writes the services/submission_service/Dockerfile #TODO add more - """ - # Read in defaults params - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - self.pipeline_storage_path = defaults['pipelines']['pipeline_storage_path'] - self.pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'] - self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] - self.project_id = defaults['gcp']['project_id'] - self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] - - # Set directory for files to be written to - self.submission_service_base_dir = BASE_DIR + 'services/submission_service' - - write_file( - f'{self.submission_service_base_dir}/Dockerfile', - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'Dockerfile.j2', - base_dir=BASE_DIR, - generated_license=GENERATED_LICENSE), - 'w') - - def _build_requirements(self): - """Writes the services/submission_service/requirements.txt #TODO add more - """ - write_file( - f'{self.submission_service_base_dir}/requirements.txt', - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'requirements.txt.j2', - pinned_kfp_version=PINNED_KFP_VERSION, - pipeline_job_submission_service_type=self.pipeline_job_submission_service_type), - 'w') - - def _build_main(self): - """Writes the services/submission_service/main.py file to #TODO add more - """ - write_file( - f'{self.submission_service_base_dir}/main.py', - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'main.py.j2', - generated_license=GENERATED_LICENSE, - pipeline_root=self.pipeline_storage_path, - pipeline_job_runner_service_account=self.pipeline_job_runner_service_account, - pipeline_job_submission_service_type=self.pipeline_job_submission_service_type, - project_id=self.project_id), - 'w') diff --git a/google_cloud_automlops/orchestration/ray/.gitkeep b/google_cloud_automlops/orchestration/ray/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/google_cloud_automlops/orchestration/kfp/__init__.py b/google_cloud_automlops/orchestration/templates/__init__.py similarity index 100% rename from google_cloud_automlops/orchestration/kfp/__init__.py rename to google_cloud_automlops/orchestration/templates/__init__.py diff --git a/google_cloud_automlops/orchestration/kfp/templates/README.md.j2 b/google_cloud_automlops/orchestration/templates/kfp/README.md.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/README.md.j2 rename to google_cloud_automlops/orchestration/templates/kfp/README.md.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/__init__.py similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/__init__.py rename to google_cloud_automlops/orchestration/templates/kfp/__init__.py diff --git a/google_cloud_automlops/orchestration/kfp/templates/components/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/components/__init__.py similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/components/__init__.py rename to google_cloud_automlops/orchestration/templates/kfp/components/__init__.py diff --git a/google_cloud_automlops/orchestration/kfp/templates/components/component_base/Dockerfile.j2 
b/google_cloud_automlops/orchestration/templates/kfp/components/component_base/Dockerfile.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/components/component_base/Dockerfile.j2 rename to google_cloud_automlops/orchestration/templates/kfp/components/component_base/Dockerfile.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/components/component_base/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/components/component_base/__init__.py similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/components/component_base/__init__.py rename to google_cloud_automlops/orchestration/templates/kfp/components/component_base/__init__.py diff --git a/google_cloud_automlops/orchestration/kfp/templates/components/component_base/src/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/components/component_base/src/__init__.py similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/components/component_base/src/__init__.py rename to google_cloud_automlops/orchestration/templates/kfp/components/component_base/src/__init__.py diff --git a/google_cloud_automlops/orchestration/kfp/templates/components/component_base/src/task.py.j2 b/google_cloud_automlops/orchestration/templates/kfp/components/component_base/src/task.py.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/components/component_base/src/task.py.j2 rename to google_cloud_automlops/orchestration/templates/kfp/components/component_base/src/task.py.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/pipelines/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/pipelines/__init__.py similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/pipelines/__init__.py rename to google_cloud_automlops/orchestration/templates/kfp/pipelines/__init__.py diff --git a/google_cloud_automlops/orchestration/kfp/templates/pipelines/pipeline.py.j2 b/google_cloud_automlops/orchestration/templates/kfp/pipelines/pipeline.py.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/pipelines/pipeline.py.j2 rename to google_cloud_automlops/orchestration/templates/kfp/pipelines/pipeline.py.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/pipelines/pipeline_runner.py.j2 b/google_cloud_automlops/orchestration/templates/kfp/pipelines/pipeline_runner.py.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/pipelines/pipeline_runner.py.j2 rename to google_cloud_automlops/orchestration/templates/kfp/pipelines/pipeline_runner.py.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/pipelines/requirements.txt.j2 b/google_cloud_automlops/orchestration/templates/kfp/pipelines/requirements.txt.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/pipelines/requirements.txt.j2 rename to google_cloud_automlops/orchestration/templates/kfp/pipelines/requirements.txt.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/scripts/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/scripts/__init__.py similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/scripts/__init__.py rename to google_cloud_automlops/orchestration/templates/kfp/scripts/__init__.py diff --git a/google_cloud_automlops/orchestration/kfp/templates/scripts/build_components.sh.j2 
b/google_cloud_automlops/orchestration/templates/kfp/scripts/build_components.sh.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/scripts/build_components.sh.j2 rename to google_cloud_automlops/orchestration/templates/kfp/scripts/build_components.sh.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/scripts/build_pipeline_spec.sh.j2 b/google_cloud_automlops/orchestration/templates/kfp/scripts/build_pipeline_spec.sh.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/scripts/build_pipeline_spec.sh.j2 rename to google_cloud_automlops/orchestration/templates/kfp/scripts/build_pipeline_spec.sh.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/scripts/publish_to_topic.sh.j2 b/google_cloud_automlops/orchestration/templates/kfp/scripts/publish_to_topic.sh.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/scripts/publish_to_topic.sh.j2 rename to google_cloud_automlops/orchestration/templates/kfp/scripts/publish_to_topic.sh.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/scripts/run_all.sh.j2 b/google_cloud_automlops/orchestration/templates/kfp/scripts/run_all.sh.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/scripts/run_all.sh.j2 rename to google_cloud_automlops/orchestration/templates/kfp/scripts/run_all.sh.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/scripts/run_pipeline.sh.j2 b/google_cloud_automlops/orchestration/templates/kfp/scripts/run_pipeline.sh.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/scripts/run_pipeline.sh.j2 rename to google_cloud_automlops/orchestration/templates/kfp/scripts/run_pipeline.sh.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/services/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/services/__init__.py similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/services/__init__.py rename to google_cloud_automlops/orchestration/templates/kfp/services/__init__.py diff --git a/google_cloud_automlops/orchestration/kfp/templates/services/submission_service/Dockerfile.j2 b/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/Dockerfile.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/services/submission_service/Dockerfile.j2 rename to google_cloud_automlops/orchestration/templates/kfp/services/submission_service/Dockerfile.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/services/submission_service/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/__init__.py similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/services/submission_service/__init__.py rename to google_cloud_automlops/orchestration/templates/kfp/services/submission_service/__init__.py diff --git a/google_cloud_automlops/orchestration/kfp/templates/services/submission_service/main.py.j2 b/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/main.py.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/services/submission_service/main.py.j2 rename to google_cloud_automlops/orchestration/templates/kfp/services/submission_service/main.py.j2 diff --git a/google_cloud_automlops/orchestration/kfp/templates/services/submission_service/requirements.txt.j2 
b/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/requirements.txt.j2 similarity index 100% rename from google_cloud_automlops/orchestration/kfp/templates/services/submission_service/requirements.txt.j2 rename to google_cloud_automlops/orchestration/templates/kfp/services/submission_service/requirements.txt.j2 diff --git a/google_cloud_automlops/orchestration/tfx/.gitkeep b/google_cloud_automlops/orchestration/tfx/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/google_cloud_automlops/utils/constants.py b/google_cloud_automlops/utils/constants.py index a9579ba..c596ff7 100644 --- a/google_cloud_automlops/utils/constants.py +++ b/google_cloud_automlops/utils/constants.py @@ -121,7 +121,7 @@ TERRAFORM_TEMPLATES_PATH = 'google_cloud_automlops.provisioning.terraform.templates' PULUMI_TEMPLATES_PATH = 'google_cloud_automlops.provisioning.pulumi.templates' GCLOUD_TEMPLATES_PATH = 'google_cloud_automlops.provisioning.gcloud.templates' -KFP_TEMPLATES_PATH = 'google_cloud_automlops.orchestration.kfp.templates' +KFP_TEMPLATES_PATH = 'google_cloud_automlops.orchestration.templates.kfp' CLOUDBUILD_TEMPLATES_PATH = 'google_cloud_automlops.deployments.cloudbuild.templates' GITHUB_ACTIONS_TEMPLATES_PATH = 'google_cloud_automlops.deployments.github_actions.templates' GITOPS_TEMPLATES_PATH = 'google_cloud_automlops.deployments.gitops.templates' From 8f0947378dafae047020f5e9dfbf19365cbc2311 Mon Sep 17 00:00:00 2001 From: Allegra Noto Date: Tue, 2 Apr 2024 14:54:42 -0400 Subject: [PATCH 09/11] CHECKPOINT -- FIRST MODEL MONITORING ADJUSTMENTS --- google_cloud_automlops/AutoMLOps.py | 31 +- google_cloud_automlops/orchestration/base.py | 46 +-- google_cloud_automlops/orchestration/kfp.py | 86 ++++-- .../orchestration/templates/kfp/README.md.j2 | 6 +- .../orchestration/templates/kfp/__init__.py | 2 +- .../templates/kfp/components/__init__.py | 2 +- .../kfp/components/component_base/__init__.py | 2 +- .../components/component_base/src/__init__.py | 2 +- .../kfp/model_monitoring/__init__.py | 13 + .../kfp/model_monitoring/monitor.py.j2 | 286 ++++++++++++++++++ .../kfp/model_monitoring/requirements.txt.j2 | 4 + .../templates/kfp/pipelines/__init__.py | 2 +- .../templates/kfp/scripts/__init__.py | 2 +- .../scripts/create_model_monitoring_job.sh.j2 | 9 + .../templates/kfp/services/__init__.py | 2 +- .../services/submission_service/__init__.py | 2 +- .../services/submission_service/main.py.j2 | 41 ++- .../submission_service/requirements.txt.j2 | 1 + google_cloud_automlops/utils/utils.py | 121 +++++--- 19 files changed, 544 insertions(+), 116 deletions(-) create mode 100644 google_cloud_automlops/orchestration/templates/kfp/model_monitoring/__init__.py create mode 100644 google_cloud_automlops/orchestration/templates/kfp/model_monitoring/monitor.py.j2 create mode 100644 google_cloud_automlops/orchestration/templates/kfp/model_monitoring/requirements.txt.j2 create mode 100644 google_cloud_automlops/orchestration/templates/kfp/scripts/create_model_monitoring_job.sh.j2 diff --git a/google_cloud_automlops/AutoMLOps.py b/google_cloud_automlops/AutoMLOps.py index e9a93bd..8d99a96 100644 --- a/google_cloud_automlops/AutoMLOps.py +++ b/google_cloud_automlops/AutoMLOps.py @@ -44,6 +44,7 @@ GENERATED_RESOURCES_SH_FILE, GENERATED_SERVICES_DIRS, GENERATED_TERRAFORM_DIRS, + GENERATED_MODEL_MONITORING_DIRS, OUTPUT_DIR ) from google_cloud_automlops.utils.utils import ( @@ -57,7 +58,7 @@ read_yaml_file, resources_generation_manifest, stringify_job_spec_list, - validate_schedule, + 
validate_use_ci, write_file ) # Orchestration imports @@ -130,6 +131,7 @@ def launchAll( schedule_location: Optional[str] = DEFAULT_RESOURCE_LOCATION, schedule_name: Optional[str] = None, schedule_pattern: Optional[str] = DEFAULT_SCHEDULE_PATTERN, + setup_model_monitoring: Optional[bool] = False, source_repo_branch: Optional[str] = DEFAULT_SOURCE_REPO_BRANCH, source_repo_name: Optional[str] = None, source_repo_type: Optional[str] = CodeRepository.CLOUD_SOURCE_REPOSITORIES.value, @@ -170,6 +172,7 @@ def launchAll( schedule_location: The location of the scheduler resource. schedule_name: The name of the scheduler resource. schedule_pattern: Cron formatted value used to create a Scheduled retrain job. + setup_model_monitoring: Boolean parameter which specifies whether to set up a Vertex AI Model Monitoring Job. source_repo_branch: The branch to use in the source repository. source_repo_name: The name of the source repository to use. source_repo_type: The type of source repository to use (e.g. gitlab, github, etc.) @@ -206,6 +209,7 @@ def launchAll( schedule_location=schedule_location, schedule_name=schedule_name, schedule_pattern=schedule_pattern, + setup_model_monitoring=setup_model_monitoring, source_repo_branch=source_repo_branch, source_repo_name=source_repo_name, source_repo_type=source_repo_type, @@ -244,6 +248,7 @@ def generate( schedule_location: Optional[str] = DEFAULT_RESOURCE_LOCATION, schedule_name: Optional[str] = None, schedule_pattern: Optional[str] = DEFAULT_SCHEDULE_PATTERN, + setup_model_monitoring: Optional[bool] = False, source_repo_branch: Optional[str] = DEFAULT_SOURCE_REPO_BRANCH, source_repo_name: Optional[str] = None, source_repo_type: Optional[str] = CodeRepository.CLOUD_SOURCE_REPOSITORIES.value, @@ -259,8 +264,8 @@ def generate( Args: See launchAll() function. 
""" - # Validate that use_ci=True if schedule_pattern parameter is set - validate_schedule(schedule_pattern, use_ci) + # Validate that use_ci=True if schedule_pattern parameter is set or setup_model_monitoring is True + validate_use_ci(setup_model_monitoring, schedule_pattern, use_ci) # Validate currently supported tools if artifact_repo_type not in [e.value for e in ArtifactRepository]: @@ -288,6 +293,8 @@ def generate( make_dirs(GENERATED_TERRAFORM_DIRS) if deployment_framework == Deployer.GITHUB_ACTIONS.value: make_dirs(GENERATED_GITHUB_DIRS) + if setup_model_monitoring: + make_dirs(GENERATED_MODEL_MONITORING_DIRS) # Set derived vars if none were given for certain variables derived_artifact_repo_name = coalesce(artifact_repo_name, f'{naming_prefix}-artifact-registry') @@ -321,6 +328,7 @@ def generate( schedule_location=schedule_location, schedule_name=derived_schedule_name, schedule_pattern=schedule_pattern, + setup_model_monitoring=setup_model_monitoring, source_repo_branch=source_repo_branch, source_repo_name=derived_source_repo_name, source_repo_type=source_repo_type, @@ -345,10 +353,11 @@ def generate( description=pipeline_glob.description, comps_dict=components_dict) kfppipe.build(base_image, - custom_training_job_specs, + derived_custom_training_job_specs, pipeline_params, pubsub_topic_name, - use_ci) + use_ci, + setup_model_monitoring) # Write kubeflow components code logging.info(f'Writing kubeflow components code to {BASE_DIR}components') @@ -356,10 +365,20 @@ def generate( logging.info(f' -- Writing {comp.name}') KFPComponent(func=comp.func, packages_to_install=comp.packages_to_install).build() + if setup_model_monitoring: + logging.info(f'Writing model monitoring code to {BASE_DIR}model_monitoring') + # If user specified services, write services scripts if use_ci: logging.info(f'Writing submission service code to {BASE_DIR}services') - KFPServices().build() + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + KFPServices().build( + pipeline_storage_path=defaults['pipelines']['pipeline_storage_path'], + pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'], + pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'], + project_id=project_id, + setup_model_monitoring=setup_model_monitoring + ) # Generate files required to provision resources if provisioning_framework == Provisioner.GCLOUD.value: diff --git a/google_cloud_automlops/orchestration/base.py b/google_cloud_automlops/orchestration/base.py index d529b48..c0b3730 100644 --- a/google_cloud_automlops/orchestration/base.py +++ b/google_cloud_automlops/orchestration/base.py @@ -232,13 +232,15 @@ def __init__(self, self.use_ci = None self.project_id = None self.gs_pipeline_job_spec_path = None + self.setup_model_monitoring = None def build(self, base_image, custom_training_job_specs, pipeline_params, pubsub_topic_name, - use_ci): + use_ci, + setup_model_monitoring): """Instantiates an abstract built method to create and write pipeline files. Also reads in defaults file to save default arguments to attributes. 
@@ -253,6 +255,7 @@ def build(self, pipeline_params (_type_): _description_ pubsub_topic_name (_type_): _description_ use_ci (_type_): _description_ + setup_model_monitoring (_type_): _description_ """ # Save parameters as attributes self.base_image = base_image @@ -260,6 +263,7 @@ def build(self, self.pipeline_params = pipeline_params self.pubsub_topic_name = pubsub_topic_name self.use_ci = use_ci + self.setup_model_monitoring = setup_model_monitoring # Extract additional attributes from defaults file defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) @@ -313,42 +317,44 @@ def __init__(self) -> None: self.pipeline_job_submission_service_type = None self.project_id = None self.pipeline_job_submission_service_type = None + self.setup_model_monitoring = None # Set directory for files to be written to self.submission_service_base_dir = BASE_DIR + 'services/submission_service' - def build(self): + def build(self, + pipeline_storage_path, + pipeline_job_runner_service_account, + pipeline_job_submission_service_type, + project_id, + setup_model_monitoring): """Constructs and writes a Dockerfile, requirements.txt, and main.py to the services/submission_service directory. """ # Read in defaults params - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - self.pipeline_storage_path = defaults['pipelines']['pipeline_storage_path'] - self.pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'] - self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] - self.project_id = defaults['gcp']['project_id'] - self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + self.pipeline_storage_path = pipeline_storage_path + self.pipeline_job_runner_service_account = pipeline_job_runner_service_account + self.pipeline_job_submission_service_type = pipeline_job_submission_service_type + self.project_id = project_id + self.setup_model_monitoring = setup_model_monitoring # Set directory for files to be written to self.submission_service_base_dir = BASE_DIR + 'services/submission_service' # Build services files - self._build_main() - self._build_dockerfile() - self._build_requirements() + self._build_submission_services() - def _build_dockerfile(self): - """Abstract method to create the Dockerfile file of the services/submission_service directory. - """ - raise NotImplementedError("Subclass needs to define this.") + # Setup model monitoring + if self.setup_model_monitoring: + self._build_monitoring() - def _build_requirements(self): - """Abstract method to create the requirements.txt file of the services/submission_service directory. + def _build_monitoring(self): + """Abstract method to create the model monitoring files. """ - raise NotImplementedError("Subclass needs to define this.") + raise NotImplementedError("Subclass needs to define this") - def _build_main(self): - """Abstract method to create the main.py file of the services/submission_service directory. + def _build_submission_services(self): + """Abstract method to create the Dockerfile, requirements.txt, and main.py files of the services/submission_service directory. 
""" raise NotImplementedError("Subclass needs to define this.") diff --git a/google_cloud_automlops/orchestration/kfp.py b/google_cloud_automlops/orchestration/kfp.py index a208ce3..6762996 100644 --- a/google_cloud_automlops/orchestration/kfp.py +++ b/google_cloud_automlops/orchestration/kfp.py @@ -29,7 +29,7 @@ # Try backported to PY<37 `importlib_resources` from importlib_resources import files as import_files -from google_cloud_automlops.orchestration.base import BaseComponent, BasePipeline, BaseServices +from google_cloud_automlops.orchestration.base import BaseComponent, BasePipeline, BaseServices, BaseMonitoring from google_cloud_automlops.utils.utils import ( execute_process, get_components_list, @@ -47,6 +47,9 @@ GENERATED_COMPONENT_BASE, GENERATED_DEFAULTS_FILE, GENERATED_LICENSE, + GENERATED_MODEL_MONITORING_MONITOR_PY_FILE, + GENERATED_MODEL_MONITORING_REQUIREMENTS_FILE, + GENERATED_MODEL_MONITORING_SH_FILE, GENERATED_PARAMETER_VALUES_PATH, GENERATED_PIPELINE_FILE, GENERATED_PIPELINE_REQUIREMENTS_FILE, @@ -275,7 +278,8 @@ def build(self, custom_training_job_specs, pipeline_params, pubsub_topic_name, - use_ci): + use_ci, + setup_model_monitoring): """Constructs files for running and managing Kubeflow pipelines. Files created under AutoMLOps/: @@ -302,6 +306,7 @@ def build(self, self.pipeline_params = pipeline_params self.pubsub_topic_name = pubsub_topic_name self.use_ci = use_ci + self.setup_model_monitoring = setup_model_monitoring # Extract additional attributes from defaults file defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) @@ -317,9 +322,10 @@ def build(self, # README.md: Write description of the contents of the directory write_file( - filepath=f'{BASE_DIR}README.md', + filepath=f'{BASE_DIR}README.md', text=render_jinja( template_path=import_files(KFP_TEMPLATES_PATH) / 'README.md.j2', + setup_model_monitoring=self.setup_model_monitoring, use_ci=self.use_ci), mode='w') @@ -543,31 +549,51 @@ class KFPServices(BaseServices): def __init__(self) -> None: """Initializes KFPServices Object. 
""" + super().__init__() - def _build_dockerfile(self): - """Writes the services/submission_service/Dockerfile #TODO add more - """ - # Read in defaults params - defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - self.pipeline_storage_path = defaults['pipelines']['pipeline_storage_path'] - self.pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'] - self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] - self.project_id = defaults['gcp']['project_id'] - self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + def build(self, + pipeline_storage_path, + pipeline_job_runner_service_account, + pipeline_job_submission_service_type, + project_id, + setup_model_monitoring): + super().build( + pipeline_storage_path, + pipeline_job_runner_service_account, + pipeline_job_submission_service_type, + project_id, + setup_model_monitoring) + + def _build_monitoring(self): + # Writes script create_model_monitoring_job.sh which creates a Vertex AI model monitoring job + write_and_chmod( + filepath=GENERATED_MODEL_MONITORING_SH_FILE, + text=render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.scripts') / 'create_model_monitoring_job.sh.j2', + generated_license=GENERATED_LICENSE, + base_dir=BASE_DIR + )) - # Set directory for files to be written to - self.submission_service_base_dir = BASE_DIR + 'services/submission_service' + # Writes monitor.py to create or update a model monitoring job in Vertex AI for a deployed model endpoint + write_file( + filepath=GENERATED_MODEL_MONITORING_MONITOR_PY_FILE, + text=render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.model_monitoring') / 'monitor.py.j2', + generated_license=GENERATED_LICENSE + ), + mode='w') + # Writes a requirements.txt to the model_monitoring directory write_file( - f'{self.submission_service_base_dir}/Dockerfile', - render_jinja( - template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 'Dockerfile.j2', - base_dir=BASE_DIR, - generated_license=GENERATED_LICENSE), - 'w') + filepath=GENERATED_MODEL_MONITORING_REQUIREMENTS_FILE, + text=render_jinja(template_path=import_files(KFP_TEMPLATES_PATH + '.model_monitoring') / 'requirements.txt.j2'), + mode='w') - def _build_requirements(self): - """Writes the services/submission_service/requirements.txt #TODO add more + def _build_submission_services(self): + """Writes the #TODO add more + services/submission_service/requirements.txt + services/submission_service/main.py + services/submission_service/Dockerfile """ write_file( f'{self.submission_service_base_dir}/requirements.txt', @@ -577,9 +603,6 @@ def _build_requirements(self): pipeline_job_submission_service_type=self.pipeline_job_submission_service_type), 'w') - def _build_main(self): - """Writes the services/submission_service/main.py file to #TODO add more - """ write_file( f'{self.submission_service_base_dir}/main.py', render_jinja( @@ -588,5 +611,14 @@ def _build_main(self): pipeline_root=self.pipeline_storage_path, pipeline_job_runner_service_account=self.pipeline_job_runner_service_account, pipeline_job_submission_service_type=self.pipeline_job_submission_service_type, - project_id=self.project_id), + project_id=self.project_id, + setup_model_monitoring=self.setup_model_monitoring), + 'w') + + write_file( + f'{self.submission_service_base_dir}/Dockerfile', + render_jinja( + template_path=import_files(KFP_TEMPLATES_PATH + '.services.submission_service') / 
'Dockerfile.j2', + base_dir=BASE_DIR, + generated_license=GENERATED_LICENSE), 'w') diff --git a/google_cloud_automlops/orchestration/templates/kfp/README.md.j2 b/google_cloud_automlops/orchestration/templates/kfp/README.md.j2 index 32235f2..944a3c2 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/README.md.j2 +++ b/google_cloud_automlops/orchestration/templates/kfp/README.md.j2 @@ -39,7 +39,11 @@ For a user-guide, please view these [slides](https://github.com/GoogleCloudPlatf ├── build_pipeline_spec.sh : Compiles the pipeline specs. ├── run_pipeline.sh : Submit the PipelineJob to Vertex AI. ├── run_all.sh : Builds components, compiles pipeline specs, and submits the PipelineJob.{% if use_ci %} - ├── publish_to_topic.sh : Publishes a message to a Pub/Sub topic to invoke the pipeline job submission service. + ├── publish_to_topic.sh : Publishes a message to a Pub/Sub topic to invoke the pipeline job submission service.{% if setup_model_monitoring %} + ├── create_model_monitoring_job.sh : Creates or updated a Vertex AI model monitoring job for a given deployed model endpoint. +├── model_monitoring : Code for building and maintaining model monitoring jobs. + ├── requirements.txt : Package requirements for creating and updating model monitoring jobs. + ├── monitor.py : Creates a ModelDeploymentMonitoringJob and optionally creates a Log Sink for automatic retraining.{% endif %} ├── services : MLOps services related to continuous training. ├── submission_service : REST API service used to submit pipeline jobs to Vertex AI. ├── Dockerfile : Dockerfile for running the REST API service. diff --git a/google_cloud_automlops/orchestration/templates/kfp/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/__init__.py index 2379f87..70d7dec 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/__init__.py +++ b/google_cloud_automlops/orchestration/templates/kfp/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC. All Rights Reserved. +# Copyright 2024 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/google_cloud_automlops/orchestration/templates/kfp/components/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/components/__init__.py index 2379f87..70d7dec 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/components/__init__.py +++ b/google_cloud_automlops/orchestration/templates/kfp/components/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC. All Rights Reserved. +# Copyright 2024 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/google_cloud_automlops/orchestration/templates/kfp/components/component_base/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/components/component_base/__init__.py index 2379f87..70d7dec 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/components/component_base/__init__.py +++ b/google_cloud_automlops/orchestration/templates/kfp/components/component_base/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC. All Rights Reserved. +# Copyright 2024 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
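As a usage note, KFPServices.build() is now parameter-driven instead of reading defaults.yaml itself; a minimal sketch of the new call shape follows. The argument names come from this patch, while the import path and all values are assumed placeholders.

from google_cloud_automlops.orchestration.kfp import KFPServices

# Hedged sketch: in AutoMLOps.generate() these values are read from the generated defaults.yaml
# and passed in, rather than being loaded inside the services builder as before.
KFPServices().build(
    pipeline_storage_path='gs://my-bucket/pipeline_root',                                # placeholder
    pipeline_job_runner_service_account='runner-sa@my-project.iam.gserviceaccount.com',  # placeholder
    pipeline_job_submission_service_type='cloud-run',                                    # or 'cloud-functions'
    project_id='my-project',                                                             # placeholder
    setup_model_monitoring=True)
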
diff --git a/google_cloud_automlops/orchestration/templates/kfp/components/component_base/src/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/components/component_base/src/__init__.py index 2379f87..70d7dec 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/components/component_base/src/__init__.py +++ b/google_cloud_automlops/orchestration/templates/kfp/components/component_base/src/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC. All Rights Reserved. +# Copyright 2024 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/google_cloud_automlops/orchestration/templates/kfp/model_monitoring/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/model_monitoring/__init__.py new file mode 100644 index 0000000..70d7dec --- /dev/null +++ b/google_cloud_automlops/orchestration/templates/kfp/model_monitoring/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/google_cloud_automlops/orchestration/templates/kfp/model_monitoring/monitor.py.j2 b/google_cloud_automlops/orchestration/templates/kfp/model_monitoring/monitor.py.j2 new file mode 100644 index 0000000..fb8fda0 --- /dev/null +++ b/google_cloud_automlops/orchestration/templates/kfp/model_monitoring/monitor.py.j2 @@ -0,0 +1,286 @@ +{{generated_license}} +"""Creates a Model Monitoring Job in Vertex AI for a deployed model endpoint.""" + +import argparse +import json +import pprint as pp +import subprocess +import yaml + +from google.cloud import aiplatform +from google.cloud.aiplatform import model_monitoring +from google.cloud import logging +from google.cloud import storage + +def execute_process(command: str, to_null: bool): + """Executes an external shell process. + + Args: + command: The string of the command to execute. + to_null: Determines where to send output. + Raises: + Exception: If an error occurs in executing the script. + """ + stdout = subprocess.DEVNULL if to_null else None + try: + subprocess.run([command], + shell=True, + check=True, + stdout=stdout, + stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as err: + raise RuntimeError(f'Error executing process. {err}') from err + + +def write_file(filepath: str, text: str, mode: str): + """Writes a file at the specified path. Defaults to utf-8 encoding. + + Args: + filepath: Path to the file. + text: Text to be written to file. + mode: Read/write mode to be used. + Raises: + Exception: If an error is encountered writing the file. + """ + try: + with open(filepath, mode, encoding='utf-8') as file: + file.write(text) + file.close() + except OSError as err: + raise OSError(f'Error writing to file. 
{err}') from err + + +def upload_automatic_retraining_parameters( + auto_retraining_params: dict, + gs_auto_retraining_params_path: str, + gs_pipeline_job_spec_path: str, + storage_bucket_name: str): + """Upload automatic pipeline retraining params from local to GCS + + Args: + auto_retraining_params: Pipeline parameter values to use when retraining the model. + gs_auto_retraining_params_path: GCS path of the retraining parameters. + gs_pipeline_job_spec_path: The GCS path of the pipeline job spec. + storage_bucket_name: Name of the storage bucket to write to. + """ + auto_retraining_params['gs_pipeline_spec_path'] = gs_pipeline_job_spec_path + serialized_params = json.dumps(auto_retraining_params, indent=4) + write_file('model_monitoring/automatic_retraining_parameters.json', + serialized_params, 'w') + + storage_client = storage.Client() + bucket = storage_client.get_bucket(storage_bucket_name) + filename = '/'.join(gs_auto_retraining_params_path.split('/')[3:]) + blob = bucket.blob(filename) + blob.upload_from_filename( + 'model_monitoring/automatic_retraining_parameters.json') + + +def create_or_update_sink(sink_name: str, + destination: str, + filter_: str): + """Creates or updates a sink to export logs to the given Pub/Sub topic. + + The filter determines which logs this sink matches and will be exported + to the destination.See + https://cloud.google.com/logging/docs/view/advanced_filters for more + filter information. + + Args: + sink_name: The name of the log sink + destination: The URI of the pub/sub topic to send the logs to. + filter_: The log filter for sending logs. + Filters only monitoring job anomalies. + """ + logging_client = logging.Client() + sink = logging_client.sink(sink_name) + + if sink.exists(): + sink = logging_client.sink(sink_name, + filter_=filter_, + destination=destination) + sink.update() + print(f'Updated Anomaly Log Sink {sink.name}.\n') + else: + sink = logging_client.sink(sink_name, + filter_=filter_, + destination=destination) + sink.create() + print(f'Created Anomaly Log Sink {sink.name}.\n') + + +def create_or_update_monitoring_job( + alert_emails: list, + auto_retraining_params: dict, + drift_thresholds: dict, + gs_auto_retraining_params_path: str, + job_display_name: str, + log_sink_name: str, + model_endpoint: str, + monitoring_interval: int, + monitoring_location: str, + project_id: str, + pubsub_topic_name: str, + sample_rate: float, + skew_thresholds: dict, + target_field: str, + training_dataset: str): + """Creates or updates a model monitoring job on the given model. + + Args: + alert_emails: Optional list of emails to send monitoring alerts. + Email alerts not used if this value is set to None. + auto_retraining_params: Pipeline parameter values to use when retraining the model. + Defaults to None; if left None, the model will not be retrained if an alert is generated. + drift_thresholds: Compares incoming data to data previously seen to check for drift. + job_display_name: Display name of the ModelDeploymentMonitoringJob. The name can be up to 128 characters + long and can be consist of any UTF-8 characters. + gs_auto_retraining_params_path: GCS location of the retraining parameters. + log_sink_name: Name of the log sink object. + model_endpoint: Endpoint resource name of the deployed model to monitoring. + Format: projects/{project}/locations/{location}/endpoints/{endpoint} + monitoring_interval: Configures model monitoring job scheduling interval in hours. + This defines how often the monitoring jobs are triggered. 
+ monitoring_location: Location to retrieve ModelDeploymentMonitoringJob from. + project_id: The project ID. + pubsub_topic_name: The name of the pubsub topic to publish anomaly logs to (for automatic retraining). + sample_rate: Used for drift detection, specifies what percent of requests to the endpoint are randomly sampled + for drift detection analysis. This value most range between (0, 1]. + skew_thresholds: Compares incoming data to the training dataset to check for skew. + target_field: Prediction target column name in training dataset. + training_dataset: Training dataset used to train the deployed model. This field is required if + using skew detection. + """ + aiplatform.init(project=project_id, location=monitoring_location) + + # check if endpoint exists + endpoint_list = aiplatform.Endpoint.list(filter=f'endpoint="{model_endpoint.split("/")[-1]}"') + if not endpoint_list: + raise ValueError(f'Model endpoint {model_endpoint} not found in {monitoring_location}') + else: + endpoint = aiplatform.Endpoint(model_endpoint) + + # Set skew and drift thresholds + if skew_thresholds: + skew_config = model_monitoring.SkewDetectionConfig( + data_source=training_dataset, + skew_thresholds=skew_thresholds, + target_field=target_field) + else: + skew_config = None + + if drift_thresholds: + drift_config = model_monitoring.DriftDetectionConfig( + drift_thresholds=drift_thresholds) + else: + drift_config = None + + objective_config = model_monitoring.ObjectiveConfig( + skew_config, drift_config, explanation_config=None) + + # Create sampling configuration + random_sampling = model_monitoring.RandomSampleConfig( + sample_rate=sample_rate) + + # Create schedule configuration + schedule_config = model_monitoring.ScheduleConfig( + monitor_interval=monitoring_interval) + + if not alert_emails: + alert_emails = [] + + # Create alerting configuration. + alerting_config = model_monitoring.EmailAlertConfig( + user_emails=alert_emails, enable_logging=True) + + # check if job already exists + job_list = aiplatform.ModelDeploymentMonitoringJob.list( + filter=f'display_name="{job_display_name}"') + if not job_list: + # Create the monitoring job. + job = aiplatform.ModelDeploymentMonitoringJob.create( + display_name=job_display_name, + logging_sampling_strategy=random_sampling, + schedule_config=schedule_config, + alert_config=alerting_config, + objective_configs=objective_config, + project=project_id, + location=monitoring_location, + endpoint=endpoint, + enable_monitoring_pipeline_logs=True) + else: + # Update the monitoring job. 
+ old_job_id = job_list[0].resource_name.split('/')[-1] + job = aiplatform.ModelDeploymentMonitoringJob(old_job_id).update( + display_name=job_display_name, + logging_sampling_strategy=random_sampling, + schedule_config=schedule_config, + alert_config=alerting_config, + objective_configs=objective_config, + enable_monitoring_pipeline_logs=True) + print(f'Updated monitoring job {old_job_id} with new arguments.') + + if auto_retraining_params: + # Filter to only send anomaly logs to pub/sub + job_id = job.resource_name.split('/')[-1] + monitoring_anomaly_log_filter = ( + f'resource.type="aiplatform.googleapis.com/ModelDeploymentMonitoringJob"\n' + f'resource.labels.location="{monitoring_location}"\n' + f'resource.labels.model_deployment_monitoring_job="{job_id}"\n' + f'logName="projects/{project_id}/logs/aiplatform.googleapis.com%2Fmodel_monitoring_anomaly"\n' + f'severity>=WARNING\n') + anomaly_log_destination = f'''pubsub.googleapis.com/projects/{project_id}/topics/{pubsub_topic_name}''' + # Create a log sink to send logs to pub/sub + create_or_update_sink( + sink_name=log_sink_name, + destination=anomaly_log_destination, + filter_=monitoring_anomaly_log_filter) + + print(f'All anomaly logs for this model monitoring job are being routed to pub/sub topic {pubsub_topic_name} for automatic retraining.') + print(f'Retraining will use the following parameters located at {gs_auto_retraining_params_path}: \n') + pp.pprint(auto_retraining_params) + + # Update service account to be able to publish to Pub/Sub + cloud_logs_sa = 'cloud-logs@system.gserviceaccount.com' + newline = '\n' + update_iam = ( + f'''gcloud projects add-iam-policy-binding {project_id} \{newline}''' + f'''--member="serviceAccount:{cloud_logs_sa}" \{newline}''' + f'''--role="roles/pubsub.publisher"''') + print(f'\nUpdating {cloud_logs_sa} with roles/pubsub.publisher') + execute_process(update_iam, to_null=True) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--config', type=str, + help='The config file for setting monitoring values.') + args = parser.parse_args() + + with open(args.config, 'r', encoding='utf-8') as config_file: + config = yaml.load(config_file, Loader=yaml.FullLoader) + + if config['monitoring']['auto_retraining_params']: + upload_automatic_retraining_parameters( + auto_retraining_params=config['monitoring']['auto_retraining_params'], + gs_auto_retraining_params_path=config['monitoring']['gs_auto_retraining_params_path'], + gs_pipeline_job_spec_path=config['pipelines']['gs_pipeline_job_spec_path'], + storage_bucket_name=config['gcp']['storage_bucket_name']) + + create_or_update_monitoring_job( + alert_emails=config['monitoring']['alert_emails'], + auto_retraining_params=config['monitoring']['auto_retraining_params'], + drift_thresholds=config['monitoring']['drift_thresholds'], + gs_auto_retraining_params_path=config['monitoring']['gs_auto_retraining_params_path'], + job_display_name=config['monitoring']['job_display_name'], + log_sink_name=config['monitoring']['log_sink_name'], + model_endpoint=config['monitoring']['model_endpoint'], + monitoring_interval=config['monitoring']['monitoring_interval'], + monitoring_location=config['monitoring']['monitoring_location'], + project_id=config['gcp']['project_id'], + pubsub_topic_name=config['gcp']['pubsub_topic_name'], + sample_rate=config['monitoring']['sample_rate'], + skew_thresholds=config['monitoring']['skew_thresholds'], + target_field=config['monitoring']['target_field'], + 
training_dataset=config['monitoring']['training_dataset']) + diff --git a/google_cloud_automlops/orchestration/templates/kfp/model_monitoring/requirements.txt.j2 b/google_cloud_automlops/orchestration/templates/kfp/model_monitoring/requirements.txt.j2 new file mode 100644 index 0000000..0469e86 --- /dev/null +++ b/google_cloud_automlops/orchestration/templates/kfp/model_monitoring/requirements.txt.j2 @@ -0,0 +1,4 @@ +google-cloud-aiplatform +google-cloud-logging +google-cloud-storage +pyyaml \ No newline at end of file diff --git a/google_cloud_automlops/orchestration/templates/kfp/pipelines/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/pipelines/__init__.py index 2379f87..70d7dec 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/pipelines/__init__.py +++ b/google_cloud_automlops/orchestration/templates/kfp/pipelines/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC. All Rights Reserved. +# Copyright 2024 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/google_cloud_automlops/orchestration/templates/kfp/scripts/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/scripts/__init__.py index 2379f87..70d7dec 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/scripts/__init__.py +++ b/google_cloud_automlops/orchestration/templates/kfp/scripts/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC. All Rights Reserved. +# Copyright 2024 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/google_cloud_automlops/orchestration/templates/kfp/scripts/create_model_monitoring_job.sh.j2 b/google_cloud_automlops/orchestration/templates/kfp/scripts/create_model_monitoring_job.sh.j2 new file mode 100644 index 0000000..fc1c04c --- /dev/null +++ b/google_cloud_automlops/orchestration/templates/kfp/scripts/create_model_monitoring_job.sh.j2 @@ -0,0 +1,9 @@ +#!/bin/bash +{{generated_license}} +# Creates a Vertex AI model monitoring job. +# This script should run from the {{base_dir}} directory +# Change directory in case this is not the script root. + +CONFIG_FILE=configs/defaults.yaml + +python3 -m model_monitoring.monitor --config $CONFIG_FILE \ No newline at end of file diff --git a/google_cloud_automlops/orchestration/templates/kfp/services/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/services/__init__.py index 2379f87..70d7dec 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/services/__init__.py +++ b/google_cloud_automlops/orchestration/templates/kfp/services/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC. All Rights Reserved. +# Copyright 2024 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/__init__.py b/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/__init__.py index 2379f87..70d7dec 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/__init__.py +++ b/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC. All Rights Reserved. +# Copyright 2024 Google LLC. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/main.py.j2 b/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/main.py.j2 index b518546..a7e29f9 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/main.py.j2 +++ b/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/main.py.j2 @@ -10,15 +10,28 @@ import flask {% if pipeline_job_submission_service_type == 'cloud-functions' %}import functions_framework{% endif %} from google.cloud import aiplatform import google.cloud.logging +{% if setup_model_monitoring %}from google.cloud import storage -{% if pipeline_job_submission_service_type == 'cloud-run' %}app = flask.Flask(__name__){% endif %} -client = google.cloud.logging.Client(project="project") -client.setup_logging() - +NAMING_PREFIX = '{{naming_prefix}}'{% endif %} PROJECT_ID = '{{project_id}}' PIPELINE_ROOT = '{{pipeline_root}}' PIPELINE_JOB_RUNNER_SERVICE_ACCOUNT = '{{pipeline_job_runner_service_account}}' +{% if pipeline_job_submission_service_type == 'cloud-run' %}app = flask.Flask(__name__){% endif %} +client = google.cloud.logging.Client(project=PROJECT_ID) +client.setup_logging() + +{% if setup_model_monitoring %} +def read_gs_auto_retraining_params_file(): + storage_client = storage.Client(project=PROJECT_ID) + bucket_name = PIPELINE_ROOT.split('/')[2] + bucket = storage_client.get_bucket(bucket_name) + file_name = f'pipeline_root/{NAMING_PREFIX}/automatic_retraining_parameters.json' + blob = bucket.blob(file_name) + data = json.loads(blob.download_as_string(client=None)) + logging.info(f'Retraining using the following parameters located at {bucket_name}/{file_name}: \n{data}') + return data +{% endif %} {% if pipeline_job_submission_service_type == 'cloud-functions' %}@functions_framework.http{% elif pipeline_job_submission_service_type == 'cloud-run' %}@app.route('/', methods=['POST']){% endif %} def process_request({% if pipeline_job_submission_service_type == 'cloud-functions' %}request: flask.Request{% endif %}) -> flask.Response: """HTTP web service to trigger pipeline execution. 
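To show where the optional_labels added in the next hunk end up, here is a hedged sketch of the equivalent Vertex AI call made by the submission service; all paths, names, and parameter values are assumed placeholders, and only the labels plumbing reflects this patch.

from google.cloud import aiplatform

# Hedged sketch: anomaly-triggered runs carry a 'trigger' label so retraining jobs started by
# the monitoring log sink can be distinguished from scheduled or manual pipeline runs.
job = aiplatform.PipelineJob(
    display_name='mlops-pipeline-run',
    template_path='gs://my-bucket/pipeline_root/my-prefix/pipeline_job.json',  # placeholder
    pipeline_root='gs://my-bucket/pipeline_root',                              # placeholder
    parameter_values={'model_dir': 'gs://my-bucket/models'},                   # placeholder
    enable_caching=False,
    labels={'trigger': 'monitoring_anomaly'})
job.submit(service_account='runner-sa@my-project.iam.gserviceaccount.com')     # placeholder
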
@@ -47,6 +60,16 @@ def process_request({% if pipeline_job_submission_service_type == 'cloud-functio logging.info('JSON Recieved:') logging.info(data_payload) + optional_labels = {} + {% if setup_model_monitoring %} + try: + if data_payload['logName'] == f'projects/{PROJECT_ID}/logs/aiplatform.googleapis.com%2Fmodel_monitoring_anomaly': + logging.info('Model monitoring anomaly detected - triggering model retraining.') + data_payload = read_gs_auto_retraining_params_file() + optional_labels['trigger'] = 'monitoring_anomaly' + except KeyError: + pass + {% endif %} if 'gs_pipeline_spec_path' in data_payload: gs_pipeline_spec_path = data_payload['gs_pipeline_spec_path'] del data_payload['gs_pipeline_spec_path'] @@ -67,7 +90,8 @@ def process_request({% if pipeline_job_submission_service_type == 'cloud-functio pipeline_job_runner_service_account=PIPELINE_JOB_RUNNER_SERVICE_ACCOUNT, pipeline_params=data_payload, pipeline_spec_path=gs_pipeline_spec_path, - experiment=vertex_exp) + experiment=vertex_exp, + labels=optional_labels) return flask.make_response({ 'dashboard_uri': dashboard_uri, 'resource_name': resource_name @@ -85,7 +109,8 @@ def submit_pipeline( pipeline_spec_path: str, experiment: str, display_name: str = 'mlops-pipeline-run', - enable_caching: bool = False) -> Tuple[str, str]: + enable_caching: bool = False, + labels: dict = None) -> Tuple[str, str]: """Submits a pipeline run. Args: @@ -97,6 +122,7 @@ def submit_pipeline( experiment: Optional name of Vertex AI experiment. display_name: Name to call the pipeline. enable_caching: Should caching be enabled (Boolean) + labels: Optional labels to be added to the PipelineJob. """ logging.info('Pipeline Parms Configured:') logging.info(pipeline_params) @@ -107,7 +133,8 @@ def submit_pipeline( template_path = pipeline_spec_path, pipeline_root = pipeline_root, parameter_values = pipeline_params, - enable_caching = enable_caching) + enable_caching = enable_caching, + labels = labels) logging.info('AI Platform job built. Submitting...') job.submit( experiment=experiment, diff --git a/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/requirements.txt.j2 b/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/requirements.txt.j2 index 36167f4..3845685 100644 --- a/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/requirements.txt.j2 +++ b/google_cloud_automlops/orchestration/templates/kfp/services/submission_service/requirements.txt.j2 @@ -1,5 +1,6 @@ {{pinned_kfp_version}} google-cloud-aiplatform google-cloud-logging +google-cloud-storage Flask {% if pipeline_job_submission_service_type == 'cloud-functions' %}functions-framework==3.*{% elif pipeline_job_submission_service_type == 'cloud-run' %}gunicorn{% endif %} \ No newline at end of file diff --git a/google_cloud_automlops/utils/utils.py b/google_cloud_automlops/utils/utils.py index 99d0868..146d98f 100644 --- a/google_cloud_automlops/utils/utils.py +++ b/google_cloud_automlops/utils/utils.py @@ -233,17 +233,21 @@ def execute_process(command: str, to_null: bool): raise RuntimeError(f'Error executing process. {err}') from err -def validate_schedule(schedule_pattern: str, use_ci: str): - """Validates that the inputted schedule parameter aligns with the use_ci configuration. +def validate_use_ci(setup_model_monitoring: bool, schedule_pattern: str, use_ci: str): + """Validates that the inputted schedule parameter and model_monitoring parameter align with the + use_ci configuration. 
Note: this function does not validate that schedule_pattern is a properly formatted cron value. Cron format validation is done in the backend by GCP. Args: + setup_model_monitoring: Boolean parameter which specifies whether to set up a Vertex AI Model Monitoring Job. schedule_pattern: Cron formatted value used to create a Scheduled retrain job. use_ci: Flag that determines whether to use Cloud CI/CD. Raises: - Exception: If schedule is not cron formatted or use_ci validation fails. + Exception: If use_ci validation fails. """ + if setup_model_monitoring and not use_ci: + raise ValueError('use_ci must be set to True to use Model Monitoring.') if schedule_pattern != DEFAULT_SCHEDULE_PATTERN and not use_ci: raise ValueError('use_ci must be set to True to use Cloud Scheduler.') @@ -369,14 +373,15 @@ def create_default_config(artifact_repo_location: str, schedule_location: str, schedule_name: str, schedule_pattern: str, + setup_model_monitoring: bool, source_repo_branch: str, source_repo_name: str, source_repo_type: str, storage_bucket_location: str, storage_bucket_name: str, use_ci: bool, - vpc_connector: str): - """Creates defaults.yaml file contents. This defaults + vpc_connector: str) -> dict: + """Creates defaults.yaml file contents as a dict. This defaults file is used by subsequent functions and by the pipeline files themselves. @@ -400,6 +405,7 @@ def create_default_config(artifact_repo_location: str, schedule_location: The location of the scheduler resource. schedule_name: The name of the scheduler resource. schedule_pattern: Cron formatted value used to create a Scheduled retrain job. + setup_model_monitoring: Boolean parameter which specifies whether to set up a Vertex AI Model Monitoring Job. source_repo_branch: The branch to use in the source repository. source_repo_name: The name of the source repository to use. source_repo_type: The type of source repository to use (e.g. gitlab, github, etc.) @@ -409,49 +415,70 @@ def create_default_config(artifact_repo_location: str, vpc_connector: The name of the vpc connector to use. 
Returns: - str: Defaults yaml file content + dict: Defaults yaml file content """ - return ( - GENERATED_LICENSE + - f'# These values are descriptive only - do not change.\n' - f'# Rerun AutoMLOps.generate() to change these values.\n' - f'gcp:\n' - f' artifact_repo_location: {artifact_repo_location}\n' - f' artifact_repo_name: {artifact_repo_name}\n' - f' artifact_repo_type: {artifact_repo_type}\n' - f' base_image: {base_image}\n' - f' build_trigger_location: {build_trigger_location}\n' - f' build_trigger_name: {build_trigger_name}\n' - f' naming_prefix: {naming_prefix}\n' - f' pipeline_job_runner_service_account: {pipeline_job_runner_service_account}\n' - f' pipeline_job_submission_service_location: {pipeline_job_submission_service_location}\n' - f' pipeline_job_submission_service_name: {pipeline_job_submission_service_name}\n' - f' pipeline_job_submission_service_type: {pipeline_job_submission_service_type}\n' - f' project_id: {project_id}\n' - f' pubsub_topic_name: {pubsub_topic_name}\n' - f' schedule_location: {schedule_location}\n' - f' schedule_name: {schedule_name}\n' - f' schedule_pattern: {schedule_pattern}\n' - f' source_repository_branch: {source_repo_branch}\n' - f' source_repository_name: {source_repo_name}\n' - f' source_repository_type: {source_repo_type}\n' - f' storage_bucket_location: {storage_bucket_location}\n' - f' storage_bucket_name: {storage_bucket_name}\n' - f' vpc_connector: {vpc_connector}\n' - f'\n' - f'pipelines:\n' - f' gs_pipeline_job_spec_path: gs://{storage_bucket_name}/pipeline_root/{naming_prefix}/pipeline_job.json\n' - f' parameter_values_path: {GENERATED_PARAMETER_VALUES_PATH}\n' - f' pipeline_component_directory: components\n' - f' pipeline_job_spec_path: {GENERATED_PIPELINE_JOB_SPEC_PATH}\n' - f' pipeline_region: {storage_bucket_location}\n' - f' pipeline_storage_path: gs://{storage_bucket_name}/pipeline_root\n' - f'\n' - f'tooling:\n' - f' deployment_framework: {deployment_framework}\n' - f' provisioning_framework: {provisioning_framework}\n' - f' orchestration_framework: {orchestration_framework}\n' - f' use_ci: {use_ci}\n') + defaults = {} + defaults['gcp'] = {} + defaults['gcp']['artifact_repo_location'] = artifact_repo_location + defaults['gcp']['artifact_repo_name'] = artifact_repo_name + defaults['gcp']['artifact_repo_type'] = artifact_repo_type + defaults['gcp']['base_image'] = base_image + if use_ci: + defaults['gcp']['build_trigger_location'] = build_trigger_location + defaults['gcp']['build_trigger_name'] = build_trigger_name + defaults['gcp']['naming_prefix'] = naming_prefix + defaults['gcp']['pipeline_job_runner_service_account'] = pipeline_job_runner_service_account + if use_ci: + defaults['gcp']['pipeline_job_submission_service_location'] = pipeline_job_submission_service_location + defaults['gcp']['pipeline_job_submission_service_name'] = pipeline_job_submission_service_name + defaults['gcp']['pipeline_job_submission_service_type'] = pipeline_job_submission_service_type + defaults['gcp']['project_id'] = project_id + defaults['gcp']['setup_model_monitoring'] = setup_model_monitoring + if use_ci: + defaults['gcp']['pubsub_topic_name'] = pubsub_topic_name + defaults['gcp']['schedule_location'] = schedule_location + defaults['gcp']['schedule_name'] = schedule_name + defaults['gcp']['schedule_pattern'] = schedule_pattern + defaults['gcp']['source_repository_branch'] = source_repo_branch + defaults['gcp']['source_repository_name'] = source_repo_name + defaults['gcp']['source_repository_type'] = source_repo_type + 
defaults['gcp']['storage_bucket_location'] = storage_bucket_location + defaults['gcp']['storage_bucket_name'] = storage_bucket_name + if use_ci: + defaults['gcp']['vpc_connector'] = vpc_connector + + defaults['pipelines'] = {} + defaults['pipelines']['gs_pipeline_job_spec_path'] = f'gs://{storage_bucket_name}/pipeline_root/{naming_prefix}/pipeline_job.json' + defaults['pipelines']['parameter_values_path'] = GENERATED_PARAMETER_VALUES_PATH + defaults['pipelines']['pipeline_component_directory'] = 'components' + defaults['pipelines']['pipeline_job_spec_path'] = GENERATED_PIPELINE_JOB_SPEC_PATH + defaults['pipelines']['pipeline_region'] = storage_bucket_location + defaults['pipelines']['pipeline_storage_path'] = f'gs://{storage_bucket_name}/pipeline_root' + + defaults['tooling'] = {} + defaults['tooling']['deployment_framework'] = deployment_framework + defaults['tooling']['provisioning_framework'] = provisioning_framework + defaults['tooling']['orchestration_framework'] = orchestration_framework + defaults['tooling']['use_ci'] = use_ci + + if setup_model_monitoring: + # These fields to be set when AutoMLOps.monitor() is called + defaults['monitoring'] = {} + defaults['monitoring']['target_field'] = None + defaults['monitoring']['model_endpoint'] = None + defaults['monitoring']['alert_emails'] = None + defaults['monitoring']['auto_retraining_params'] = None + defaults['monitoring']['drift_thresholds'] = None + defaults['monitoring']['gs_auto_retraining_params_path'] = None + defaults['monitoring']['job_display_name'] = None + defaults['monitoring']['log_sink_name'] = None + defaults['monitoring']['monitoring_interval'] = None + defaults['monitoring']['monitoring_location'] = None + defaults['monitoring']['sample_rate'] = None + defaults['monitoring']['skew_thresholds'] = None + defaults['monitoring']['training_dataset'] = None + + return defaults def get_required_apis(defaults: dict) -> list: From 6fcc45f3753344a6815e63f1f8076c1bc0086e1e Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 3 Apr 2024 12:55:44 -0400 Subject: [PATCH 10/11] Fixed some formatting issues and moved formatting into a utils file --- google_cloud_automlops/AutoMLOps.py | 3 +- google_cloud_automlops/orchestration/base.py | 35 ++++-------------- google_cloud_automlops/orchestration/kfp.py | 10 ++--- google_cloud_automlops/utils/enums.py | 39 ++++++++++++++++++++ google_cloud_automlops/utils/utils.py | 10 +++-- 5 files changed, 57 insertions(+), 40 deletions(-) create mode 100644 google_cloud_automlops/utils/enums.py diff --git a/google_cloud_automlops/AutoMLOps.py b/google_cloud_automlops/AutoMLOps.py index bb7455d..4992504 100644 --- a/google_cloud_automlops/AutoMLOps.py +++ b/google_cloud_automlops/AutoMLOps.py @@ -47,7 +47,6 @@ GENERATED_RESOURCES_SH_FILE, GENERATED_SERVICES_DIRS, GENERATED_TERRAFORM_DIRS, - GENERATED_MODEL_MONITORING_DIRS, OUTPUT_DIR ) from google_cloud_automlops.utils.utils import ( @@ -66,7 +65,7 @@ write_yaml_file ) # Orchestration imports -from google_cloud_automlops.orchestration.base import ( +from google_cloud_automlops.utils.enums import ( Orchestrator, PipelineJobSubmitter ) diff --git a/google_cloud_automlops/orchestration/base.py b/google_cloud_automlops/orchestration/base.py index c0b3730..1de972e 100644 --- a/google_cloud_automlops/orchestration/base.py +++ b/google_cloud_automlops/orchestration/base.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
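To make the dict-based defaults introduced above concrete, here is a hedged sketch of how such a dict round-trips through the YAML helpers defined in utils.py (the keys and values below are illustrative placeholders, not the full set produced by create_default_config()):

from google_cloud_automlops.utils.utils import read_yaml_file, write_yaml_file

defaults = {
    'gcp': {'project_id': 'my-project', 'naming_prefix': 'demo'},
    'pipelines': {'pipeline_storage_path': 'gs://my-bucket/pipeline_root'},
    'tooling': {'use_ci': True},
}
write_yaml_file('defaults.yaml', defaults, 'w')
assert read_yaml_file('defaults.yaml')['tooling']['use_ci'] is True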
-"""Creates enums for orchestrator and submission service options as well as generic component, pipeline, and services objects.""" +"""Creates generic component, pipeline, and services objects.""" # pylint: disable=anomalous-backslash-in-string # pylint: disable=C0103 @@ -20,7 +20,6 @@ import ast import docstring_parser -from enum import Enum import inspect from typing import Callable, List, Optional, TypeVar, Union @@ -37,23 +36,6 @@ T = TypeVar('T') -class Orchestrator(Enum): - """Enum representing the available options for orchestration management.""" - - KFP = 'kfp' - # ARGO_WORKFLOWS = 'argo-workflows' # roadmap item - # TFX = 'tfx' # roadmap item - # AIRFLOW = 'airflow' # roadmap item - # RAY = 'ray' # roadmap item - - -class PipelineJobSubmitter(Enum): - """Enum representing the available options for the Pipeline Job submission service.""" - - CLOUD_FUNCTIONS = 'cloud-functions' - CLOUD_RUN = 'cloud-run' - - class BaseComponent(): """The Component object represents a component defined by the user. """ @@ -85,7 +67,7 @@ def __init__(self, self.name = func.__name__ self.packages_to_install = [] if not packages_to_install else packages_to_install - # Parse the docstring for description + # Parse the docstring for description self.parsed_docstring = docstring_parser.parse(inspect.getdoc(func)) self.description = self.parsed_docstring.short_description @@ -110,7 +92,7 @@ def build(self): self.project_id = defaults['gcp']['project_id'] self.naming_prefix = defaults['gcp']['naming_prefix'] - raise NotImplementedError("Subclass needs to define this.") + raise NotImplementedError('Subclass needs to define this.') def _get_function_return_types(self) -> list: """Returns a formatted list of function return types. @@ -199,10 +181,9 @@ class BasePipeline(): def __init__(self, func: Optional[Callable] = None, - *, name: Optional[str] = None, description: Optional[str] = None, - comps_dict: dict): + comps_dict: dict = None): """Initiates a pipeline object created out of a function holding all necessary code. @@ -270,7 +251,7 @@ def build(self, self.project_id = defaults['gcp']['project_id'] self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path'] - raise NotImplementedError("Subclass needs to define this.") + raise NotImplementedError('Subclass needs to define this.') def get_pipeline_components(self, pipeline_func: Callable, comps_dict: dict): """Returns a list of components used within a given pipeline. @@ -282,7 +263,7 @@ def get_pipeline_components(self, pipeline_func: Callable, comps_dict: dict): Returns: List: Components from comps_dict used within the pipeline_func. """ - # Retrieves pipeline source code and parses it into an Abstract Syntax Tree (AST) + # Retrieves pipeline source code and parses it into an Abstract Syntax Tree (AST) code = inspect.getsource(pipeline_func) ast_tree = ast.parse(code) @@ -352,9 +333,9 @@ def build(self, def _build_monitoring(self): """Abstract method to create the model monitoring files. """ - raise NotImplementedError("Subclass needs to define this") + raise NotImplementedError('Subclass needs to define this') def _build_submission_services(self): """Abstract method to create the Dockerfile, requirements.txt, and main.py files of the services/submission_service directory. 
""" - raise NotImplementedError("Subclass needs to define this.") + raise NotImplementedError('Subclass needs to define this.') diff --git a/google_cloud_automlops/orchestration/kfp.py b/google_cloud_automlops/orchestration/kfp.py index 6762996..eae2d11 100644 --- a/google_cloud_automlops/orchestration/kfp.py +++ b/google_cloud_automlops/orchestration/kfp.py @@ -29,7 +29,7 @@ # Try backported to PY<37 `importlib_resources` from importlib_resources import files as import_files -from google_cloud_automlops.orchestration.base import BaseComponent, BasePipeline, BaseServices, BaseMonitoring +from google_cloud_automlops.orchestration.base import BaseComponent, BasePipeline, BaseServices from google_cloud_automlops.utils.utils import ( execute_process, get_components_list, @@ -72,7 +72,7 @@ class KFPComponent(BaseComponent): """ def __init__(self, - func: Optional[Callable] = None, + func: Optional[Callable] = None, packages_to_install: Optional[List[str]] = None): """Initiates a KFP Component object created out of a function holding all necessary code. @@ -237,17 +237,13 @@ def _update_params(self, params: list) -> list: class KFPPipeline(BasePipeline): """Creates a KFP specific Pipeline object for #TODO: add more - - Args: - Pipeline (object): Generic Pipeline object. """ def __init__(self, func: Optional[Callable] = None, - *, name: Optional[str] = None, description: Optional[str] = None, - comps_dict: dict) -> None: + comps_dict: dict = None) -> None: """Initiates a KFP pipeline object created out of a function holding all necessary code. diff --git a/google_cloud_automlops/utils/enums.py b/google_cloud_automlops/utils/enums.py new file mode 100644 index 0000000..ac93951 --- /dev/null +++ b/google_cloud_automlops/utils/enums.py @@ -0,0 +1,39 @@ +# Copyright 2023 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Creates enums for orchestrator and submission service options as well as generic component, +pipeline, and services objects.""" + +# pylint: disable=anomalous-backslash-in-string +# pylint: disable=C0103 +# pylint: disable=line-too-long + +from enum import Enum + + +class Orchestrator(Enum): + """Enum representing the available options for orchestration management.""" + + KFP = 'kfp' + # ARGO_WORKFLOWS = 'argo-workflows' # roadmap item + # TFX = 'tfx' # roadmap item + # AIRFLOW = 'airflow' # roadmap item + # RAY = 'ray' # roadmap item + + +class PipelineJobSubmitter(Enum): + """Enum representing the available options for the Pipeline Job submission service.""" + + CLOUD_FUNCTIONS = 'cloud-functions' + CLOUD_RUN = 'cloud-run' diff --git a/google_cloud_automlops/utils/utils.py b/google_cloud_automlops/utils/utils.py index 5b9b9dd..a85e46d 100644 --- a/google_cloud_automlops/utils/utils.py +++ b/google_cloud_automlops/utils/utils.py @@ -47,16 +47,18 @@ PLACEHOLDER_IMAGE ) +from google_cloud_automlops.utils.enums import ( + Orchestrator, + PipelineJobSubmitter +) + from google_cloud_automlops.deployments.enums import ( ArtifactRepository, CodeRepository, Deployer ) from google_cloud_automlops.provisioning.enums import Provisioner -from google_cloud_automlops.orchestration.enums import ( - Orchestrator, - PipelineJobSubmitter -) + def make_dirs(directories: list): """Makes directories with the specified names. From 7f6cc6758e0c06270cd72fbcaa7766ee0f761c27 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 5 Apr 2024 12:50:50 -0400 Subject: [PATCH 11/11] Code cleanup --- google_cloud_automlops/AutoMLOps.py | 15 +- google_cloud_automlops/orchestration/base.py | 136 ++++---- google_cloud_automlops/orchestration/kfp.py | 156 +++++---- google_cloud_automlops/utils/utils.py | 313 +++++++++---------- 4 files changed, 293 insertions(+), 327 deletions(-) diff --git a/google_cloud_automlops/AutoMLOps.py b/google_cloud_automlops/AutoMLOps.py index 4992504..e71ac42 100644 --- a/google_cloud_automlops/AutoMLOps.py +++ b/google_cloud_automlops/AutoMLOps.py @@ -357,12 +357,7 @@ def generate( name=pipeline_glob.name, description=pipeline_glob.description, comps_dict=components_dict) - kfppipe.build(base_image, - derived_custom_training_job_specs, - pipeline_params, - pubsub_topic_name, - use_ci, - setup_model_monitoring) + kfppipe.build(pipeline_params, derived_custom_training_job_specs) # Write kubeflow components code logging.info(f'Writing kubeflow components code to {BASE_DIR}components') @@ -377,13 +372,7 @@ def generate( if use_ci: logging.info(f'Writing submission service code to {BASE_DIR}services') defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) - KFPServices().build( - pipeline_storage_path=defaults['pipelines']['pipeline_storage_path'], - pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'], - pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'], - project_id=project_id, - setup_model_monitoring=setup_model_monitoring - ) + KFPServices().build() # Generate files required to provision resources if provisioning_framework == Provisioner.GCLOUD.value: diff --git a/google_cloud_automlops/orchestration/base.py b/google_cloud_automlops/orchestration/base.py index 1de972e..a8c9986 100644 --- a/google_cloud_automlops/orchestration/base.py +++ b/google_cloud_automlops/orchestration/base.py @@ -19,10 +19,11 @@ # pylint: disable=line-too-long import ast -import docstring_parser import inspect from typing import Callable, 
List, Optional, TypeVar, Union +import docstring_parser + from google_cloud_automlops.utils.utils import ( get_function_source_definition, read_yaml_file @@ -39,7 +40,6 @@ class BaseComponent(): """The Component object represents a component defined by the user. """ - def __init__(self, func: Optional[Callable] = None, packages_to_install: Optional[List[str]] = None): @@ -47,15 +47,15 @@ def __init__(self, all necessary code. Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). - packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. + func (Optional[Callable]): The python function to create a component from. The function + should have type annotations for all its arguments, indicating how it is intended to + be used (e.g. as an input/output Artifact object, a plain parameter, or a path to a + file). Defaults to None. + packages_to_install (Optional[List[str]]): A list of optional packages to install before + executing func. These will always be installed at component runtime. Defaults to None. Raises: - ValueError: Confirms that the input is an existing function. + ValueError: The parameter `func` is not an existing function. """ # Confirm the input is an existing function @@ -83,9 +83,13 @@ def __init__(self, self.naming_prefix = None def build(self): - """Instantiates an abstract built method to create and write task files. Also - reads in defaults file to save default arguments to attributes. + """Instantiates an abstract built method to create and write task files. Also reads in + defaults file to save default arguments to attributes. + + Raises: + NotImplementedError: The subclass has not defined the `build` method. """ + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) self.artifact_repo_location = defaults['gcp']['artifact_repo_location'] self.artifact_repo_name = defaults['gcp']['artifact_repo_name'] @@ -99,6 +103,7 @@ def _get_function_return_types(self) -> list: Returns: list: return value list with types converted to kubeflow spec. + Raises: Exception: If return type is provided and not a NamedTuple. """ @@ -133,8 +138,9 @@ def _get_function_parameters(self) -> list: Returns: list: Params list with types converted to kubeflow spec. + Raises: - Exception: If parameter type hints are not provided. + Exception: Parameter type hints are not provided. """ # Extract function parameter names and their descriptions from the function's docstring signature = inspect.signature(self.func) @@ -160,12 +166,15 @@ def _get_function_parameters(self) -> list: def maybe_strip_optional_from_annotation(self, annotation: T) -> T: """Strips 'Optional' from 'Optional[]' if applicable. - For example:: - Optional[str] -> str - str -> str - List[int] -> List[int] + + For example:: + Optional[str] -> str + str -> str + List[int] -> List[int] + Args: annotation: The original type annotation which may or may not has `Optional`. + Returns: The type inside Optional[] if Optional exists, otherwise the original type. """ @@ -188,14 +197,14 @@ def __init__(self, all necessary code. Args: - func: The python function to create a pipeline from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. 
as an input/output Artifact object, - a plain parameter, or a path to a file). - name: The name of the pipeline. - description: Short description of what the pipeline does. - comps_list: Dictionary of potential components for pipeline to utilize imported - as the global held in AutoMLOps.py. + func (Optional[Callable]): The python function to create a pipeline from. The function + should have type annotations for all its arguments, indicating how it is intended to + be used (e.g. as an input/output Artifact object, a plain parameter, or a path to a + file). Defaults to None. + name (Optional[str]): The name of the pipeline. Defaults to None. + description (Optional[str]): Short description of what the pipeline does. Defaults to None. + comps_dict (dict): Dictionary of potential components for pipeline to utilize imported + as the global held in AutoMLOps.py. Defaults to None. """ # Instantiate and set key pipeline attributes self.func = func @@ -216,14 +225,10 @@ def __init__(self, self.setup_model_monitoring = None def build(self, - base_image, - custom_training_job_specs, - pipeline_params, - pubsub_topic_name, - use_ci, - setup_model_monitoring): - """Instantiates an abstract built method to create and write pipeline files. Also - reads in defaults file to save default arguments to attributes. + pipeline_params: dict, + custom_training_job_specs: Optional[List] = None): + """Instantiates an abstract build method to create and write pipeline files. Also reads in + defaults file to save default arguments to attributes. Files created must include: 1. README.md 2. Dockerfile 3. Requirements.txt Args: - base_image (_type_): _description_ - custom_training_job_specs (_type_): _description_ - pipeline_params (_type_): _description_ - pubsub_topic_name (_type_): _description_ - use_ci (_type_): _description_ - setup_model_monitoring (_type_): _description_ + pipeline_params (dict): Dictionary containing runtime pipeline parameters. + custom_training_job_specs (Optional[List]): Specifies the specs to run the training job with. + Defaults to None. + + Raises: + NotImplementedError: The subclass has not defined the `build` method. """ # Save parameters as attributes - self.base_image = base_image self.custom_training_job_specs = custom_training_job_specs self.pipeline_params = pipeline_params - self.pubsub_topic_name = pubsub_topic_name - self.use_ci = use_ci - self.setup_model_monitoring = setup_model_monitoring # Extract additional attributes from defaults file defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) self.project_id = defaults['gcp']['project_id'] self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path'] + self.base_image = defaults['gcp']['base_image'] + self.pubsub_topic_name = defaults['gcp']['pubsub_topic_name'] + self.use_ci = defaults['tooling']['use_ci'] + self.setup_model_monitoring = defaults['gcp']['setup_model_monitoring'] raise NotImplementedError('Subclass needs to define this.') - def get_pipeline_components(self, pipeline_func: Callable, comps_dict: dict): + def get_pipeline_components(self, + pipeline_func: Callable, + comps_dict: dict) -> list: """Returns a list of components used within a given pipeline.
Args: @@ -303,22 +310,26 @@ def __init__(self) -> None: # Set directory for files to be written to self.submission_service_base_dir = BASE_DIR + 'services/submission_service' - def build(self, - pipeline_storage_path, - pipeline_job_runner_service_account, - pipeline_job_submission_service_type, - project_id, - setup_model_monitoring): - """Constructs and writes a Dockerfile, requirements.txt, and - main.py to the services/submission_service directory. + def build(self): + """Constructs and writes files related to submission services and model monitoring. + + Files created under AutoMLOps/: + services/ + submission_service/ + Dockerfile + main.py + requirements.txt + model_monitoring/ (if requested) + monitor.py + requirements.txt """ - - # Read in defaults params - self.pipeline_storage_path = pipeline_storage_path - self.pipeline_job_runner_service_account = pipeline_job_runner_service_account - self.pipeline_job_submission_service_type = pipeline_job_submission_service_type - self.project_id = project_id - self.setup_model_monitoring = setup_model_monitoring + # Extract additional attributes from defaults file + defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) + self.pipeline_storage_path = defaults['pipelines']['pipeline_storage_path'] + self.pipeline_job_runner_service_account = defaults['gcp']['pipeline_job_runner_service_account'] + self.pipeline_job_submission_service_type = defaults['gcp']['pipeline_job_submission_service_type'] + self.project_id = defaults['gcp']['project_id'] + self.setup_model_monitoring = defaults['gcp']['setup_model_monitoring'] # Set directory for files to be written to self.submission_service_base_dir = BASE_DIR + 'services/submission_service' @@ -332,10 +343,17 @@ def build(self, def _build_monitoring(self): """Abstract method to create the model monitoring files. + + Raises: + NotImplementedError: The subclass has not defined the `_build_monitoring` method. """ raise NotImplementedError('Subclass needs to define this') def _build_submission_services(self): - """Abstract method to create the Dockerfile, requirements.txt, and main.py files of the services/submission_service directory. + """Abstract method to create the Dockerfile, requirements.txt, and main.py files of the + services/submission_service directory. + + Raises: + NotImplementedError: The subclass has not defined the `_build_submission_services` method. """ raise NotImplementedError('Subclass needs to define this.') diff --git a/google_cloud_automlops/orchestration/kfp.py b/google_cloud_automlops/orchestration/kfp.py index eae2d11..75f3f01 100644 --- a/google_cloud_automlops/orchestration/kfp.py +++ b/google_cloud_automlops/orchestration/kfp.py @@ -68,22 +68,21 @@ class KFPComponent(BaseComponent): """Creates a KFP specific Component object for #TODO: add more Args: - Component (object): Generic Component object. + BaseComponent (object): Generic Component object. """ def __init__(self, func: Optional[Callable] = None, packages_to_install: Optional[List[str]] = None): - """Initiates a KFP Component object created out of a function holding - all necessary code. + """Initiates a KFP Component object created out of a function holding all necessary code. Args: - func: The python function to create a component from. The function + func (Optional[Callable]): The python function to create a component from. The function should have type annotations for all its arguments, indicating how it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). 
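As a hedged sketch of the subclassing contract implied by the BaseServices hooks above, a concrete services class only needs to override the two abstract methods; the base class handles the defaults lookup and output directory (the toy class below is illustrative only and is not part of this patch):

from google_cloud_automlops.orchestration.base import BaseServices

class EchoServices(BaseServices):
    """Toy subclass used only to illustrate the abstract hooks."""

    def _build_monitoring(self):
        print(f'would write monitoring files under {self.submission_service_base_dir}')

    def _build_submission_services(self):
        print(f'would write submission service files to {self.submission_service_base_dir}')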
- packages_to_install: A list of optional packages to install before - executing func. These will always be installed at component runtime. + a plain parameter, or a path to a file). Defaults to None. + packages_to_install (Optional[List[str]]): A list of optional packages to install before + executing func. These will always be installed at component runtime. Defaults to None. """ super().__init__(func, packages_to_install) @@ -158,8 +157,11 @@ def build(self): contents=self.component_spec, mode='a') - def _get_packages_to_install_command(self): - """Returns a list of formatted list of commands, including code for tmp storage. + def _get_packages_to_install_command(self) -> list: + """Creates a list of formatted list of commands, including code for tmp storage. + + Returns: + list: Formatted commands to install necessary packages. #TODO: add more, where is this used """ newline = '\n' concat_package_list = ' '.join([repr(str(package)) for package in self.packages_to_install]) @@ -172,12 +174,12 @@ def _get_packages_to_install_command(self): f'''{newline}''') return ['sh', '-c', install_python_packages_script, self.src_code] - def _create_component_spec(self): - """Creates a tmp component scaffold which will be used by the formalize function. - Code is temporarily stored in component_spec['implementation']['container']['command']. + def _create_component_spec(self) -> dict: + """Creates a tmp component scaffold which will be used by the formalize function. Code is + temporarily stored in component_spec['implementation']['container']['command']. Returns: - _type_: _description_ #TODO: FILL OUT + dict: _description_ #TODO: FILL OUT """ # Instantiate component yaml attributes component_spec = {} @@ -203,20 +205,20 @@ def _create_component_spec(self): return component_spec def _update_params(self, params: list) -> list: - """Converts the parameter types from Python types - to Kubeflow types. Currently only supports + """Converts the parameter types from Python types to Kubeflow types. Currently only supports Python primitive types. Args: - params: Pipeline parameters. A list of dictionaries, - each param is a dict containing keys: - 'name': required, str param name. - 'type': required, python primitive type. - 'description': optional, str param desc. + params: Pipeline parameters. A list of dictionaries, Each param is a dict containing keys: + 'name': required, str param name. + 'type': required, python primitive type. + 'description': optional, str param desc. + Returns: list: Params list with converted types. + Raises: - Exception: If an inputted type is not a primitive. + ValueError: If an inputted type is not a primitive. """ python_kfp_types_mapper = { int: 'Integer', @@ -237,25 +239,26 @@ def _update_params(self, params: list) -> list: class KFPPipeline(BasePipeline): """Creates a KFP specific Pipeline object for #TODO: add more - """ + Args: + BasePipeline (object): Generic Pipeline object. + """ def __init__(self, func: Optional[Callable] = None, name: Optional[str] = None, description: Optional[str] = None, comps_dict: dict = None) -> None: - """Initiates a KFP pipeline object created out of a function holding - all necessary code. + """Initiates a KFP pipeline object created out of a function holding all necessary code. Args: - func: The python function to create a pipeline from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). 
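The Python-to-Kubeflow type conversion above is easiest to see with a tiny worked example; this simply replays the mapping used by KFPComponent._update_params():

python_kfp_types_mapper = {
    int: 'Integer', str: 'String', float: 'Float',
    bool: 'Boolean', list: 'JsonArray', dict: 'JsonObject',
}
params = [{'name': 'bucket', 'type': str}, {'name': 'epochs', 'type': int}]
for param in params:
    param['type'] = python_kfp_types_mapper[param['type']]
print(params)  # [{'name': 'bucket', 'type': 'String'}, {'name': 'epochs', 'type': 'Integer'}]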
- name: The name of the pipeline. - description: Short description of what the pipeline does. - comps_list: Dictionary of potential components for pipeline to utilize imported - as the global held in AutoMLOps.py. + func (Optional[Callable]): The python function to create a pipeline from. The functio + should have type annotations for all its arguments, indicating how it is intended + to be used (e.g. as an input/output Artifact object, a plain parameter, or a path + to a file). Defaults to None. + name (Optional[str]): The name of the pipeline. Defaults to None. + description (Optional[str]): Short description of what the pipeline does. Defaults to None. + comps_list (dict): Dictionary of potential components for pipeline to utilize imported + as the global held in AutoMLOps.py. Defaults to None. """ super().__init__( func=func, @@ -270,12 +273,8 @@ def __init__(self, + self._get_compile_step()) def build(self, - base_image, - custom_training_job_specs, - pipeline_params, - pubsub_topic_name, - use_ci, - setup_model_monitoring): + pipeline_params: dict, + custom_training_job_specs: Optional[List] = None): """Constructs files for running and managing Kubeflow pipelines. Files created under AutoMLOps/: @@ -295,19 +294,25 @@ def build(self, pipeline_runner.py requirements.txt runtime_parameters/pipeline_parameter_values.json + + Args: + custom_training_job_specs (dict): Specifies the specs to run the training job with. + pipeline_params (Optional[List]): Dictionary containing runtime pipeline parameters. Defaults + to None. + """ # Save parameters as attributes - self.base_image = base_image self.custom_training_job_specs = custom_training_job_specs self.pipeline_params = pipeline_params - self.pubsub_topic_name = pubsub_topic_name - self.use_ci = use_ci - self.setup_model_monitoring = setup_model_monitoring # Extract additional attributes from defaults file defaults = read_yaml_file(GENERATED_DEFAULTS_FILE) self.project_id = defaults['gcp']['project_id'] self.gs_pipeline_job_spec_path = defaults['pipelines']['gs_pipeline_job_spec_path'] + self.base_image = defaults['gcp']['base_image'] + self.pubsub_topic_name = defaults['gcp']['pubsub_topic_name'] + self.use_ci = defaults['tooling']['use_ci'] + self.setup_model_monitoring = defaults['gcp']['setup_model_monitoring'] # Build necessary folders make_dirs([ @@ -428,14 +433,10 @@ def build(self, write_file(BASE_DIR + GENERATED_PARAMETER_VALUES_PATH, serialized_params, 'w') def _get_pipeline_decorator(self): - """Creates the kfp pipeline decorator. - - Args: - name: The name of the pipeline. - description: Short description of what the pipeline does. + """Constructs the kfp pipeline decorator. Returns: - str: Python compile function call. + str: KFP pipeline decorator. """ name_str = f'''(\n name='{self.name}',\n''' desc_str = f''' description='{self.description}',\n''' if self.description else '' @@ -443,13 +444,10 @@ def _get_pipeline_decorator(self): return '@dsl.pipeline' + name_str + desc_str + ending_str def _get_compile_step(self): - """Creates the compile function call. - - Args: - func_name: The name of the pipeline function. + """Constructs the compile function call. Returns: - str: Python compile function call. + str: Compile function call. """ return ( f'\n' @@ -459,12 +457,14 @@ def _get_compile_step(self): f'\n' ) - def _create_component_base_requirements(self): - """Writes a requirements.txt to the component_base directory. - Infers pip requirements from the python srcfiles using - pipreqs. 
Takes user-inputted requirements, and addes some - default gcp packages as well as packages that are often missing - in setup.py files (e.g db_types, pyarrow, gcsfs, fsspec). + def _create_component_base_requirements(self) -> str: + """Writes a requirements.txt to the component_base directory. Infers pip requirements from + the python srcfiles using pipreqs. Takes user-inputted requirements, and addes some default + gcp packages as well as packages that are often missing in setup.py files (e.g db_types, + pyarrow, gcsfs, fsspec). TODO: update this as it returns a string, doesn't write a file. + + Returns: + str: TODO """ reqs_filename = f'{GENERATED_COMPONENT_BASE}/requirements.txt' default_gcp_reqs = [ @@ -539,28 +539,16 @@ class KFPServices(BaseServices): """Creates a KFP specific Services object for #TODO: add more Args: - Services (object): Generic Services object. + BaseServices (object): Generic Services object. """ - - def __init__(self) -> None: - """Initializes KFPServices Object. - """ - super().__init__() - - def build(self, - pipeline_storage_path, - pipeline_job_runner_service_account, - pipeline_job_submission_service_type, - project_id, - setup_model_monitoring): - super().build( - pipeline_storage_path, - pipeline_job_runner_service_account, - pipeline_job_submission_service_type, - project_id, - setup_model_monitoring) - def _build_monitoring(self): + """Writes files necessary for implementing model monitoring. Files created are: + scripts/ + create_model_monitoring_job.sh + model_monitoring/ + monitor.py + requirements.txt + """ # Writes script create_model_monitoring_job.sh which creates a Vertex AI model monitoring job write_and_chmod( filepath=GENERATED_MODEL_MONITORING_SH_FILE, @@ -586,10 +574,12 @@ def _build_monitoring(self): mode='w') def _build_submission_services(self): - """Writes the #TODO add more - services/submission_service/requirements.txt - services/submission_service/main.py - services/submission_service/Dockerfile + """Writes the files necessary for utilizing submission services. Files written are: + services/ + submission_service/ + Dockerfile + main.py + requirements.txt """ write_file( f'{self.submission_service_base_dir}/requirements.txt', diff --git a/google_cloud_automlops/utils/utils.py b/google_cloud_automlops/utils/utils.py index a85e46d..0f5688b 100644 --- a/google_cloud_automlops/utils/utils.py +++ b/google_cloud_automlops/utils/utils.py @@ -64,7 +64,7 @@ def make_dirs(directories: list): """Makes directories with the specified names. Args: - directories: Path of the directories to make. + directories (list): Path of the directories to make. """ for d in directories: try: @@ -74,13 +74,14 @@ def make_dirs(directories: list): def read_yaml_file(filepath: str) -> dict: - """Reads a yaml and returns file contents as a dict. - Defaults to utf-8 encoding. + """Reads a yaml and returns file contents as a dict. Defaults to utf-8 encoding. Args: - filepath: Path to the yaml. + filepath (str): Path to the yaml. + Returns: dict: Contents of the yaml. + Raises: Exception: If an error is encountered reading the file. """ @@ -97,11 +98,12 @@ def write_yaml_file(filepath: str, contents: dict, mode: str): """Writes a dictionary to yaml. Defaults to utf-8 encoding. Args: - filepath: Path to the file. - contents: Dictionary to be written to yaml. - mode: Read/write mode to be used. + filepath (str): Path to the file. + contents (dict): Dictionary to be written to yaml. + mode (str): Read/write mode to be used. 
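For context, a hedged sketch of the slimmed-down call sites after this change: per-run values such as base_image, pubsub_topic_name, use_ci, and setup_model_monitoring now come from the generated defaults file, so callers pass only the pipeline parameters and any custom training job specs. Here kfppipe stands in for the KFPPipeline instance constructed in AutoMLOps.generate() as shown earlier in this patch, and the parameter values are placeholders:

from google_cloud_automlops.orchestration.kfp import KFPServices

pipeline_params = {
    'bq_table': 'my-project.my_dataset.my_table',
    'model_directory': 'gs://my-bucket/model',
}
custom_training_job_specs = [{
    'component_spec': 'train_model',
    'display_name': 'train-model-accelerated',
}]
kfppipe.build(pipeline_params, custom_training_job_specs)
KFPServices().build()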
+ Raises: - Exception: If an error is encountered writing the file. + Exception: An error is encountered while writing the file. """ try: with open(filepath, mode, encoding='utf-8') as file: @@ -112,15 +114,16 @@ def write_yaml_file(filepath: str, contents: dict, mode: str): def read_file(filepath: str) -> str: - """Reads a file and returns contents as a string. - Defaults to utf-8 encoding. + """Reads a file and returns contents as a string. Defaults to utf-8 encoding. Args: - filepath: Path to the file. + filepath (str): Path to the file. + Returns: str: Contents of the file. + Raises: - Exception: If an error is encountered reading the file. + Exception: An error is encountered while reading the file. """ try: with open(filepath, 'r', encoding='utf-8') as file: @@ -135,11 +138,12 @@ def write_file(filepath: str, text: str, mode: str): """Writes a file at the specified path. Defaults to utf-8 encoding. Args: - filepath: Path to the file. - text: Text to be written to file. - mode: Read/write mode to be used. + filepath (str): Path to the file. + text (str): Text to be written to file. + mode (str): Read/write mode to be used. + Raises: - Exception: If an error is encountered writing the file. + Exception: An error is encountered writing the file. """ try: with open(filepath, mode, encoding='utf-8') as file: @@ -150,14 +154,14 @@ def write_file(filepath: str, text: str, mode: str): def write_and_chmod(filepath: str, text: str): - """Writes a file at the specified path and chmods the file - to allow for execution. + """Writes a file at the specified path and chmods the file to allow for execution. Args: - filepath: Path to the file. - text: Text to be written to file. + filepath (str): Path to the file. + text (str): Text to be written to file. + Raises: - Exception: If an error is encountered chmod-ing the file. + Exception: An error is encountered while chmod-ing the file. """ write_file(filepath, text, 'w') try: @@ -168,11 +172,10 @@ def write_and_chmod(filepath: str, text: str): def delete_file(filepath: str): - """Deletes a file at the specified path. - If it does not exist, pass. + """Deletes a file at the specified path. If it does not exist, pass. Args: - filepath: Path to the file. + filepath (str): Path to the file. """ try: os.remove(filepath) @@ -181,11 +184,12 @@ def delete_file(filepath: str): def get_components_list(full_path: bool = True) -> list: - """Reads yamls in the cache directory, verifies they are component - yamls, and returns the name of the files. + """Reads yamls in the cache directory, verifies they are component yamls, and returns the name + of the files. Args: - full_path: Boolean; if false, stores only the filename w/o extension. + full_path (bool): If false, stores only the filename w/o extension. + Returns: list: Contains the names or paths of all component yamls in the dir. """ @@ -205,7 +209,8 @@ def is_component_config(filepath: str) -> bool: """Checks to see if the given file is a component yaml. Args: - filepath: Path to a yaml file. + filepath (str): Path to a yaml file. + Returns: bool: Whether the given file is a component yaml. """ @@ -218,10 +223,11 @@ def execute_process(command: str, to_null: bool): """Executes an external shell process. Args: - command: The string of the command to execute. - to_null: Determines where to send output. + command (str): Command to execute. + to_null (bool): Determines where to send output. + Raises: - Exception: If an error occurs in executing the script. + Exception: An error occured while executing the script. 
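A brief, hedged illustration of how a few of these utils compose (file names and the sample function are placeholders; a POSIX shell is assumed for the chmod-and-execute step):

from google_cloud_automlops.utils.utils import (
    delete_file,
    execute_process,
    get_function_source_definition,
    write_and_chmod,
)

def add_one(x: int) -> int:
    return x + 1

# Source extraction used when embedding user code into component containers.
print(get_function_source_definition(add_one))

# Write a script, mark it executable, run it, then clean up.
write_and_chmod('hello.sh', '#!/bin/bash\necho "hello automlops"\n')
execute_process('./hello.sh', to_null=False)
delete_file('hello.sh')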
""" stdout = subprocess.DEVNULL if to_null else None try: @@ -236,16 +242,18 @@ def execute_process(command: str, to_null: bool): def validate_use_ci(setup_model_monitoring: bool, schedule_pattern: str, use_ci: str): """Validates that the inputted schedule parameter and model_monitoring parameter align with the - use_ci configuration. + use_ci configuration. + Note: this function does not validate that schedule_pattern is a properly formatted cron value. Cron format validation is done in the backend by GCP. Args: - setup_model_monitoring: Boolean parameter which specifies whether to set up a Vertex AI Model Monitoring Job. - schedule_pattern: Cron formatted value used to create a Scheduled retrain job. - use_ci: Flag that determines whether to use Cloud CI/CD. + setup_model_monitoring (bool): Specifies whether to set up a Vertex AI Model Monitoring Job. + schedule_pattern (str): Cron formatted value used to create a Scheduled retrain job. + use_ci (bool): Specifies whether to use Cloud CI/CD. + Raises: - Exception: If use_ci validation fails. + Exception: use_ci validation failed. """ if setup_model_monitoring and not use_ci: raise ValueError('use_ci must be set to True to use Model Monitoring.') @@ -253,51 +261,19 @@ def validate_use_ci(setup_model_monitoring: bool, schedule_pattern: str, use_ci: raise ValueError('use_ci must be set to True to use Cloud Scheduler.') -def update_params(params: list) -> list: - """Converts the parameter types from Python types - to Kubeflow types. Currently only supports - Python primitive types. - - Args: - params: Pipeline parameters. A list of dictionaries, - each param is a dict containing keys: - 'name': required, str param name. - 'type': required, python primitive type. - 'description': optional, str param desc. - Returns: - list: Params list with converted types. - Raises: - Exception: If an inputted type is not a primitive. - """ - python_kfp_types_mapper = { - int: 'Integer', - str: 'String', - float: 'Float', - bool: 'Boolean', - list: 'JsonArray', - dict: 'JsonObject' - } - for param in params: - try: - param['type'] = python_kfp_types_mapper[param['type']] - except KeyError as err: - raise ValueError(f'Unsupported python type - we only support ' - f'primitive types at this time. {err}') from err - return params - - def get_function_source_definition(func: Callable) -> str: """Returns a formatted string of the source code. Args: - func: The python function to create a component from. The function - should have type annotations for all its arguments, indicating how - it is intended to be used (e.g. as an input/output Artifact object, - a plain parameter, or a path to a file). + func (Callable): The python function to create a component from. The function should have + type annotations for all its arguments, indicating how it is intended to be used (e.g. + as an input/output Artifact object, a plain parameter, or a path to a file). + Returns: str: The source code from the inputted function. + Raises: - Exception: If the preprocess operates failed. + Exception: The preprocess operations failed. """ source_code = inspect.getsource(func) source_code = textwrap.dedent(source_code) @@ -316,7 +292,7 @@ def stringify_job_spec_list(job_spec_list: list) -> list: """Takes in a list of job spec dictionaries and turns them into strings. Args: - job_spec: Dictionary with job spec info. e.g. + job_spec (list): Dictionary with job spec info. e.g. 
custom_training_job_specs = [{ 'component_spec': 'train_model', 'display_name': 'train-model-accelerated', @@ -347,10 +323,10 @@ def is_using_kfp_spec(image: str) -> bool: """Takes in an image string from a component yaml and determines if it came from kfp or not. Args: - image: image string. + image (str): Image string. #TODO: make this more informative Returns: - bool: is the component using kfp spec. + bool: Whether the component using kfp spec. """ return image != PLACEHOLDER_IMAGE @@ -382,41 +358,43 @@ def create_default_config(artifact_repo_location: str, storage_bucket_name: str, use_ci: bool, vpc_connector: str) -> dict: - """Creates defaults.yaml file contents as a dict. This defaults - file is used by subsequent functions and by the pipeline - files themselves. + """Creates defaults.yaml file contents as a dict. This defaults file is used by subsequent + functions and by the pipeline files themselves. Args: - artifact_repo_location: Region of the artifact repo (default use with Artifact Registry). - artifact_repo_name: Artifact repo name where components are stored (default use with Artifact Registry). - artifact_repo_type: The type of artifact repository to use (e.g. Artifact Registry, JFrog, etc.) - base_image: The image to use in the component base dockerfile. - build_trigger_location: The location of the build trigger (for cloud build). - build_trigger_name: The name of the build trigger (for cloud build). - deployment_framework: The CI tool to use (e.g. cloud build, github actions, etc.) - naming_prefix: Unique value used to differentiate pipelines and services across AutoMLOps runs. - orchestration_framework: The orchestration framework to use (e.g. kfp, tfx, etc.) - pipeline_job_runner_service_account: Service Account to run PipelineJobs. - pipeline_job_submission_service_location: The location of the cloud submission service. - pipeline_job_submission_service_name: The name of the cloud submission service. - pipeline_job_submission_service_type: The tool to host for the cloud submission service (e.g. cloud run, cloud functions). - project_id: The project ID. - provisioning_framework: The IaC tool to use (e.g. Terraform, Pulumi, etc.) - pubsub_topic_name: The name of the pubsub topic to publish to. - schedule_location: The location of the scheduler resource. - schedule_name: The name of the scheduler resource. - schedule_pattern: Cron formatted value used to create a Scheduled retrain job. - setup_model_monitoring: Boolean parameter which specifies whether to set up a Vertex AI Model Monitoring Job. - source_repo_branch: The branch to use in the source repository. - source_repo_name: The name of the source repository to use. - source_repo_type: The type of source repository to use (e.g. gitlab, github, etc.) - storage_bucket_location: Region of the GS bucket. - storage_bucket_name: GS bucket name where pipeline run metadata is stored. - use_ci: Flag that determines whether to use Cloud CI/CD. - vpc_connector: The name of the vpc connector to use. + artifact_repo_location (str): Region of the artifact repo (default use with Artifact Registry). + artifact_repo_name (str): Artifact repo name where components are stored (default use with + Artifact Registry). + artifact_repo_type (str): Type of artifact repository to use (e.g. Artifact Registry, JFrog, etc.) + base_image (str): Image to use in the component base dockerfile. + build_trigger_location (str): Location of the build trigger (for cloud build). + build_trigger_name (str): Name of the build trigger (for cloud build). 
+ deployment_framework (str): Name of CI tool to use (e.g. cloud build, github actions, etc.) + naming_prefix (str): Unique value used to differentiate pipelines and services across + AutoMLOps runs. + orchestration_framework (str): Orchestration framework to use (e.g. kfp, tfx, etc.) + pipeline_job_runner_service_account (str): Service Account to run PipelineJobs. + pipeline_job_submission_service_location (str): Location of the cloud submission service. + pipeline_job_submission_service_name (str): Name of the cloud submission service. + pipeline_job_submission_service_type (str): Tool to host for the cloud submission service + (e.g. cloud run, cloud functions). + project_id (str): The project ID. + provisioning_framework (str): IaC tool to use (e.g. Terraform, Pulumi, etc.) + pubsub_topic_name (str): Name of the pubsub topic to publish to. + schedule_location (str): Location of the scheduler resource. + schedule_name (str): Name of the scheduler resource. + schedule_pattern (str): Cron formatted value used to create a Scheduled retrain job. + setup_model_monitoring (bool): Specifies whether to set up a Vertex AI Model Monitoring Job. + source_repo_branch (str): Branch to use in the source repository. + source_repo_name (str): Name of the source repository to use. + source_repo_type (str): Type of source repository to use (e.g. gitlab, github, etc.) + storage_bucket_location (str): Region of the GS bucket. + storage_bucket_name (str): GS bucket name where pipeline run metadata is stored. + use_ci (bool): Specifies whether to use Cloud CI/CD. + vpc_connector (str): Name of the vpc connector to use. Returns: - dict: Defaults yaml file content + dict: Defaults yaml file content. """ defaults = {} defaults['gcp'] = {} @@ -463,7 +441,7 @@ def create_default_config(artifact_repo_location: str, defaults['tooling']['use_ci'] = use_ci if setup_model_monitoring: - # These fields to be set when AutoMLOps.monitor() is called + # These fields will be set up if and when AutoMLOps.monitor() is called defaults['monitoring'] = {} defaults['monitoring']['target_field'] = None defaults['monitoring']['model_endpoint'] = None @@ -483,14 +461,14 @@ def create_default_config(artifact_repo_location: str, def get_required_apis(defaults: dict) -> list: - """Returns the list of required APIs based on the user tooling selection - determined during the generate() step. + """Returns the list of required APIs based on the user tooling selection determined during + the generate() step. Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). Returns: - list: The list of required APIs + list: Required APIs. """ required_apis = [ 'cloudbuild.googleapis.com', @@ -521,15 +499,14 @@ def get_required_apis(defaults: dict) -> list: def get_provision_min_permissions(defaults: dict) -> list: - """Returns the list of minimum required permissions to run - the provision() step based on the user tooling selection - determined during the generate() step. + """Returns the list of minimum required permissions to run the provision() step based on the + user tooling selection determined during the generate() step. Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). Returns: - list: The list of required permissions + list: Required permissions. 
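These helpers all take the parsed defaults dict rather than separate arguments; a hedged usage sketch follows (the path is the config/defaults.yaml location referenced in the docstrings, which in practice sits under the generated AutoMLOps output directory):

from google_cloud_automlops.utils.utils import get_required_apis, read_yaml_file

defaults = read_yaml_file('config/defaults.yaml')  # path as referenced in the docstrings above
print(get_required_apis(defaults))  # e.g. ['cloudbuild.googleapis.com', ...]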
""" required_permissions = [ 'serviceusage.services.enable', @@ -559,16 +536,15 @@ def get_provision_min_permissions(defaults: dict) -> list: def get_provision_recommended_roles(defaults: dict) -> list: - """Returns the list of recommended roles to run - the provision() step based on the user tooling selection - determined during the generate() step. These roles have - the minimum permissions required for provision. + """Creates the list of recommended roles to run the provision() step based on the user tooling + selection determined during the generate() step. These roles have the minimum permissions + required for provision. Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). Returns: - list: The list of recommended roles + list: Recommended provision roles. """ recommended_roles = [ 'roles/serviceusage.serviceUsageAdmin', @@ -594,17 +570,16 @@ def get_provision_recommended_roles(defaults: dict) -> list: def get_deploy_with_precheck_min_permissions(defaults: dict) -> list: - """Returns the list of minimum required permissions to run - the deploy() step based on the user tooling selection - determined during the generate() step. This function is called - when precheck=True, which makes several API calls to determine if the infra - exists to run deploy() and increases the required list of permissions. + """Creates the list of minimum required permissions to run the deploy() step based on the user + tooling selection, determined during the generate() step. This function is called when + precheck=True, which makes several API calls to determine if the infra exists to run deploy() + and increases the required list of permissions. Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). Returns: - list: The list of minimum permissions to deploy with precheck=True + list: Minimum permissions to deploy with precheck=True. """ recommended_permissions = [ 'serviceusage.services.get', @@ -629,17 +604,16 @@ def get_deploy_with_precheck_min_permissions(defaults: dict) -> list: def get_deploy_with_precheck_recommended_roles(defaults: dict) -> list: - """Returns the list of recommended roles to run - the deploy() step based on the user tooling selection - determined during the generate() step. This function is called - when precheck=True, which makes several API calls to determine if the infra - exists to run deploy() and increases the required list of permissions. + """Returns the list of recommended roles to run the deploy() step based on the user tooling + selection determined during the generate() step. This function is called when precheck=True, + which makes several API calls to determine if the infra exists to run deploy() and increases the + required list of permissions. Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). Returns: - list: The list of recommended roles to deploy with precheck=True + list: Recommended roles to deploy with precheck=True. 
""" recommended_roles = [ 'roles/serviceusage.serviceUsageViewer', @@ -664,16 +638,15 @@ def get_deploy_with_precheck_recommended_roles(defaults: dict) -> list: def get_deploy_without_precheck_min_permissions(defaults: dict) -> list: - """Returns the list of minimum required permissions to run - the deploy() step based on the user tooling selection - determined during the generate() step. This function is called - when precheck=False, which decreases the required list of permissions. + """Creates the list of minimum required permissions to run the deploy() step based on the user + tooling selection determined during the generate() step. This function is called when + precheck=False, which decreases the required list of permissions. Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). Returns: - list: The list of minimum permissions to deploy with precheck=False + list: Minimum permissions to deploy with precheck=False. """ recommended_permissions = [] if defaults['tooling']['use_ci']: @@ -685,16 +658,15 @@ def get_deploy_without_precheck_min_permissions(defaults: dict) -> list: def get_deploy_without_precheck_recommended_roles(defaults: dict) -> list: - """Returns the list of recommended roles to run - the deploy() step based on the user tooling selection - determined during the generate() step. This function is called - when precheck=False, which decreases the required list of permissions. + """Creates the list of recommended roles to run the deploy() step based on the user tooling + selection determined during the generate() step. This function is called when precheck=False, + which decreases the required list of permissions. Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). Returns: - list: The list of recommended roles to deploy with precheck=False + list: Recommended roles to deploy with precheck=False. """ recommended_roles = [] if defaults['tooling']['use_ci']: @@ -706,15 +678,14 @@ def get_deploy_without_precheck_recommended_roles(defaults: dict) -> list: def get_model_monitoring_min_permissions(defaults: dict) -> list: - """Returns the list of minimum required permissions to run - the monitor() step based on the user tooling selection - determined during the generate() step. + """Creates the list of minimum required permissions to run the monitor() step based on the user + tooling selection determined during the generate() step. Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). Returns: - list: The list of minimum permissions to create a monitoring job. + list: Minimum permissions to create a monitoring job. """ recommended_permissions = [ 'aiplatform.endpoints.list', @@ -728,15 +699,14 @@ def get_model_monitoring_min_permissions(defaults: dict) -> list: def get_model_monitoring_recommended_roles(defaults: dict) -> list: - """Returns the list of recommended roles to run - the monitor() step based on the user tooling selection - determined during the generate() step. + """Creates the list of recommended roles to run the monitor() step based on the user tooling + selection determined during the generate() step. 
Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). Returns: - list: The list of recommended roles to create a monitoring job. + list: Recommended roles to create a monitoring job. """ recommended_roles = ['roles/aiplatform.user'] if defaults['monitoring']['auto_retraining_params']: @@ -748,8 +718,9 @@ def account_permissions_warning(operation: str, defaults: dict): """Logs the current gcloud account and generates warnings based on the operation being performed. Args: - operation: Specifies which operation is being performed. Available options {provision, deploy_with_precheck, deploy_without_precheck, model_monitoring} - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + operation (str): Specifies which operation is being performed. Available options {provision, + deploy_with_precheck, deploy_without_precheck, model_monitoring}. + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). """ bullet_nl = '\n-' gcp_account = subprocess.check_output( @@ -777,10 +748,10 @@ def account_permissions_warning(operation: str, defaults: dict): def check_installation_versions(provisioning_framework: str): """Checks the version of the provisioning tool (e.g. terraform, gcloud) and generates warning if - either the tool is not installed, or if it below the recommended version. + either the tool is not installed, or if it below the recommended version. Args: - provisioning_framework: The IaC tool to use (e.g. Terraform, Pulumi, etc.) + provisioning_framework (str): The IaC tool to use (e.g. Terraform, Pulumi, etc.). """ if provisioning_framework == Provisioner.GCLOUD.value: try: @@ -816,12 +787,11 @@ def check_installation_versions(provisioning_framework: str): def precheck_deployment_requirements(defaults: dict): - """Checks to see if the necessary MLOps infra exists to run - the deploy() step based on the user tooling selection - determined during the generate() step. + """Checks to see if the necessary MLOps infra exists to run the deploy() step based on the user + tooling selection determined during the generate() step. Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults: Contents of the Defaults yaml file (config/defaults.yaml). """ use_ci = defaults['tooling']['use_ci'] artifact_repo_location = defaults['gcp']['artifact_repo_location'] @@ -974,7 +944,7 @@ def resources_generation_manifest(defaults: dict): """Logs urls of generated resources. Args: - defaults: Dictionary contents of the Defaults yaml file (config/defaults.yaml) + defaults (dict): Contents of the Defaults yaml file (config/defaults.yaml). """ logging.info('Please wait for this build job to complete.') logging.info('\n' @@ -1025,8 +995,7 @@ def render_jinja(template_path, **template_vars): Args: template_path (str): The path to the Jinja2 template file. - **template_vars: Keyword arguments representing variables to substitute - in the template. + **template_vars: Keyword arguments representing variables to substitute in the template. Returns: str: The rendered template as a string. @@ -1036,7 +1005,7 @@ def render_jinja(template_path, **template_vars): return template.render(**template_vars) def coalesce(*arg): - """Returns the first non-None value from a sequence of arguments. + """Creates the first non-None value from a sequence of arguments. Returns: The first non-None argument, or None if all arguments are None.
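Finally, hedged examples of the two small utilities documented at the end of this hunk; the template path and variables are placeholders:

from google_cloud_automlops.utils.utils import coalesce, render_jinja

print(coalesce(None, '', 'fallback'))  # prints '' because it is the first non-None argument

dockerfile = render_jinja('templates/component_base.dockerfile.j2',
                          base_image='python:3.10-slim')
print(dockerfile)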