diff --git a/.github/workflows/deploy_docs_to_gh_pages.yaml b/.github/workflows/deploy_docs_to_gh_pages.yaml index 7bf5a377..a1dd2cf1 100644 --- a/.github/workflows/deploy_docs_to_gh_pages.yaml +++ b/.github/workflows/deploy_docs_to_gh_pages.yaml @@ -28,7 +28,7 @@ jobs: - name: Setup Python Environment uses: actions/setup-python@v3 with: - python-version: '3.8' + python-version: '3.10' - name: Install Python Dependencies run: | diff --git a/docs/cmf_client/Getting Started with cmf.md b/docs/cmf_client/Getting Started with cmf.md index 6dcafe9a..a4ef46de 100644 --- a/docs/cmf_client/Getting Started with cmf.md +++ b/docs/cmf_client/Getting Started with cmf.md @@ -13,7 +13,7 @@ Follow the below-mentioned steps for the end-to-end setup of cmf-client:- **Pre-Requisites** -- Python 3.8+ +- Python 3.9+ - Git latest version **Install cmf library i.e. cmflib** diff --git a/docs/examples/getting_started.md b/docs/examples/getting_started.md index cc466f6e..cb9a1cb0 100644 --- a/docs/examples/getting_started.md +++ b/docs/examples/getting_started.md @@ -4,7 +4,7 @@ > [anaconda](https://docs.anaconda.com/anaconda/install/linux/) to manage python virtual environments. > This example was tested in the following environments: > -> - `Ubuntu-22.04 with python-3.8.15` +> - `Ubuntu-22.04 with python-3.10` This example demonstrates how CMF tracks a metadata associated with executions of various machine learning (ML) pipelines. 
ML pipelines differ from other pipelines (e.g., data Extract-Transform-Load pipelines) by the presence of @@ -43,7 +43,7 @@ mkdir cmf_getting_started_example cd cmf_getting_started_example # Create and activate Python virtual environment (the Python version may need to be adjusted depending on your system) -conda create -n cmf_getting_started_example python=3.8 +conda create -n cmf_getting_started_example python=3.10 conda activate cmf_getting_started_example # Clone the CMF project from GitHub and install CMF diff --git a/docs/index.md b/docs/index.md index 4531f482..e7e8aaee 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,20 +7,20 @@ models and performance metrics) recorded by the framework are versioned and iden ## Installation #### 1. Pre-Requisites: -* 3.8>= Python <=3.9 +* 3.9 <= Python <= 3.11 * Git latest version #### 2. Set up Python Virtual Environment: === "Using Conda" ```shell - conda create -n cmf python=3.8 + conda create -n cmf python=3.10 conda activate cmf ``` === "Using VirtualEnv" ```shell - virtualenv --python=3.8 .cmf + virtualenv --python=3.10 .cmf source .cmf/bin/activate ``` @@ -40,8 +40,6 @@ models and performance metrics) recorded by the framework are versioned and iden After installing CMF, proceed to configure CMF server and client. For detailed configuration instructions, refer to the [Quick start with cmf-client](./cmf_client/step-by-step.md) page. -### [Jupyter Lab docker container with CMF pre-installed](#docker-section) - ## Introduction Complex ML projects rely on `ML pipelines` to train and test ML models. An ML pipeline is a sequence of stages where each stage performs a particular task, such as data loading, pre-processing, ML model training and testing stages. 
@@ -247,6 +245,7 @@ cmf = cmf.Cmf( ) ``` +### [Jupyter Lab docker container with CMF pre-installed](#docker-section) ## Use a Jupyterlab Docker environment with CMF pre-installed CMF has a docker-compose file which creates two docker containers, - JupyterLab Notebook Environment with CMF pre installed. diff --git a/examples/example-get-started/src/featurize.py b/examples/example-get-started/src/featurize.py index 37699b34..3a9e594c 100644 --- a/examples/example-get-started/src/featurize.py +++ b/examples/example-get-started/src/featurize.py @@ -74,7 +74,7 @@ def featurize(input_dir: str, output_dir: str) -> None: output_ds = Dataset(train=os.path.join(output_dir, "train.pkl"), test=os.path.join(output_dir, "test.pkl")) graph_env = os.getenv("NEO4J", "False") graph = True if graph_env == "True" or graph_env == "TRUE" else False - metawriter = cmf.Cmf(filename="mlmd", pipeline_name="Test-env", graph=graph) + metawriter = cmf.Cmf(filepath="mlmd", pipeline_name="Test-env", graph=graph) _ = metawriter.create_context(pipeline_stage="Featurize") _ = metawriter.create_execution(execution_type="Featurize-execution", custom_properties=params) diff --git a/examples/example-get-started/src/parse.py b/examples/example-get-started/src/parse.py index 8f18b920..87992ce4 100644 --- a/examples/example-get-started/src/parse.py +++ b/examples/example-get-started/src/parse.py @@ -61,7 +61,7 @@ def parse(input_file: str, output_dir: str) -> None: random.seed(params["seed"]) graph_env = os.getenv("NEO4J", "False") graph = True if graph_env == "True" or graph_env == "TRUE" else False - metawriter = cmf.Cmf(filename="mlmd", pipeline_name="Test-env", graph=graph) + metawriter = cmf.Cmf(filepath="mlmd", pipeline_name="Test-env", graph=graph) _ = metawriter.create_context(pipeline_stage="Prepare", custom_properties={"user-metadata1": "metadata_value"}) _ = metawriter.create_execution(execution_type="Prepare", custom_properties=params) _ = metawriter.log_dataset(input_file, "input", 
custom_properties={"user-metadata1": "metadata_value"}) diff --git a/examples/example-get-started/src/test.py b/examples/example-get-started/src/test.py index 2eb542c9..ae55e032 100644 --- a/examples/example-get-started/src/test.py +++ b/examples/example-get-started/src/test.py @@ -48,7 +48,7 @@ def test(model_dir: str, dataset_dir: str, output_dir: str) -> None: ) graph_env = os.getenv("NEO4J", "False") graph = True if graph_env == "True" or graph_env == "TRUE" else False - metawriter = cmf.Cmf(filename="mlmd", pipeline_name="Test-env", graph=graph) + metawriter = cmf.Cmf(filepath="mlmd", pipeline_name="Test-env", graph=graph) _ = metawriter.create_context(pipeline_stage="Evaluate") _ = metawriter.create_execution(execution_type="Evaluate-execution") diff --git a/examples/example-get-started/src/train.py b/examples/example-get-started/src/train.py index 4d38be98..1f75cf65 100644 --- a/examples/example-get-started/src/train.py +++ b/examples/example-get-started/src/train.py @@ -39,7 +39,7 @@ def train(input_dir: str, output_dir: str) -> None: params = yaml.safe_load(open("params.yaml"))["train"] graph_env = os.getenv("NEO4J", "False") graph = True if graph_env == "True" or graph_env == "TRUE" else False - metawriter = cmf.Cmf(filename="mlmd", pipeline_name="Test-env", graph=graph) + metawriter = cmf.Cmf(filepath="mlmd", pipeline_name="Test-env", graph=graph) _ = metawriter.create_context(pipeline_stage="Train") _ = metawriter.create_execution(execution_type="Train-execution", custom_properties=params) diff --git a/pyproject.toml b/pyproject.toml index 0244af35..90929820 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [project] name = "cmflib" -version = "0.0.8" +version = "0.0.9" dependencies = [ - "ml-metadata==1.11.0", - "dvc[ssh,s3]==2.27.0", + "ml-metadata==1.15.0", + "dvc[ssh,s3]==3.51.1", "pandas", "retrying", "pyarrow", @@ -19,7 +19,7 @@ authors = [ ] description = "Track metadata for AI pipeline" readme = "README.md" -requires-python = 
">=3.8,<3.10" +requires-python = ">=3.9,<3.12" classifiers = [ "Programming Language :: Python :: 3", "Operating System :: POSIX :: Linux", diff --git a/server/Dockerfile b/server/Dockerfile index 3e44ecb0..34d6717a 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -14,8 +14,8 @@ # limitations under the License. ### -# Use python 3.8-slim-buster as base image. -FROM python:3.8-slim-buster +# Use python 3.10-slim-buster as base image. +FROM python:3.10-slim-buster #Update the proxy if needed #ENV http_proxy http://web-proxy.corp.hpecorp.net:8080 diff --git a/setup.py b/setup.py index 209921d0..116f9a50 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -VERSION = '0.0.8' +VERSION = '0.0.9' DESCRIPTION = 'Metadata Python Package' LONG_DESCRIPTION = 'Metadata framework storing AI metadata into MLMD' @@ -13,8 +13,8 @@ description=DESCRIPTION, long_description=LONG_DESCRIPTION, packages=find_packages(), - install_requires=["ml-metadata==1.11.0", - "dvc[ssh,s3]==2.27.0", "pandas", "retrying", "pyarrow", "neo4j", \ + install_requires=["ml-metadata==1.15.0", + "dvc[ssh,s3]==3.51.1", "pandas", "retrying", "pyarrow", "neo4j", \ "scikit-learn", "tabulate", "click", "minio", "paramiko"], # add any additional packages that # needs to be installed along with your package. Eg: 'caer'