Upgraded the ml-metadata version and dvc version to the latest (#178)
* Upgraded the ml-metadata version and dvc version to the latest

* Upgraded the ml-metadata version and dvc version to the latest

* fixing merge issues

* Doc updates
annmary-roy committed May 31, 2024
1 parent e852960 commit b9c8538
Showing 11 changed files with 21 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy_docs_to_gh_pages.yaml
@@ -28,7 +28,7 @@ jobs:
- name: Setup Python Environment
uses: actions/setup-python@v3
with:
-python-version: '3.8'
+python-version: '3.10'

- name: Install Python Dependencies
run: |
2 changes: 1 addition & 1 deletion docs/cmf_client/Getting Started with cmf.md
@@ -13,7 +13,7 @@ Follow the below-mentioned steps for the end-to-end setup of cmf-client:-

**Pre-Requisites**

-- Python 3.8+
+- Python 3.9+
- Git latest version

**Install cmf library i.e. cmflib**
4 changes: 2 additions & 2 deletions docs/examples/getting_started.md
@@ -4,7 +4,7 @@
> [anaconda](https://docs.anaconda.com/anaconda/install/linux/) to manage python virtual environments.
> This example was tested in the following environments:
>
-> - `Ubuntu-22.04 with python-3.8.15`
+> - `Ubuntu-22.04 with python-3.10`
This example demonstrates how CMF tracks a metadata associated with executions of various machine learning (ML)
pipelines. ML pipelines differ from other pipelines (e.g., data Extract-Transform-Load pipelines) by the presence of
@@ -43,7 +43,7 @@ mkdir cmf_getting_started_example
cd cmf_getting_started_example

# Create and activate Python virtual environment (the Python version may need to be adjusted depending on your system)
-conda create -n cmf_getting_started_example python=3.8
+conda create -n cmf_getting_started_example python=3.10
conda activate cmf_getting_started_example

# Clone the CMF project from GitHub and install CMF
9 changes: 4 additions & 5 deletions docs/index.md
@@ -7,20 +7,20 @@ models and performance metrics) recorded by the framework are versioned and iden
## Installation

#### 1. Pre-Requisites:
-* 3.8>= Python <=3.9
+* 3.9>= Python <=3.11
* Git latest version

#### 2. Set up Python Virtual Environment:

=== "Using Conda"
```shell
-conda create -n cmf python=3.8
+conda create -n cmf python=3.10
conda activate cmf
```

=== "Using VirtualEnv"
```shell
-virtualenv --python=3.8 .cmf
+virtualenv --python=3.10 .cmf
source .cmf/bin/activate
```

@@ -40,8 +40,6 @@ models and performance metrics) recorded by the framework are versioned and iden
After installing CMF, proceed to configure CMF server and client. For detailed configuration instructions, refer to the [Quick start with cmf-client](./cmf_client/step-by-step.md) page.


-### [Jupyter Lab docker container with CMF pre-installed](#docker-section)
-
## Introduction
Complex ML projects rely on `ML pipelines` to train and test ML models. An ML pipeline is a sequence of stages where
each stage performs a particular task, such as data loading, pre-processing, ML model training and testing stages.
@@ -247,6 +245,7 @@ cmf = cmf.Cmf(
)
```

+### [Jupyter Lab docker container with CMF pre-installed](#docker-section)
## <a name="docker-section"></a> Use a Jupyterlab Docker environment with CMF pre-installed
CMF has a docker-compose file which creates two docker containers,
- JupyterLab Notebook Environment with CMF pre installed.
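The pre-requisites in docs/index.md now document a 3.9–3.11 Python window. For illustration only, a small hypothetical guard along those lines (not part of the changed files) could be:

```python
import sys

# Hypothetical guard reflecting the documented support window (Python 3.9-3.11).
if not ((3, 9) <= sys.version_info[:2] <= (3, 11)):
    raise SystemExit(
        f"Python {sys.version.split()[0]} detected; cmflib 0.0.9 targets Python 3.9-3.11."
    )
print("Python version is within the supported range.")
```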
2 changes: 1 addition & 1 deletion examples/example-get-started/src/featurize.py
@@ -74,7 +74,7 @@ def featurize(input_dir: str, output_dir: str) -> None:
output_ds = Dataset(train=os.path.join(output_dir, "train.pkl"), test=os.path.join(output_dir, "test.pkl"))
graph_env = os.getenv("NEO4J", "False")
graph = True if graph_env == "True" or graph_env == "TRUE" else False
-metawriter = cmf.Cmf(filename="mlmd", pipeline_name="Test-env", graph=graph)
+metawriter = cmf.Cmf(filepath="mlmd", pipeline_name="Test-env", graph=graph)

_ = metawriter.create_context(pipeline_stage="Featurize")
_ = metawriter.create_execution(execution_type="Featurize-execution", custom_properties=params)
2 changes: 1 addition & 1 deletion examples/example-get-started/src/parse.py
@@ -61,7 +61,7 @@ def parse(input_file: str, output_dir: str) -> None:
random.seed(params["seed"])
graph_env = os.getenv("NEO4J", "False")
graph = True if graph_env == "True" or graph_env == "TRUE" else False
-metawriter = cmf.Cmf(filename="mlmd", pipeline_name="Test-env", graph=graph)
+metawriter = cmf.Cmf(filepath="mlmd", pipeline_name="Test-env", graph=graph)
_ = metawriter.create_context(pipeline_stage="Prepare", custom_properties={"user-metadata1": "metadata_value"})
_ = metawriter.create_execution(execution_type="Prepare", custom_properties=params)
_ = metawriter.log_dataset(input_file, "input", custom_properties={"user-metadata1": "metadata_value"})
2 changes: 1 addition & 1 deletion examples/example-get-started/src/test.py
@@ -48,7 +48,7 @@ def test(model_dir: str, dataset_dir: str, output_dir: str) -> None:
)
graph_env = os.getenv("NEO4J", "False")
graph = True if graph_env == "True" or graph_env == "TRUE" else False
-metawriter = cmf.Cmf(filename="mlmd", pipeline_name="Test-env", graph=graph)
+metawriter = cmf.Cmf(filepath="mlmd", pipeline_name="Test-env", graph=graph)
_ = metawriter.create_context(pipeline_stage="Evaluate")
_ = metawriter.create_execution(execution_type="Evaluate-execution")

2 changes: 1 addition & 1 deletion examples/example-get-started/src/train.py
@@ -39,7 +39,7 @@ def train(input_dir: str, output_dir: str) -> None:
params = yaml.safe_load(open("params.yaml"))["train"]
graph_env = os.getenv("NEO4J", "False")
graph = True if graph_env == "True" or graph_env == "TRUE" else False
-metawriter = cmf.Cmf(filename="mlmd", pipeline_name="Test-env", graph=graph)
+metawriter = cmf.Cmf(filepath="mlmd", pipeline_name="Test-env", graph=graph)
_ = metawriter.create_context(pipeline_stage="Train")
_ = metawriter.create_execution(execution_type="Train-execution", custom_properties=params)

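Across the four example stage scripts above, the only change is the keyword passed to the Cmf constructor: the metadata file is now given as `filepath` rather than `filename`. A minimal sketch of the resulting call pattern (assuming the `from cmflib import cmf` import used by these example scripts) looks like this:

```python
import os
from cmflib import cmf  # assumed import path for the Cmf metadata writer

# Same NEO4J toggle as the example stages: graph lineage only when explicitly enabled.
graph = os.getenv("NEO4J", "False") in ("True", "TRUE")

# After this change the metadata store is passed via `filepath` (previously `filename`).
metawriter = cmf.Cmf(filepath="mlmd", pipeline_name="Test-env", graph=graph)
_ = metawriter.create_context(pipeline_stage="Featurize")
_ = metawriter.create_execution(execution_type="Featurize-execution")
```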
8 changes: 4 additions & 4 deletions pyproject.toml
@@ -1,9 +1,9 @@
[project]
name = "cmflib"
version = "0.0.8"
version = "0.0.9"
dependencies = [
"ml-metadata==1.11.0",
"dvc[ssh,s3]==2.27.0",
"ml-metadata==1.15.0",
"dvc[ssh,s3]==3.51.1",
"pandas",
"retrying",
"pyarrow",
@@ -19,7 +19,7 @@ authors = [
]
description = "Track metadata for AI pipeline"
readme = "README.md"
requires-python = ">=3.8,<3.10"
requires-python = ">=3.9,<=3.11"
classifiers = [
"Programming Language :: Python :: 3",
"Operating System :: POSIX :: Linux",
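A quick, optional way to confirm an environment picked up the new pins from pyproject.toml (hypothetical helper, not included in the commit) is to query the installed package versions:

```python
from importlib import metadata

# Expected versions taken from the updated pyproject.toml.
EXPECTED = {"cmflib": "0.0.9", "ml-metadata": "1.15.0", "dvc": "3.51.1"}

for package, expected in EXPECTED.items():
    try:
        installed = metadata.version(package)
    except metadata.PackageNotFoundError:
        print(f"{package}: not installed (expected {expected})")
    else:
        status = "OK" if installed == expected else "MISMATCH"
        print(f"{package}: installed {installed}, expected {expected} [{status}]")
```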
4 changes: 2 additions & 2 deletions server/Dockerfile
@@ -14,8 +14,8 @@
# limitations under the License.
###

-# Use python 3.8-slim-buster as base image.
-FROM python:3.8-slim-buster
+# Use python 3.10-slim-buster as base image.
+FROM python:3.10-slim-buster

#Update the proxy if needed
#ENV http_proxy http://web-proxy.corp.hpecorp.net:8080
6 changes: 3 additions & 3 deletions setup.py
@@ -1,6 +1,6 @@
from setuptools import setup, find_packages

-VERSION = '0.0.8'
+VERSION = '0.0.9'
DESCRIPTION = 'Metadata Python Package'
LONG_DESCRIPTION = 'Metadata framework storing AI metadata into MLMD'

@@ -13,8 +13,8 @@
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
packages=find_packages(),
install_requires=["ml-metadata==1.11.0",
"dvc[ssh,s3]==2.27.0", "pandas", "retrying", "pyarrow", "neo4j", \
install_requires=["ml-metadata==1.15.0",
"dvc[ssh,s3]==3.51.1", "pandas", "retrying", "pyarrow", "neo4j", \
"scikit-learn", "tabulate", "click", "minio", "paramiko"], # add any additional packages that
# needs to be installed along with your package. Eg: 'caer'
