From 8aed798c1b1656b8fd3b1da7a61fa38462ff3e20 Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Fri, 22 Dec 2023 12:01:00 -0600 Subject: [PATCH 01/11] add S3 support for artifacts --- src/MLFlowClient.jl | 3 +++ src/loggers.jl | 39 ++++++++++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 0c24670..fb352fd 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -19,6 +19,9 @@ using URIs using JSON using ShowCases using FilePathsBase: AbstractPath +using AWSS3 +using AWS # For configuring the AWS credentials and region + include("types/core.jl") export diff --git a/src/loggers.jl b/src/loggers.jl index 183befd..ea297a3 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -86,16 +86,37 @@ path of the artifact that was created. function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data) mlflowrun = getrun(mlf, run_id) artifact_uri = mlflowrun.info.artifact_uri - mkpath(artifact_uri) - filepath = joinpath(artifact_uri, basefilename) - try - f = open(filepath, "w") - write(f, data) - close(f) - catch e - error("Unable to create artifact $(filepath): $e") + + if !startswith(artifact_uri, "s3://") + mkpath(artifact_uri) + filepath = joinpath(artifact_uri, basefilename) + try + open(filepath, "w") do f + write(f, data) + end + catch e + error("Unable to create artifact $(filepath): $e") + end + else + # Configure for MinIO or AWS S3 based on environment variable + s3config = haskey(ENV, "MLFLOW_S3_ENDPOINT_URL") ? 
+ AWSConfig(aws_access_key_id = ENV["AWS_ACCESS_KEY_ID"], + aws_secret_access_key = ENV["AWS_SECRET_ACCESS_KEY"], + aws_region = ENV["AWS_REGION"], # or any specific region + s3_endpoint = ENV["MLFLOW_S3_ENDPOINT_URL"]) : + global_aws_config() # default AWS configuration + + bucket, path = split_s3_uri(artifact_uri) + filepath = joinpath(path, basefilename) + + try + s3_put(s3config, bucket, filepath, data) + catch e + error("Unable to upload artifact to S3 $(filepath): $e") + end end - filepath + + return filepath end logartifact(mlf::MLFlow, run::MLFlowRun, basefilename::AbstractString, data) = logartifact(mlf, run.info, basefilename, data) From cc95b9028a0a0ffb59ba397f38414a25d1b9226a Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Fri, 22 Dec 2023 12:16:57 -0600 Subject: [PATCH 02/11] update deps --- Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project.toml b/Project.toml index d5c8d70..a6f14e8 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,8 @@ authors = ["@deyandyankov and contributors"] version = "0.4.4" [deps] +AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc" +AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" From 6df8c18b22a898adbbbec76cdb76d5ec3f982343 Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Fri, 22 Dec 2023 15:30:04 -0600 Subject: [PATCH 03/11] wip --- src/MLFlowClient.jl | 3 ++- src/loggers.jl | 26 ++++++++++++++++---------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index fb352fd..026d4a4 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -20,7 +20,8 @@ using JSON using ShowCases using FilePathsBase: AbstractPath using AWSS3 -using AWS # For configuring the AWS credentials and region +using Minio +using AWS: AbstractAWSConfig, AWSConfig, AWSCredentials, global_aws_config, set_global_aws_config 
include("types/core.jl") diff --git a/src/loggers.jl b/src/loggers.jl index ea297a3..0a960ae 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -98,19 +98,25 @@ function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::Abstract error("Unable to create artifact $(filepath): $e") end else - # Configure for MinIO or AWS S3 based on environment variable - s3config = haskey(ENV, "MLFLOW_S3_ENDPOINT_URL") ? - AWSConfig(aws_access_key_id = ENV["AWS_ACCESS_KEY_ID"], - aws_secret_access_key = ENV["AWS_SECRET_ACCESS_KEY"], - aws_region = ENV["AWS_REGION"], # or any specific region - s3_endpoint = ENV["MLFLOW_S3_ENDPOINT_URL"]) : - global_aws_config() # default AWS configuration + region = get(ENV, "AWS_REGION", "") # Optional, defaults to empty if not set - bucket, path = split_s3_uri(artifact_uri) - filepath = joinpath(path, basefilename) + if haskey(ENV, "MLFLOW_S3_ENDPOINT_URL") + s3creds = AWSCredentials() + s3config = MinioConfig(ENV["MLFLOW_S3_ENDPOINT_URL"], s3creds; region=region) + else + s3config = global_aws_config() # default AWS configuration + end + + filepath = joinpath(artifact_uri, basefilename) try - s3_put(s3config, bucket, filepath, data) + open(joinpath("/tmp/",basefilename), "w") do f + write(f, data) + end + open(joinpath("/tmp/",basefilename), "r") do f + file_data = read(f) + s3_put(s3config, artifact_uri, filepath, file_data) + end catch e error("Unable to upload artifact to S3 $(filepath): $e") end From e37887122a8e66bdf6c38f9912c1345b707948f4 Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Fri, 22 Dec 2023 15:52:38 -0600 Subject: [PATCH 04/11] should be good --- src/loggers.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/loggers.jl b/src/loggers.jl index 0a960ae..4efe61e 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -99,6 +99,10 @@ function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::Abstract end else region = get(ENV, "AWS_REGION", "") # Optional, defaults to empty if not set 
+ + if region == "" + region = get(ENV, "AWS_DEFUALT_REGION", "") + end if haskey(ENV, "MLFLOW_S3_ENDPOINT_URL") s3creds = AWSCredentials() @@ -108,6 +112,7 @@ function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::Abstract end filepath = joinpath(artifact_uri, basefilename) + artifact_uri = artifact_uri[6:end] try open(joinpath("/tmp/",basefilename), "w") do f @@ -115,8 +120,9 @@ function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::Abstract end open(joinpath("/tmp/",basefilename), "r") do f file_data = read(f) - s3_put(s3config, artifact_uri, filepath, file_data) + s3_put(s3config, artifact_uri, basefilename, data) end + rm(joinpath("/tmp",basefilename)) catch e error("Unable to upload artifact to S3 $(filepath): $e") end From 3f989a5a32e221fb7a230d47c642ca17175798f0 Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Tue, 26 Dec 2023 08:06:28 -0600 Subject: [PATCH 05/11] Add Minio --- Project.toml | 1 + src/MLFlowClient.jl | 2 +- src/loggers.jl | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index a6f14e8..b42b148 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +Minio = "4281f0d9-7ae0-406e-9172-b7277c1efa20" ShowCases = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 026d4a4..bb12936 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -21,7 +21,7 @@ using ShowCases using FilePathsBase: AbstractPath using AWSS3 using Minio -using AWS: AbstractAWSConfig, AWSConfig, AWSCredentials, global_aws_config, set_global_aws_config +using AWS: AbstractAWSConfig, AWSConfig, AWSCredentials, global_aws_config include("types/core.jl") diff 
--git a/src/loggers.jl b/src/loggers.jl index 4efe61e..c5a2b8e 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -112,9 +112,10 @@ function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::Abstract end filepath = joinpath(artifact_uri, basefilename) - artifact_uri = artifact_uri[6:end] + artifact_uri = artifact_uri[6:end] # get rid of s3:// so s3_put doesnt' complain try + #TODO: Figure out the correct IO stream way of doing this open(joinpath("/tmp/",basefilename), "w") do f write(f, data) end From 77e527bdf5f2035033c1a90fefbabb19d15247f6 Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Tue, 26 Dec 2023 12:02:10 -0600 Subject: [PATCH 06/11] fix typop --- src/loggers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/loggers.jl b/src/loggers.jl index c5a2b8e..9c69178 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -101,7 +101,7 @@ function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::Abstract region = get(ENV, "AWS_REGION", "") # Optional, defaults to empty if not set if region == "" - region = get(ENV, "AWS_DEFUALT_REGION", "") + region = get(ENV, "AWS_DEFAULT_REGION", "") end if haskey(ENV, "MLFLOW_S3_ENDPOINT_URL") From c87ddd72885bb30b6e17563a60efd8714616775a Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Tue, 26 Dec 2023 13:31:09 -0600 Subject: [PATCH 07/11] support subdirs --- src/loggers.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/loggers.jl b/src/loggers.jl index 9c69178..5919471 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -85,7 +85,8 @@ path of the artifact that was created. 
""" function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data) mlflowrun = getrun(mlf, run_id) - artifact_uri = mlflowrun.info.artifact_uri + artifact_uri = joinpath(mlflowrun.info.artifact_uri,dirname(basefilename)) + basefilename = basename(basefilename) if !startswith(artifact_uri, "s3://") mkpath(artifact_uri) From d71019fec9cedaa6a615ea3ee5621f311057e410 Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Tue, 26 Dec 2023 13:34:09 -0600 Subject: [PATCH 08/11] update docstring somewhat --- src/loggers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/loggers.jl b/src/loggers.jl index 5919471..c65237e 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -74,7 +74,7 @@ Stores an artifact (file) in the run's artifact location. # Arguments - `mlf::MLFlow`: [`MLFlow`](@ref) onfiguration. Currently not used, but when this method is extended to support `S3`, information from `mlf` will be needed. - `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref) or `String`. -- `basefilename`: name of the file to be written. +- `basefilename`: name of the file to be written; may include a folder, such as `model/mycode.jl`, in which case the folder is created in the artifact directory. - `data`: artifact content, an object that can be written directly to a file handle.
# Throws From f8c0e8ad32fb022fbfc86d4673df8e46e126774f Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Thu, 28 Dec 2023 10:50:24 -0600 Subject: [PATCH 09/11] fix typo --- src/loggers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/loggers.jl b/src/loggers.jl index c65237e..b79f450 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -122,7 +122,7 @@ function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::Abstract end open(joinpath("/tmp/",basefilename), "r") do f file_data = read(f) - s3_put(s3config, artifact_uri, basefilename, data) + s3_put(s3config, artifact_uri, basefilename, file_data) end rm(joinpath("/tmp",basefilename)) catch e From 1a34fd90b68fd2da2a1999a8590f61702bf6c069 Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Thu, 28 Dec 2023 10:58:02 -0600 Subject: [PATCH 10/11] fix trailing / bug i found --- src/loggers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/loggers.jl b/src/loggers.jl index b79f450..c2eded1 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -113,7 +113,7 @@ function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::Abstract end filepath = joinpath(artifact_uri, basefilename) - artifact_uri = artifact_uri[6:end] # get rid of s3:// so s3_put doesnt' complain + artifact_uri = rstrip(artifact_uri[6:end], '/')# get rid of s3:// so s3_put doesn't complain try #TODO: Figure out the correct IO stream way of doing this From 290ac2aa7f38afbf583970f89f3d78f9238b41ff Mon Sep 17 00:00:00 2001 From: Matt Camp Date: Mon, 12 Feb 2024 10:12:38 -0600 Subject: [PATCH 11/11] artifact path is in the python api and was missing here --- src/loggers.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/loggers.jl b/src/loggers.jl index c2eded1..6fe11cd 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -83,9 +83,9 @@ Stores an artifact (file) in the run's artifact location. # Returns path of the artifact that was created.
""" -function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data) +function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data; artifact_path="") mlflowrun = getrun(mlf, run_id) - artifact_uri = joinpath(mlflowrun.info.artifact_uri,dirname(basefilename)) + artifact_uri = joinpath(mlflowrun.info.artifact_uri,artifact_path,dirname(basefilename)) basefilename = basename(basefilename) if !startswith(artifact_uri, "s3://")