diff --git a/Project.toml b/Project.toml index d5c8d70..b42b148 100644 --- a/Project.toml +++ b/Project.toml @@ -4,10 +4,13 @@ authors = ["@deyandyankov and contributors"] version = "0.4.4" [deps] +AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc" +AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +Minio = "4281f0d9-7ae0-406e-9172-b7277c1efa20" ShowCases = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index 0c24670..bb12936 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -19,6 +19,10 @@ using URIs using JSON using ShowCases using FilePathsBase: AbstractPath +using AWSS3 +using Minio +using AWS: AbstractAWSConfig, AWSConfig, AWSCredentials, global_aws_config + include("types/core.jl") export diff --git a/src/loggers.jl b/src/loggers.jl index 183befd..6fe11cd 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -74,7 +74,7 @@ Stores an artifact (file) in the run's artifact location. # Arguments - `mlf::MLFlow`: [`MLFlow`](@ref) onfiguration. Currently not used, but when this method is extended to support `S3`, information from `mlf` will be needed. - `run`: one of [`MLFlowRun`](@ref), [`MLFlowRunInfo`](@ref) or `String`. -- `basefilename`: name of the file to be written. +- `basefilename`: name of the file to be written; may include a folder, as in `model/mycode.jl`, in which case that folder is created inside the artifact directory. - `data`: artifact content, an object that can be written directly to a file handle. # Throws @@ -83,19 +83,54 @@ Stores an artifact (file) in the run's artifact location. # Returns path of the artifact that was created. 
"""
-function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data)
+function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data; artifact_path="")
+    # `artifact_path` (keyword, default ""): optional sub-folder of the run's artifact
+    # location. Any folder part of `basefilename` is placed below it.
     mlflowrun = getrun(mlf, run_id)
-    artifact_uri = mlflowrun.info.artifact_uri
-    mkpath(artifact_uri)
-    filepath = joinpath(artifact_uri, basefilename)
-    try
-        f = open(filepath, "w")
-        write(f, data)
-        close(f)
-    catch e
-        error("Unable to create artifact $(filepath): $e")
+    root_uri = mlflowrun.info.artifact_uri
+    subdirs = filter(!isempty, String[string(artifact_path), dirname(basefilename)])
+    filename = basename(basefilename)
+
+    # The storage backend is chosen from the run's own artifact URI, so the caller's
+    # `artifact_path` / `basefilename` cannot change which branch is taken.
+    if !startswith(root_uri, "s3://")
+        # Local artifact store: create the target folder, then write the file in place.
+        artifact_dir = joinpath(root_uri, subdirs...)
+        mkpath(artifact_dir)
+        filepath = joinpath(artifact_dir, filename)
+        try
+            open(filepath, "w") do f
+                write(f, data)
+            end
+        catch e
+            error("Unable to create artifact $(filepath): $e")
+        end
+    else
+        # Region is optional: AWS_REGION first, then AWS_DEFAULT_REGION, else empty.
+        region = get(ENV, "AWS_REGION", "")
+        if isempty(region)
+            region = get(ENV, "AWS_DEFAULT_REGION", "")
+        end
+
+        # A custom endpoint selects a Minio-style configuration; otherwise the
+        # process-wide AWS configuration is used.
+        if haskey(ENV, "MLFLOW_S3_ENDPOINT_URL")
+            s3config = MinioConfig(ENV["MLFLOW_S3_ENDPOINT_URL"], AWSCredentials(); region=region)
+        else
+            s3config = global_aws_config()
+        end
+
+        # Split "s3://bucket/key/prefix" into the bucket and the key prefix. Object keys
+        # are joined with "/" by hand because `joinpath` uses the separator of the
+        # operating system and restarts at any absolute component.
+        location = split(strip(root_uri[6:end], '/'), '/'; limit=2)
+        bucket = String(location[1])
+        isempty(bucket) && error("Unable to upload artifact to S3: no bucket in artifact URI $(root_uri)")
+        keyparts = String[location[2:end]; [strip(d, '/') for d in subdirs]; filename]
+        key = join(filter(!isempty, keyparts), "/")
+        filepath = string("s3://", bucket, "/", key)
+
+        try
+            # Serialise in memory instead of through a temporary file: nothing to
+            # clean up, nothing left behind when the upload fails, no name clashes.
+            buffer = IOBuffer()
+            write(buffer, data)
+            s3_put(s3config, bucket, key, take!(buffer))
+        catch e
+            error("Unable to upload artifact to S3 $(filepath): $e")
+        end
     end
-    filepath
+
+    return filepath
 end

 logartifact(mlf::MLFlow, run::MLFlowRun, basefilename::AbstractString, data) = logartifact(mlf, run.info, basefilename, data)