From ec7e5759652554deb729ec86bcb28420ea624f89 Mon Sep 17 00:00:00 2001 From: mahiki Date: Thu, 8 Feb 2024 22:06:29 -0800 Subject: [PATCH] Dataset: enable default datastore based on .env --- Project.toml | 2 +- src/Datasets/Datasets.jl | 2 +- src/Datasets/config.jl | 14 +++++++++++--- test/config/config.jl | 4 +++- test/dataset/dataset.jl | 2 ++ 5 files changed, 18 insertions(+), 6 deletions(-) diff --git a/Project.toml b/Project.toml index 8b0da36..699c417 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PrefectInterfaces" uuid = "25d49962-0f22-42a0-bb44-b427e1ded1d4" authors = ["mahiki "] -version = "0.3.1" +version = "0.3.2" [deps] AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc" diff --git a/src/Datasets/Datasets.jl b/src/Datasets/Datasets.jl index eff4faa..fceeca1 100644 --- a/src/Datasets/Datasets.jl +++ b/src/Datasets/Datasets.jl @@ -62,7 +62,7 @@ julia> df = read(ds) """ @with_kw struct Dataset <: AbstractPrefectInterface dataset_name::String - datastore_type::String = "local" + datastore_type::String = PrefectDatastoreNames().default dataset_type::String = "extracts" file_format::String = "csv" rundate::Date = Dates.today() diff --git a/src/Datasets/config.jl b/src/Datasets/config.jl index f8b78f6..a486164 100644 --- a/src/Datasets/config.jl +++ b/src/Datasets/config.jl @@ -1,14 +1,17 @@ """ - PrefectDatastoreNames(remote::String, local::String) <: AbstractPrefectInterface + PrefectDatastoreNames(remote::String, local::String, default::String) <: AbstractPrefectInterface + A struct to store the names of Prefect blocks which reference local and remote file storage. -The default constructor pulls the names from ENV variables +The default constructor pulls the names from ENV variables, and default field default is "local". PREFECT_DATA_BLOCK_REMOTE PREFECT_DATA_BLOCK_LOCAL + PREFECT_DATASTORE_DEFAULT_TYPE ∈ ["local", "remote"] """ mutable struct PrefectDatastoreNames <: AbstractPrefectInterface remote::AbstractString var"local"::AbstractString + default::AbstractString end PrefectDatastoreNames() = begin if ! (haskey(ENV, "PREFECT_DATA_BLOCK_REMOTE") && haskey(ENV, "PREFECT_DATA_BLOCK_LOCAL")) @@ -18,6 +21,11 @@ PrefectDatastoreNames() = begin "PREFECT_DATA_BLOCK_LOCAL" throw(KeyError("PREFECT_DATA_BLOCK_REMOTE and/or PREFECT_DATA_BLOCK_LOCAL")) else - PrefectDatastoreNames(ENV["PREFECT_DATA_BLOCK_REMOTE"], ENV["PREFECT_DATA_BLOCK_LOCAL"]) + PrefectDatastoreNames( + ENV["PREFECT_DATA_BLOCK_REMOTE"] + , ENV["PREFECT_DATA_BLOCK_LOCAL"] + , get(ENV, "PREFECT_DATASTORE_DEFAULT_TYPE", "local") + ) end end +PrefectDatastoreNames(x, y) = PrefectDatastoreNames(x, y, get(ENV, "PREFECT_DATASTORE_DEFAULT_TYPE", "local")) diff --git a/test/config/config.jl b/test/config/config.jl index f6f1abf..2b6fd49 100644 --- a/test/config/config.jl +++ b/test/config/config.jl @@ -18,11 +18,13 @@ api = PrefectAPI("https://api.prefect.cloud/api/accounts/0eEXAMPLE", "abcd1234") # Datasets dst = Datasets.PrefectDatastoreNames() -@test propertynames(dst) == (:remote, :local) +@test propertynames(dst) == (:remote, :local, :default) @test dst.remote == "s3-bucket/willowdata" @test dst.local == "local-file-system/willowdata" +@test dst.default == "local" @test typeof(dst) == Datasets.PrefectDatastoreNames ndst = Datasets.PrefectDatastoreNames("s3/barchetta", "lfs/spirit-of-radio") @test ndst.remote == "s3/barchetta" @test ndst.local == "lfs/spirit-of-radio" +@test ndst.default == "local" \ No newline at end of file diff --git a/test/dataset/dataset.jl b/test/dataset/dataset.jl index ebe1813..991a5c7 100644 --- a/test/dataset/dataset.jl +++ b/test/dataset/dataset.jl @@ -30,6 +30,8 @@ d2 = Dataset(dataset_name="test_dataset_2", rundate_type="latest", rundate=Date( d3 = Dataset(dataset_name="test_dataset_3", rundate_type="specific", rundate=Dates.today()) d4 = Dataset(dataset_name="test_dataset_4", rundate_type="specific", rundate=Date("2020-11-03")) +@test d1.datastore_type == "local" + @test Datasets.rundate_path_selector(d1) == (read = "extracts/csv/latest/dataset=test_dataset_1/data.csv", write = ["extracts/csv/latest/dataset=test_dataset_1/data.csv", "extracts/csv/dataset=test_dataset_1/rundate=$(Dates.today())/data.csv"]) @test Datasets.rundate_path_selector(d2) == (read = "extracts/csv/latest/dataset=test_dataset_2/data.csv", write = ["extracts/csv/latest/dataset=test_dataset_2/data.csv"])