Skip to content

Commit

Permalink
Dataset: enable default datastore based on .env
Browse files Browse the repository at this point in the history
  • Loading branch information
mahiki committed Feb 9, 2024
1 parent 4904372 commit ec7e575
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "PrefectInterfaces"
uuid = "25d49962-0f22-42a0-bb44-b427e1ded1d4"
authors = ["mahiki <[email protected]>"]
version = "0.3.1"
version = "0.3.2"

[deps]
AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc"
Expand Down
2 changes: 1 addition & 1 deletion src/Datasets/Datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ julia> df = read(ds)
"""
@with_kw struct Dataset <: AbstractPrefectInterface
dataset_name::String
datastore_type::String = "local"
datastore_type::String = PrefectDatastoreNames().default
dataset_type::String = "extracts"
file_format::String = "csv"
rundate::Date = Dates.today()
Expand Down
14 changes: 11 additions & 3 deletions src/Datasets/config.jl
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
"""
PrefectDatastoreNames(remote::String, local::String) <: AbstractPrefectInterface
PrefectDatastoreNames(remote::String, local::String, default::String) <: AbstractPrefectInterface
A struct to store the names of Prefect blocks which reference local and remote file storage.
The default constructor pulls the names from ENV variables
The default constructor pulls the names from ENV variables; the `default` field falls back to "local" when PREFECT_DATASTORE_DEFAULT_TYPE is not set.
PREFECT_DATA_BLOCK_REMOTE
PREFECT_DATA_BLOCK_LOCAL
PREFECT_DATASTORE_DEFAULT_TYPE ∈ ["local", "remote"]
"""
mutable struct PrefectDatastoreNames <: AbstractPrefectInterface
# Name of the Prefect block that references remote file storage.
remote::AbstractString
# Name of the Prefect block that references local file storage.
# `local` is a reserved word in Julia, hence the var"..." field-name syntax.
var"local"::AbstractString
# Default datastore type. Documented values are "local" or "remote"; the struct
# itself does not validate the value.
default::AbstractString
end
PrefectDatastoreNames() = begin
if ! (haskey(ENV, "PREFECT_DATA_BLOCK_REMOTE") && haskey(ENV, "PREFECT_DATA_BLOCK_LOCAL"))
Expand All @@ -18,6 +21,11 @@ PrefectDatastoreNames() = begin
"PREFECT_DATA_BLOCK_LOCAL"
throw(KeyError("PREFECT_DATA_BLOCK_REMOTE and/or PREFECT_DATA_BLOCK_LOCAL"))
else
PrefectDatastoreNames(ENV["PREFECT_DATA_BLOCK_REMOTE"], ENV["PREFECT_DATA_BLOCK_LOCAL"])
PrefectDatastoreNames(
ENV["PREFECT_DATA_BLOCK_REMOTE"]
, ENV["PREFECT_DATA_BLOCK_LOCAL"]
, get(ENV, "PREFECT_DATASTORE_DEFAULT_TYPE", "local")
)
end
end
# Two-argument convenience constructor: `x` is the remote block name and `y` the
# local block name. The `default` field is taken from the
# PREFECT_DATASTORE_DEFAULT_TYPE environment variable, or "local" when unset.
function PrefectDatastoreNames(x, y)
    default_type = get(ENV, "PREFECT_DATASTORE_DEFAULT_TYPE", "local")
    return PrefectDatastoreNames(x, y, default_type)
end
4 changes: 3 additions & 1 deletion test/config/config.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ api = PrefectAPI("https://api.prefect.cloud/api/accounts/0eEXAMPLE", "abcd1234")

# Datasets
dst = Datasets.PrefectDatastoreNames()
@test propertynames(dst) == (:remote, :local)
@test propertynames(dst) == (:remote, :local, :default)
@test dst.remote == "s3-bucket/willowdata"
@test dst.local == "local-file-system/willowdata"
@test dst.default == "local"
@test typeof(dst) == Datasets.PrefectDatastoreNames

# Two-argument constructor: block names are given explicitly, while `default` is
# read from PREFECT_DATASTORE_DEFAULT_TYPE (falling back to "local"). The last
# assertion therefore assumes that variable is unset or set to "local".
ndst = Datasets.PrefectDatastoreNames("s3/barchetta", "lfs/spirit-of-radio")
@test ndst.remote == "s3/barchetta"
@test ndst.local == "lfs/spirit-of-radio"
@test ndst.default == "local"
2 changes: 2 additions & 0 deletions test/dataset/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ d2 = Dataset(dataset_name="test_dataset_2", rundate_type="latest", rundate=Date(
d3 = Dataset(dataset_name="test_dataset_3", rundate_type="specific", rundate=Dates.today())
d4 = Dataset(dataset_name="test_dataset_4", rundate_type="specific", rundate=Date("2020-11-03"))

@test d1.datastore_type == "local"

@test Datasets.rundate_path_selector(d1) == (read = "extracts/csv/latest/dataset=test_dataset_1/data.csv", write = ["extracts/csv/latest/dataset=test_dataset_1/data.csv", "extracts/csv/dataset=test_dataset_1/rundate=$(Dates.today())/data.csv"])

@test Datasets.rundate_path_selector(d2) == (read = "extracts/csv/latest/dataset=test_dataset_2/data.csv", write = ["extracts/csv/latest/dataset=test_dataset_2/data.csv"])
Expand Down

2 comments on commit ec7e575

@mahiki
Copy link
Owner Author

@mahiki mahiki commented on ec7e575 Feb 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register

Release Notes:

Dataset Default Datastore Type

  • Yes, this couples the package more tightly to Prefect; that is a consequence of the 'Blocks' paradigm.
  • Set PREFECT_DATASTORE_DEFAULT_TYPE in the environment, or pass the datastore type explicitly.
  • Tests pass.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/100537

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.2 -m "<description of version>" ec7e5759652554deb729ec86bcb28420ea624f89
git push origin v0.3.2

Please sign in to comment.