Skip to content

Commit

Permalink
Merge branch 'override_attributes'
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander-Barth committed Mar 15, 2022
2 parents 64399f1 + 246ddbb commit 91b1315
Show file tree
Hide file tree
Showing 7 changed files with 223 additions and 83 deletions.
4 changes: 3 additions & 1 deletion docs/src/dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ NCDataset
```

Useful functions that operate on datasets are:

```@docs
keys(ds::NCDataset)
haskey
getindex(ds::NCDataset,varname::AbstractString)
variable
cfvariable
sync
close
path
NCDatasets.path
ncgen
varbyattrib
write
Expand Down
2 changes: 1 addition & 1 deletion docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ To get started quickly see the [Quickstart](@ref) section. Otherwise see the fol
* [Dimensions](@ref) : accessing/creating NetCDF dimensions
* [Variables](@ref) : accessing/examining the variables (or dimensions) stored within a NetCDF dataset.
* [Attributes](@ref) : accessing/creating NetCDF attributes
* See [Performance tips](@ref), [Known issues](@ref), [Experimental features](@ref) for more information.
* See [Performance tips](@ref performance_tips), [Known issues](@ref), [Experimental features](@ref) for more information.

## Quickstart

Expand Down
222 changes: 141 additions & 81 deletions src/cfvariable.jl
Original file line number Diff line number Diff line change
Expand Up @@ -254,50 +254,160 @@ function _boundsParentVar(ds,varname)
end
end

return ""
return nothing
end
end


"""
calendar,time_origin,time_factor = calendar_time(
attrib;
calendar = nothing,
time_origin = nothing,
time_factor = nothing)
)
_getattrib(ds,v,parentname,attribname,default)
Get a NetCDF attribute, looking also at the parent variable name
(linked via the bounds attribute as following the CF conventions).
The default value is returned if the attribute cannot be found.
"""
function _getattrib(ds,v,parentname,attribname,default)
    # An attribute defined directly on the variable always takes precedence.
    own = get(v.attrib,attribname,nothing)
    own === nothing || return own

    # No parent variable to fall back on: use the caller-supplied default.
    parentname === nothing && return default

    # Otherwise consult the parent variable; `default` is returned when the
    # parent does not define the attribute either.
    pvar = variable(ds,parentname)
    return get(pvar.attrib,attribname,default)
end

"""
function _calendar_time(
attrib;
calendar = nothing,
time_origin = nothing,
time_factor = nothing,
)
v = cfvariable(ds::NCDataset,varname::AbstractString; <attrib> = <value>)
if haskey(attrib,"units")
units = attrib["units"]
if (units isa String) && occursin(" since ",units)
calendar = lowercase(get(attrib,"calendar","standard"))
try
time_origin,time_factor = CFTime.timeunits(units, calendar)
catch
# keep defaults
end
Return the NetCDF variable `varname` in the dataset `ds` as a
`NCDatasets.CFVariable`. The keyword arguments `<attrib>` are
the NetCDF attributes (`fillvalue`, `missing_value`, `scale_factor`, `add_offset`,
`units` and `calendar`) relevant to the CF conventions.
By specifying the value of these attributes, one can override the value
specified in the NetCDF file. If the attribute is set to `nothing`, then
the attribute is not loaded and the corresponding transformation is ignored.
This function is similar to `ds[varname]` with the additional flexibility that
some variable attributes can be overridden.
Example:
```julia
NCDataset("foo.nc","c") do ds
defVar(ds,"data",[10., 11., 12., 13.], ("time",), attrib = Dict(
"add_offset" => 10.,
"scale_factor" => 0.2))
end
# The stored (packed) values are [0., 5., 10., 15.]
# since 0.2 .* [0., 5., 10., 15.] .+ 10 is [10., 11., 12., 13.]
ds = NCDataset("foo.nc");
@show ds["data"].var[:]
# returns [0., 5., 10., 15.]
@show cfvariable(ds,"data")[:]
# returns [10., 11., 12., 13.]
# neither add_offset nor scale_factor are applied
@show cfvariable(ds,"data", add_offset = nothing, scale_factor = nothing)[:]
# returns [0, 5, 10, 15]
# add_offset is applied but not scale_factor
@show cfvariable(ds,"data", scale_factor = nothing)[:]
# returns [10, 15, 20, 25]
# 0 is declared as the fill value (add_offset and scale_factor are applied as usual)
@show cfvariable(ds,"data", fillvalue = 0)[:]
# returns [missing, 11., 12., 13.]
# Use the time units: days since 2000-01-01
@show cfvariable(ds,"data", units = "days since 2000-01-01")[:]
# returns [DateTime(2000,1,11), DateTime(2000,1,12), DateTime(2000,1,13), DateTime(2000,1,14)]
close(ds)
```
"""
function cfvariable(ds,
                    varname;
                    _v = variable(ds,varname),
                    # special case for bounds variables, which inherit
                    # units and calendar from their parent variable
                    _parentname = _boundsParentVar(ds,varname),
                    fillvalue = get(_v.attrib,"_FillValue",nothing),
                    # missing_value can be a vector
                    missing_value = get(_v.attrib,"missing_value",eltype(_v)[]),
                    #valid_min = get(_v.attrib,"valid_min",nothing),
                    #valid_max = get(_v.attrib,"valid_max",nothing),
                    #valid_range = get(_v.attrib,"valid_range",nothing),
                    scale_factor = get(_v.attrib,"scale_factor",nothing),
                    add_offset = get(_v.attrib,"add_offset",nothing),
                    # look also at parent if defined
                    units = _getattrib(ds,_v,_parentname,"units",nothing),
                    calendar = _getattrib(ds,_v,_parentname,"calendar",nothing),
                    )

    v = _v
    T = eltype(v)

    # Remain `nothing` unless `units` describes a time axis ("<unit> since <origin>").
    time_origin = nothing
    time_factor = nothing

    if (units isa String) && occursin(" since ",units)
        # `nothing` is a singleton: test identity (===) rather than equality
        if calendar === nothing
            calendar = "standard"
        elseif calendar isa String
            calendar = lowercase(calendar)
        end
        try
            time_origin,time_factor = CFTime.timeunits(units, calendar)
        catch
            # ignore, warning is emitted by CFTime.timeunits;
            # time_origin and time_factor stay `nothing`
        end
    end

    # Element type after applying scale_factor/add_offset: promote the
    # stored type with the type of every transformation that is active.
    scaledtype = T
    if eltype(v) <: Number
        if scale_factor !== nothing
            scaledtype = promote_type(scaledtype, typeof(scale_factor))
        end

        if add_offset !== nothing
            scaledtype = promote_type(scaledtype, typeof(add_offset))
        end
    end

    # Effective (possibly overridden) attributes stored with the CFVariable
    storage_attrib = (
        fillvalue = fillvalue,
        # splat so that a scalar or a vector both end up as a tuple
        missing_values = (missing_value...,),
        scale_factor = scale_factor,
        add_offset = add_offset,
        calendar = calendar,
        time_origin = time_origin,
        time_factor = time_factor,
    )

    rettype = _get_rettype(ds, calendar, fillvalue, missing_value, scaledtype)

    return CFVariable{rettype,ndims(v),typeof(v),typeof(_v.attrib),typeof(storage_attrib)}(
        v,_v.attrib,storage_attrib)

end

export cfvariable

"""
v = getindex(ds::NCDataset,varname::AbstractString)
Return the NetCDF variable `varname` in the dataset `ds` as a
`NCDataset.CFVariable`. The CF convention are honored when the
`NCDatasets.CFVariable`. The following CF conventions are honored when the
variable is indexed:
* `_FillValue` or `missing_value` (which can be a list) will be returned as `missing`
* `_FillValue` or `missing_value` (which can be a list) will be returned as `missing`. `NCDatasets` does not implicitly use the default NetCDF fill values when reading data.
* `scale_factor` and `add_offset` are applied (output = `scale_factor` * data_in_file + `add_offset`)
* time variables (recognized by the units attribute and possibly the calendar attribute) are returned usually as
`DateTime` object. Note that `DateTimeAllLeap`, `DateTimeNoLeap` and
Expand All @@ -324,65 +434,15 @@ variables:
In this case, the variable `time_bnds` uses the units and calendar of `time`
because both variables are related through the bounds attribute following the CF conventions.
See also cfvariable
"""
function Base.getindex(ds::AbstractDataset,varname::SymbolOrString)
    # Indexing a dataset by variable name is `cfvariable` without any
    # attribute override: every CF attribute keeps its default, i.e. the
    # value read from the file. Delegating keeps a single implementation of
    # the fill value / scaling / time decoding set-up instead of a second,
    # diverging copy here.
    return cfvariable(ds, varname)
end

function _get_rettype(ds, calendar, fillvalue, rettype)

function _get_rettype(ds, calendar, fillvalue, missing_value, rettype)
# rettype can be a date if calendar is different from nothing
if calendar !== nothing
DT = nothing
Expand All @@ -402,7 +462,7 @@ function _get_rettype(ds, calendar, fillvalue, rettype)
end
end

if fillvalue !== nothing
if (fillvalue !== nothing) || (!isempty(missing_value))
rettype = Union{Missing,rettype}
end
return rettype
Expand Down
1 change: 1 addition & 0 deletions src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ Base.keys(ds::NCDataset) = listVar(ds.ncid)

"""
    path(ds::NCDataset)

Return the file path (or the OPeNDAP URL) of the NCDataset `ds`.
"""
function path(ds::NCDataset)
    return nc_inq_path(ds.ncid)
end
Expand Down
3 changes: 3 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,9 @@ println("NetCDF version: ",NCDatasets.nc_inq_libvers())

include("test_chunk_cache.jl")
include("test_enum.jl")

include("test_missing_value.jl")
include("test_override_attrib.jl")
end

@testset "NetCDF4 groups" begin
Expand Down
36 changes: 36 additions & 0 deletions test/test_missing_value.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using NCDatasets
using DataStructures
# `@test` is used below; load Test so the file also runs on its own and not
# only when included from runtests.jl
using Test

# same behaviour as Python's netCDF4 1.5.8 and XArray 0.21.1

filename = tempname()
# The mode "c" stands for creating a new file (clobber)
ds = NCDataset(filename,"c")


ds.dim["dim"] = 3

# single missing value: the stored value 123 must be read back as `missing`

missing_value = 123.
v = defVar(ds,"var1",Float64,("dim",), attrib = OrderedDict("missing_value" => missing_value))
data = [0., 1., 123.]
# write through `.var` to store the raw values without any CF transformation
v.var[:] = data
@test isequal(v[:],[0.,1.,missing])

# 2 missing values: every listed value must be read back as `missing`
missing_value = [123., 124.]
v = defVar(ds,"var2",Float64,("dim",), attrib = OrderedDict("missing_value" => missing_value))
data = [0., 123., 124.]
v.var[:] = data
@test isequal(v[:],[0.,missing,missing])


# missing values of wrong type: the attribute must not mask any data
v = defVar(ds,"var3",Float64,("dim",), attrib = OrderedDict("missing_value" => "value of wrong type"))
data = [0., 1., 2.]
v.var[:] = data
@test isequal(v[:],[0.,1.,2.])

close(ds)

38 changes: 38 additions & 0 deletions test/test_override_attrib.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
using NCDatasets
# `@test` and `DateTime` are used below; load them explicitly so the file
# also runs on its own and not only when included from runtests.jl
using Test
using Dates

fname = tempname()


NCDataset(fname,"c") do ds
    defVar(ds,"data",[10., 11., 12., 13.], ("time",), attrib = Dict(
        "add_offset" => 10.,
        "scale_factor" => 0.2))
end

# stored values are [0., 5., 10., 15.]
# since 0.2 .* [0., 5., 10., 15.] .+ 10 is [10., 11., 12., 13.]

ds = NCDataset(fname);

# `.var` gives access to the raw (packed) values
@test ds["data"].var[:] == [0., 5., 10., 15.]

# without overrides, cfvariable behaves like ds["data"]
@test cfvariable(ds,"data")[:] == [10., 11., 12., 13.]

# neither add_offset nor scale_factor are applied
@test cfvariable(ds,"data", add_offset = nothing, scale_factor = nothing)[:] == [0, 5, 10, 15]

# add_offset is applied but not scale_factor
@test cfvariable(ds,"data", scale_factor = nothing)[:] == [10, 15, 20, 25]

# 0 is declared a fill value (add_offset and scale_factor are applied as usual)
@test isequal(cfvariable(ds,"data", fillvalue = 0)[:], [missing, 11., 12., 13.])

# 0 and 5 are declared a missing value
@test isequal(cfvariable(ds,"data", missing_value = (0,5))[:], [missing, missing, 12., 13.])


# the unpacked values are interpreted as days since the overridden time origin
@test cfvariable(ds,"data", units = "days since 2000-01-01")[:] == [
    DateTime(2000,1,11), DateTime(2000,1,12), DateTime(2000,1,13), DateTime(2000,1,14)]

close(ds)

#rm(fname)

0 comments on commit 91b1315

Please sign in to comment.