-
Notifications
You must be signed in to change notification settings - Fork 32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DiskArrays for `Variable`'s
#205
Changes from 32 commits
d75c6cb
378be0d
cb35349
75fcfa4
c3dbfbc
012678b
88f3a5f
1bac35e
33816fe
d11cc27
9e03a54
821b67f
e0191ea
a7551b7
b6a4296
7ef519c
4371216
f13a09d
2e8199b
5605f4c
fce58de
3cba846
52acdac
6b0ab28
508baca
6dec5a0
bfb9f16
1f2f5e4
43a1e5b
9a2735b
94c6310
90e585b
7d32de5
3705d64
ec87d27
6f0c381
fd9579d
bb58df1
d6fbe43
0873d6d
7bd3961
fc4e099
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -309,85 +309,73 @@ end | |||||
nomissing(a::AbstractArray,value) = a | ||||||
export nomissing | ||||||
|
||||||
|
||||||
function Base.getindex(v::Variable,indexes::Int...) | ||||||
function readblock!(v::Variable, aout, indexes::TI...) where TI <: Union{AbstractUnitRange,StepRange} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
This dispatch was not correct so we were using the fallback for I get roughly the same performance as |
||||||
datamode(v.ds) | ||||||
return nc_get_var1(eltype(v),v.ds.ncid,v.varid,[i-1 for i in indexes[ndims(v):-1:1]]) | ||||||
_read_data_from_nc!(v, aout, indexes...) | ||||||
return aout | ||||||
end | ||||||
|
||||||
function Base.setindex!(v::Variable{T,N},data,indexes::Int...) where N where T | ||||||
@debug "$(@__LINE__)" | ||||||
datamode(v.ds) | ||||||
# use zero-based indexes and reversed order | ||||||
nc_put_var1(v.ds.ncid,v.varid,[i-1 for i in indexes[ndims(v):-1:1]],T(data)) | ||||||
return data | ||||||
function _read_data_from_nc!(v::Variable, aout, indexes::Int...) | ||||||
aout .= nc_get_var1(eltype(v),v.ds.ncid,v.varid,[i-1 for i in reverse(indexes)]) | ||||||
end | ||||||
|
||||||
function Base.getindex(v::Variable{T,N},indexes::Colon...) where {T,N} | ||||||
datamode(v.ds) | ||||||
data = Array{T,N}(undef,size(v)) | ||||||
nc_get_var!(v.ds.ncid,v.varid,data) | ||||||
|
||||||
# special case for scalar NetCDF variable | ||||||
if N == 0 | ||||||
return data[] | ||||||
else | ||||||
return data | ||||||
end | ||||||
function _read_data_from_nc!(v::Variable{T,N}, aout, indexes::TR...) where {T,N} where TR <: Union{StepRange{Int,Int},UnitRange{Int}} | ||||||
start,count,stride,jlshape = ncsub(indexes) | ||||||
nc_get_vars!(v.ds.ncid,v.varid,start,count,stride,aout) | ||||||
end | ||||||
|
||||||
function Base.setindex!(v::Variable{T,N},data::T,indexes::Colon...) where {T,N} | ||||||
@debug "setindex! colon $data" | ||||||
datamode(v.ds) # make sure that the file is in data mode | ||||||
tmp = fill(data,size(v)) | ||||||
nc_put_var(v.ds.ncid,v.varid,tmp) | ||||||
return data | ||||||
function _read_data_from_nc!(v::Variable{T,N}, aout, indexes::Union{Int,Colon,AbstractRange{<:Integer}}...) where {T,N} | ||||||
sz = size(v) | ||||||
start,count,stride = ncsub2(sz,indexes...) | ||||||
jlshape = _shape_after_slice(sz,indexes...) | ||||||
nc_get_vars!(v.ds.ncid,v.varid,start,count,stride,aout) | ||||||
end | ||||||
|
||||||
# union types cannot be used to avoid ambiguity | ||||||
for data_type = [Number, String, Char] | ||||||
@eval begin | ||||||
# call to v .= 123 | ||||||
function Base.setindex!(v::Variable{T,N},data::$data_type) where {T,N} | ||||||
@debug "setindex! $data" | ||||||
datamode(v.ds) # make sure that the file is in data mode | ||||||
tmp = fill(convert(T,data),size(v)) | ||||||
nc_put_var(v.ds.ncid,v.varid,tmp) | ||||||
return data | ||||||
end | ||||||
_read_data_from_nc!(v::Variable, aout) = _read_data_from_nc!(v, aout, 1) | ||||||
|
||||||
Base.setindex!(v::Variable,data::$data_type,indexes::Colon...) = setindex!(v::Variable,data) | ||||||
function writeblock!(v::Variable, data, indexes::TI...) where TI <: Union{AbstractUnitRange,StepRange} | ||||||
datamode(v.ds) | ||||||
_write_data_to_nc(v, data, indexes...) | ||||||
return data | ||||||
end | ||||||
|
||||||
function Base.setindex!(v::Variable{T,N},data::$data_type,indexes::StepRange{Int,Int}...) where {T,N} | ||||||
datamode(v.ds) # make sure that the file is in data mode | ||||||
start,count,stride,jlshape = ncsub(indexes[1:ndims(v)]) | ||||||
tmp = fill(convert(T,data),jlshape) | ||||||
nc_put_vars(v.ds.ncid,v.varid,start,count,stride,tmp) | ||||||
return data | ||||||
end | ||||||
end | ||||||
function _write_data_to_nc(v::Variable{T,N},data,indexes::Int...) where {T,N} | ||||||
nc_put_var1(v.ds.ncid,v.varid,[i-1 for i in reverse(indexes)],T(data[1])) | ||||||
end | ||||||
|
||||||
function Base.setindex!(v::Variable{T,N},data::AbstractArray{T,N},indexes::Colon...) where {T,N} | ||||||
datamode(v.ds) # make sure that the file is in data mode | ||||||
_write_data_to_nc(v::Variable, data) = _write_data_to_nc(v, data, 1) | ||||||
|
||||||
nc_put_var(v.ds.ncid,v.varid,data) | ||||||
return data | ||||||
function _write_data_to_nc(v::Variable{T, N}, data, indexes::StepRange{Int,Int}...) where {T, N} | ||||||
start,count,stride,jlshape = ncsub(indexes) | ||||||
nc_put_vars(v.ds.ncid,v.varid,start,count,stride,T.(data)) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Probably DiskArrays.jl won't let you do this optimisation either. You may need to override these methods on Just the But you can also leave this optimisation for later. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think by defining |
||||||
end | ||||||
|
||||||
function Base.setindex!(v::Variable{T,N},data::AbstractArray{T2,N},indexes::Colon...) where {T,T2,N} | ||||||
datamode(v.ds) # make sure that the file is in data mode | ||||||
tmp = | ||||||
if T <: Integer | ||||||
round.(T,data) | ||||||
else | ||||||
convert(Array{T,N},data) | ||||||
end | ||||||
function _write_data_to_nc(v::Variable, data, indexes::Union{AbstractRange{<:Integer}}...) | ||||||
ind = prod(length.(indexes)) == 1 ? first.(indexes) : normalizeindexes(size(v),indexes) | ||||||
return _write_data_to_nc(v, data, ind...) | ||||||
end | ||||||
|
||||||
nc_put_var(v.ds.ncid,v.varid,tmp) | ||||||
return data | ||||||
function grow!(v::CFVariable, data, indexes::Union{Integer, Colon}...) | ||||||
unlimdims = unlimited(Dimensions(CommonDataModel.dataset(v))) | ||||||
length(unlimdims) == 1 || error("Only 1 unlimited dimension is supported") | ||||||
alldims = dimnames(v) | ||||||
iunlimdim = findfirst(==(unlimdims[1]), alldims) | ||||||
icol = findfirst(x -> x isa Colon, indexes) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What if there are other colons? Shouldn't you just check that the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You're right, I must admit I implemented this pretty rapidly, and I didn't really test it. But I guess we'll have to look more deeply into it given the issue above! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As you saw with the bugs in DiskArrays edge cases, it's actually pretty hard to do it properly. |
||||||
iunlimdim == icol || ArgumentError("The Colon in the arguments must match the unlimited dimension.") | ||||||
inds = replace(indexes, Colon() => 1:length(data)) | ||||||
v[inds...] = data | ||||||
return v | ||||||
end | ||||||
|
||||||
getchunksize(v::Variable) = getchunksize(haschunks(v),v) | ||||||
getchunksize(::DiskArrays.Chunked, v::Variable) = chunking(v)[2] | ||||||
# getchunksize(::DiskArrays.Unchunked, v::Variable) = DiskArrays.estimate_chunksize(v) | ||||||
getchunksize(::DiskArrays.Unchunked, v::Variable) = size(v) | ||||||
eachchunk(v::CFVariable) = eachchunk(v.var) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are defined on |
||||||
haschunks(v::CFVariable) = haschunks(v.var) | ||||||
eachchunk(v::Variable) = DiskArrays.GridChunks(v, Tuple(getchunksize(v))) | ||||||
haschunks(v::Variable) = (chunking(v)[1] == :contiguous ? DiskArrays.Unchunked() : DiskArrays.Chunked()) | ||||||
|
||||||
_normalizeindex(n,ind::Base.OneTo) = 1:1:ind.stop | ||||||
_normalizeindex(n,ind::Colon) = 1:1:n | ||||||
_normalizeindex(n,ind::Int) = ind:1:ind | ||||||
|
@@ -441,74 +429,5 @@ end | |||||
return start,count,stride | ||||||
end | ||||||
|
||||||
function Base.getindex(v::Variable{T,N},indexes::TR...) where {T,N} where TR <: Union{StepRange{Int,Int},UnitRange{Int}} | ||||||
start,count,stride,jlshape = ncsub(indexes[1:N]) | ||||||
data = Array{T,N}(undef,jlshape) | ||||||
|
||||||
datamode(v.ds) | ||||||
nc_get_vars!(v.ds.ncid,v.varid,start,count,stride,data) | ||||||
return data | ||||||
end | ||||||
|
||||||
function Base.setindex!(v::Variable{T,N},data::T,indexes::StepRange{Int,Int}...) where {T,N} | ||||||
datamode(v.ds) # make sure that the file is in data mode | ||||||
start,count,stride,jlshape = ncsub(indexes[1:ndims(v)]) | ||||||
tmp = fill(data,jlshape) | ||||||
nc_put_vars(v.ds.ncid,v.varid,start,count,stride,tmp) | ||||||
return data | ||||||
end | ||||||
|
||||||
function Base.setindex!(v::Variable{T,N},data::Array{T,N},indexes::StepRange{Int,Int}...) where {T,N} | ||||||
datamode(v.ds) # make sure that the file is in data mode | ||||||
start,count,stride,jlshape = ncsub(indexes[1:ndims(v)]) | ||||||
nc_put_vars(v.ds.ncid,v.varid,start,count,stride,data) | ||||||
return data | ||||||
end | ||||||
|
||||||
# data can be Array{T2,N} or BitArray{N} | ||||||
function Base.setindex!(v::Variable{T,N},data::AbstractArray,indexes::StepRange{Int,Int}...) where {T,N} | ||||||
datamode(v.ds) # make sure that the file is in data mode | ||||||
start,count,stride,jlshape = ncsub(indexes[1:ndims(v)]) | ||||||
|
||||||
tmp = convert(Array{T,ndims(data)},data) | ||||||
nc_put_vars(v.ds.ncid,v.varid,start,count,stride,tmp) | ||||||
|
||||||
return data | ||||||
end | ||||||
|
||||||
|
||||||
|
||||||
|
||||||
function Base.getindex(v::Variable{T,N},indexes::Union{Int,Colon,AbstractRange{<:Integer}}...) where {T,N} | ||||||
sz = size(v) | ||||||
start,count,stride = ncsub2(sz,indexes...) | ||||||
jlshape = _shape_after_slice(sz,indexes...) | ||||||
data = Array{T}(undef,jlshape) | ||||||
|
||||||
datamode(v.ds) | ||||||
nc_get_vars!(v.ds.ncid,v.varid,start,count,stride,data) | ||||||
|
||||||
return data | ||||||
end | ||||||
|
||||||
# NetCDF scalars indexed as [] | ||||||
Base.getindex(v::Variable{T, 0}) where T = v[1] | ||||||
|
||||||
|
||||||
|
||||||
function Base.setindex!(v::Variable,data,indexes::Union{Int,Colon,AbstractRange{<:Integer}}...) | ||||||
ind = normalizeindexes(size(v),indexes) | ||||||
|
||||||
# make arrays out of scalars (arrays can have zero dimensions) | ||||||
if (ndims(data) == 0) && !(data isa AbstractArray) | ||||||
data = fill(data,length.(ind)) | ||||||
end | ||||||
|
||||||
return v[ind...] = data | ||||||
end | ||||||
|
||||||
|
||||||
Base.getindex(v::Union{MFVariable,DeferVariable,Variable},ci::CartesianIndices) = v[ci.indices...] | ||||||
Base.setindex!(v::Union{MFVariable,DeferVariable,Variable},data,ci::CartesianIndices) = setindex!(v,data,ci.indices...) | ||||||
|
||||||
|
||||||
Base.getindex(v::Union{MFVariable,DeferVariable},ci::CartesianIndices) = v[ci.indices...] | ||||||
Base.setindex!(v::Union{MFVariable,DeferVariable},data,ci::CartesianIndices) = setindex!(v,data,ci.indices...) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are we aiming to move more of these to DiskArrays.jl?
Just having `Variable` means this PR doesn't allow Rasters.jl integration of GRIB or NetCDF.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm experimenting with this on this PR: JuliaGeo/CommonDataModel.jl/pull/9.
The issue I'm currently facing is, I think, related to the growth of unlimited dimensions. As you predicted, this causes problems when CFVariable implements DiskArray.
Unfortunately, I can't show you the error stack properly here since it needs the combination of this branch with JuliaGeo/CommonDataModel.jl/pull/9. Is there a way to tell GitHub to run CI with a specific branch of a dependency?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can add a Project.toml and Manifest.toml to /test if you need to, but it's a bit of fiddling and maybe no better than just pasting errors in issues.
Yes, that is fairly predictable. You will need to rebuild the chunking after `grow!`.