Skip to content

Commit

Permalink
Retry if the download of a GZip file fails
Browse files Browse the repository at this point in the history
  • Loading branch information
diegozea committed Feb 20, 2023
1 parent 6ef73d2 commit 4ceba63
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 20 deletions.
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
## MIToS.jl Release Notes

### Changes from v2.11.0 to v2.11.1

* MIToS now checks the magic number of gzip files immediately after download. If
the gzip file does not have the correct header, MIToS will attempt to download
it again. In Julia versions below 1.2, it will retry the download once. In
Julia 1.2 or higher, it will retry the download five times, using an
ExponentialBackOff.

### Changes from v2.10.0 to v2.11.0

* *[breaking change]* `getCA` returns `missing` if a `PDBResidue` has no CA atom
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "MIToS"
uuid = "51bafb47-8a16-5ded-8b04-24ef4eede0b5"
version = "2.11.0"
version = "2.11.1"

[deps]
ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63"
Expand Down
68 changes: 49 additions & 19 deletions src/Utils/Read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,33 @@ import Base: read
"`FileFormat` is used for write special `parse` (and `read`) methods on it."
abstract type FileFormat end

"This function raises an error if a GZip file doesn't have the 0x1f8b magic number."
function _check_gzip_file(filename)
    # Only files named `*.gz` are checked; any other name is returned untouched.
    if endswith(filename, ".gz")
        # Compare the raw leading bytes with the GZip signature (0x1f, 0x8b) instead of
        # reading a `UInt16`: an integer read uses the host byte order, so the very same
        # header shows up as 0x8b1f on little-endian machines and as 0x1f8b on
        # big-endian ones.
        header = open(filename, "r") do fh
            read(fh, 2)
        end
        # `read(fh, 2)` returns fewer than two bytes for an empty or truncated file, so
        # those are reported with the same `ErrorException` as any other invalid file.
        if header != UInt8[0x1f, 0x8b]
            throw(ErrorException("$filename is not a GZip file!"))
        end
    end
    # Return the checked path.
    filename
end

"""
Download `url` to `filename` using `HTTP.download` and return `filename`.

The keyword arguments are first passed through `_modify_kargs_for_proxy`. The `headers`
keyword, if given, is used as the headers argument of `HTTP.download`; the remaining
keywords are forwarded unchanged. After the download, `_check_gzip_file` raises an
error if a `.gz` file doesn't have the GZip magic number.
"""
function _download_file(url::AbstractString, filename::AbstractString;
                        kargs...)
    kargs = _modify_kargs_for_proxy(url; kargs...)
    kargs_dict = Dict(kargs...)
    # Keyword names are `Symbol`s, so the key to extract is `:headers`; looking up the
    # string "headers" never matches and always yields the empty default.
    headers = pop!(kargs_dict, :headers, Dict{String,String}())
    # Use a console logger with `Logging.Warn` as its minimum level while downloading.
    with_logger(ConsoleLogger(stderr, Logging.Warn)) do
        HTTP.download(url, filename, headers; kargs_dict...)
    end
    _check_gzip_file(filename)
end

"""
`download_file` uses **HTTP.jl** to download files from the web. It takes the file url as
first argument and, optionally, a path to save it.
Expand All @@ -22,18 +49,21 @@ julia> download_file("http://www.uniprot.org/uniprot/P69905.fasta","seq.fasta",
```
"""
function download_file(url::AbstractString, filename::AbstractString;
                       kargs...)
    # `_download_file` raises an error when a downloaded `.gz` file lacks the GZip magic
    # number (and lets any error from the download itself propagate), so wrapping it
    # with `retry` repeats the download after a failure.
    if VERSION >= v"1.2.0"
        # Julia 1.2 or higher: retry up to five times using an `ExponentialBackOff`.
        retry(_download_file, delays=ExponentialBackOff(n=5))(url, filename; kargs...)
    else
        # Older Julia versions: use the default delays of `retry` (a single retry).
        retry(_download_file)(url, filename; kargs...)
    end
end

function download_file(url::AbstractString;
                       kargs...)
    # Download into a fresh temporary path when the caller doesn't give a filename.
    name = tempname()
    # Keep the `.gz` extension of the url in the temporary name, so that the file can
    # still be recognised as a GZip file from its name.
    if endswith(url, ".gz")
        name *= ".gz"
    end
    download_file(url, name; kargs...)
end

"""
Expand All @@ -44,14 +74,14 @@ variables.
function _modify_kargs_for_proxy(url; kargs...)
if startswith(lowercase(url), "http://")
proxy_env_var = "HTTPS_PROXY"
elseif startswith(lowercase(url),"https://")
elseif startswith(lowercase(url), "https://")
proxy_env_var = "HTTPS_PROXY"
else
return kargs
end
if !(:proxy in keys(kargs)) && proxy_env_var in keys(ENV)
kw = Dict()
for (k,v) in kargs
for (k, v) in kargs
kw[k] = v
end
kw[:proxy] = ENV[proxy_env_var]
Expand All @@ -62,7 +92,7 @@ end

"Create an iterable object that will yield each line from a stream **or string**."
function lineiterator(string::String)
    # Wrap the string in an in-memory buffer so it can be read line by line.
    buffer = IOBuffer(string)
    return eachline(buffer)
end

# Streams are iterated line by line directly.
function lineiterator(stream::IO)
    return eachline(stream)
end

"""
Returns the `filename`.
Expand All @@ -82,9 +112,9 @@ isnotemptyfile(filename) = isfile(filename) && filesize(filename) > 0

# for using with download, since filename doesn't have file extension
function _read(completename::AbstractString,
filename::AbstractString,
format::Type{T},
args...; kargs...) where T <: FileFormat
filename::AbstractString,
format::Type{T},
args...; kargs...) where {T<:FileFormat}
check_file(filename)
if endswith(completename, ".xml.gz") || endswith(completename, ".xml")
document = parse_file(filename)
Expand Down Expand Up @@ -113,11 +143,11 @@ the file is downloaded with `download` in a temporal file.
Gzipped files should end on `.gz`.
"""
function read(completename::AbstractString,
format::Type{T},
args...; kargs...) where T <: FileFormat
if startswith(completename, "http://") ||
startswith(completename, "https://") ||
startswith(completename, "ftp://")
format::Type{T},
args...; kargs...) where {T<:FileFormat}
if startswith(completename, "http://") ||
startswith(completename, "https://") ||
startswith(completename, "ftp://")

filename = download_file(completename, headers=Dict("Accept-Encoding" => "identity",))
try
Expand Down
30 changes: 30 additions & 0 deletions test/Utils/GeneralUtils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,34 @@ end
end
end
end

@testset "Test _check_gzip_file" begin
    # Every file in the data folder must pass the check and be returned unchanged,
    # with a single exception: 2vqc.xml.gz is a decompressed file that has a wrong
    # .gz extension, so the check must throw for it.
    for file in readdir(DATA)
        filename = joinpath(DATA, file)
        if file == "2vqc.xml.gz"
            @test_throws ErrorException MIToS.Utils._check_gzip_file(filename)
        else
            @test MIToS.Utils._check_gzip_file(filename) == filename
        end
    end
end

@testset "Download a gz file" begin
    # Download https://www.rcsb.org/pdb/files/3NIR.pdb.gz without giving a filename:
    # the temporary file should keep the .gz extension and pass the GZip check.
    filename = ""
    try
        filename = download_file("https://www.rcsb.org/pdb/files/3NIR.pdb.gz")
        @test endswith(filename, ".gz")
        @test MIToS.Utils._check_gzip_file(filename) == filename
    finally
        # Remove the downloaded file, if any, even when a test fails.
        isfile(filename) && rm(filename)
    end
end
end




2 comments on commit 4ceba63

@diegozea
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/78087

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v2.11.1 -m "<description of version>" 4ceba63f6a03a60634bd0d3728204b6c2962a67d
git push origin v2.11.1

Please sign in to comment.