From 680d6f267b2dc59a661cbc4f30689d67f7601f1a Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Thu, 12 Sep 2024 08:28:19 -0400 Subject: [PATCH 1/3] RFC: Introduce ArtifactDownloads stdlib This shamelessly copies out of Pkg all of the bits required to actually download artifacts, etc. The motivation for this stdlib is to reduce the loading / dependency cost for downloading artifacts, esp. via LazyArtifacts. With this PR: ```console $ ./julia -e '@time using MicroMamba' 0.179380 seconds (188.20 k allocations: 10.800 MiB) ``` versus master: ```console $ ./julia -e '@time using MicroMamba' 1.857587 seconds (932.05 k allocations: 56.118 MiB, 6.27% gc time, 13.45% compilation time: 3% of which was recompilation) ``` --- src/staticdata.c | 2 +- stdlib/ArtifactDownloads/Project.toml | 23 + .../src/ArtifactDownloads.jl | 615 +++++++++++++++ stdlib/ArtifactDownloads/src/GitTools.jl | 353 +++++++++ .../ArtifactDownloads/src/MiniProgressBars.jl | 125 +++ .../ArtifactDownloads/src/PlatformEngines.jl | 711 ++++++++++++++++++ stdlib/ArtifactDownloads/src/Utils.jl | 141 ++++ stdlib/Artifacts/src/Artifacts.jl | 2 +- stdlib/Makefile | 2 +- stdlib/Manifest.toml | 13 +- stdlib/Project.toml | 1 + stdlib/stdlib.mk | 4 +- test/choosetests.jl | 1 + 13 files changed, 1984 insertions(+), 9 deletions(-) create mode 100644 stdlib/ArtifactDownloads/Project.toml create mode 100644 stdlib/ArtifactDownloads/src/ArtifactDownloads.jl create mode 100644 stdlib/ArtifactDownloads/src/GitTools.jl create mode 100644 stdlib/ArtifactDownloads/src/MiniProgressBars.jl create mode 100644 stdlib/ArtifactDownloads/src/PlatformEngines.jl create mode 100644 stdlib/ArtifactDownloads/src/Utils.jl diff --git a/src/staticdata.c b/src/staticdata.c index b991dfe8f37f3..04eff4a45b41d 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -890,7 +890,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ } if (immediate) // must be things that can be recursively handled, and valid as type parameters - assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v)); + assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v) || jl_is_module(v)); if (layout->npointers == 0) { // bitstypes do not require recursion diff --git a/stdlib/ArtifactDownloads/Project.toml b/stdlib/ArtifactDownloads/Project.toml new file mode 100644 index 0000000000000..07374040c711d --- /dev/null +++ b/stdlib/ArtifactDownloads/Project.toml @@ -0,0 +1,23 @@ +name = "ArtifactDownloads" +uuid = "6ee59702-05f1-4877-b811-03813c0d76c5" +version = "1.11.0" + +[deps] +Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +FileWatching = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" +TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" + +# TODO: Remove Printf + +[extras] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test"] diff --git a/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl b/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl new file mode 100644 index 0000000000000..cdfeed8c3bc98 --- /dev/null +++ b/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl @@ -0,0 +1,615 @@ +module ArtifactDownloads + +include("Utils.jl") +include("MiniProgressBars.jl") 
+include("GitTools.jl") +include("PlatformEngines.jl") + +import TOML +import Base: SHA1 +import SHA: sha256 + +import ..Utils: set_readonly, pkg_server, can_fancyprint, stderr_f, printpkgstyle, + write_env_usage, can_symlink, parse_toml +import ..GitTools +import ..PlatformEngines: package, download_verify_unpack + +import Base.BinaryPlatforms: AbstractPlatform, HostPlatform, triplet +import Artifacts: artifact_names, ARTIFACTS_DIR_OVERRIDE, ARTIFACT_OVERRIDES, artifact_path, + artifacts_dirs, pack_platform!, unpack_platform, load_artifacts_toml, + query_override, with_artifacts_directory, load_overrides, artifact_exists, + artifact_meta, select_downloadable_artifacts + +# export create_artifact, artifact_exists, artifact_path, remove_artifact, verify_artifact, + # artifact_meta, artifact_hash, bind_artifact!, unbind_artifact!, download_artifact, + # find_artifacts_toml, ensure_artifact_installed, @artifact_str, archive_artifact, + # select_downloadable_artifacts + +""" + create_artifact(f::Function) + +Creates a new artifact by running `f(artifact_path)`, hashing the result, and moving it +to the artifact store (`~/.julia/artifacts` on a typical installation). Returns the +identifying tree hash of this artifact. +""" +function create_artifact(f::Function) + # Ensure the `artifacts` directory exists in our default depot + artifacts_dir = first(artifacts_dirs()) + mkpath(artifacts_dir) + + # Temporary directory where we'll do our creation business + temp_dir = mktempdir(artifacts_dir) + + try + # allow the user to do their work inside the temporary directory + f(temp_dir) + + # Calculate the tree hash for this temporary directory + artifact_hash = SHA1(GitTools.tree_hash(temp_dir)) + + # If we created a dupe, just let the temp directory get destroyed. It's got the + # same contents as whatever already exists after all, so it doesn't matter. Only + # move its contents if it actually contains new contents. Note that we explicitly + # set `honor_overrides=false` here, as we wouldn't want to drop things into the + # system directory by accidentally creating something with the same content-hash + # as something that was foolishly overridden. This should be virtually impossible + # unless the user has been very unwise, but let's be cautious. + new_path = artifact_path(artifact_hash; honor_overrides=false) + _mv_temp_artifact_dir(temp_dir, new_path) + + # Give the people what they want + return artifact_hash + finally + # Always attempt to cleanup + rm(temp_dir; recursive=true, force=true) + end +end + +""" + _mv_temp_artifact_dir(temp_dir::String, new_path::String)::Nothing +Either rename the directory at `temp_dir` to `new_path` and set it to read-only +or if `new_path` artifact already exists try to do nothing. +""" +function _mv_temp_artifact_dir(temp_dir::String, new_path::String)::Nothing + if !isdir(new_path) + # This next step is like + # `mv(temp_dir, new_path)`. + # However, `mv` defaults to `cp` if `rename` returns an error. + # `cp` is not atomic, so avoid the potential of calling it. + err = ccall(:jl_fs_rename, Int32, (Cstring, Cstring), temp_dir, new_path) + # Ignore rename error, but ensure `new_path` exists. + if !isdir(new_path) + error("$(repr(new_path)) could not be made") + end + chmod(new_path, filemode(dirname(new_path))) + set_readonly(new_path) + end + nothing +end + +""" + remove_artifact(hash::SHA1; honor_overrides::Bool=false) + +Removes the given artifact (identified by its SHA1 git tree hash) from disk. 
Note that
+if an artifact is installed in multiple depots, it will be removed from all of them. If
+an overridden artifact is requested for removal, it will be silently ignored; this method
+will never attempt to remove an overridden artifact.
+
+In general, we recommend that you use `Pkg.gc()` to manage artifact installations and do
+not use `remove_artifact()` directly, as it can be difficult to know if an artifact is
+being used by another package.
+"""
+function remove_artifact(hash::SHA1)
+    if query_override(hash) !== nothing
+        # We never remove overridden artifacts.
+        return
+    end
+
+    # Get all possible paths (rooted in all depots)
+    possible_paths = artifacts_dirs(bytes2hex(hash.bytes))
+    for path in possible_paths
+        if isdir(path)
+            rm(path; recursive=true, force=true)
+        end
+    end
+end
+
+"""
+    verify_artifact(hash::SHA1; honor_overrides::Bool=false)
+
+Verifies that the given artifact (identified by its SHA1 git tree hash) is installed on-
+disk, and retains its integrity. If the given artifact is overridden, skips the
+verification unless `honor_overrides` is set to `true`.
+"""
+function verify_artifact(hash::SHA1; honor_overrides::Bool=false)
+    # Silently skip overridden artifacts unless we really ask for it
+    if !honor_overrides
+        if query_override(hash) !== nothing
+            return true
+        end
+    end
+
+    # If it doesn't even exist, then skip out
+    if !artifact_exists(hash)
+        return false
+    end
+
+    # Otherwise actually run the verification
+    return all(hash.bytes .== GitTools.tree_hash(artifact_path(hash)))
+end
+
+"""
+    archive_artifact(hash::SHA1, tarball_path::String; honor_overrides::Bool=false)
+
+Archive an artifact into a tarball stored at `tarball_path` and return the SHA256 of the
+resultant tarball as a hexadecimal string. Throws an error if the artifact does not
+exist. If the artifact is overridden, throws an error unless `honor_overrides` is set.
+"""
+function archive_artifact(hash::SHA1, tarball_path::String; honor_overrides::Bool=false)
+    if !honor_overrides
+        if query_override(hash) !== nothing
+            error("Will not archive an overridden artifact unless `honor_overrides` is set!")
+        end
+    end
+
+    if !artifact_exists(hash)
+        error("Unable to archive artifact $(bytes2hex(hash.bytes)): does not exist!")
+    end
+
+    # TODO: We may not need this...
+    # (removes `PlatformEngines.package`)
+
+    # Package it up
+    package(artifact_path(hash), tarball_path)
+
+    # Calculate its sha256 and return that
+    return open(tarball_path, "r") do io
+        return bytes2hex(sha256(io))
+    end
+end
+
+"""
+    bind_artifact!(artifacts_toml::String, name::String, hash::SHA1;
+                   platform::Union{AbstractPlatform,Nothing} = nothing,
+                   download_info::Union{Vector{Tuple},Nothing} = nothing,
+                   lazy::Bool = false,
+                   force::Bool = false)
+
+Writes a mapping of `name` -> `hash` within the given `(Julia)Artifacts.toml` file. If
+`platform` is not `nothing`, this artifact is marked as platform-specific, and will be
+a multi-mapping. It is valid to bind multiple artifacts with the same name, but
+different `platform`s and `hash`es within the same `artifacts_toml`. If `force` is set
+to `true`, this will overwrite a pre-existing mapping; otherwise an error is raised.
+
+`download_info` is an optional vector that contains tuples of URLs and a hash. These
+URLs will be listed as possible locations where this artifact can be obtained.
If `lazy` +is set to `true`, even if download information is available, this artifact will not be +downloaded until it is accessed via the `artifact"name"` syntax, or +`ensure_artifact_installed()` is called upon it. +""" +function bind_artifact!(artifacts_toml::String, name::String, hash::SHA1; + platform::Union{AbstractPlatform,Nothing} = nothing, + download_info::Union{Vector{<:Tuple},Nothing} = nothing, + lazy::Bool = false, + force::Bool = false) + # First, check to see if this artifact is already bound: + if isfile(artifacts_toml) + artifact_dict = parse_toml(artifacts_toml) + + if !force && haskey(artifact_dict, name) + meta = artifact_dict[name] + if !isa(meta, Vector) + error("Mapping for '$name' within $(artifacts_toml) already exists!") + elseif any(isequal(platform), unpack_platform(x, name, artifacts_toml) for x in meta) + error("Mapping for '$name'/$(triplet(platform)) within $(artifacts_toml) already exists!") + end + end + else + artifact_dict = Dict{String, Any}() + end + + # Otherwise, the new piece of data we're going to write out is this dict: + meta = Dict{String,Any}( + "git-tree-sha1" => bytes2hex(hash.bytes), + ) + + # If we're set to be lazy, then lazy we shall be + if lazy + meta["lazy"] = true + end + + # Integrate download info, if it is given. We represent the download info as a + # vector of dicts, each with its own `url` and `sha256`, since different tarballs can + # expand to the same tree hash. + if download_info !== nothing + meta["download"] = [ + Dict("url" => dl[1], + "sha256" => dl[2], + ) for dl in download_info + ] + end + + if platform === nothing + artifact_dict[name] = meta + else + # Add platform-specific keys to our `meta` dict + pack_platform!(meta, platform) + + # Insert this entry into the list of artifacts + if !haskey(artifact_dict, name) + artifact_dict[name] = [meta] + else + # Delete any entries that contain identical platforms + artifact_dict[name] = filter( + x -> unpack_platform(x, name, artifacts_toml) != platform, + artifact_dict[name] + ) + push!(artifact_dict[name], meta) + end + end + + # Spit it out onto disk + let artifact_dict = artifact_dict + parent_dir = dirname(artifacts_toml) + temp_artifacts_toml = isempty(parent_dir) ? tempname(pwd()) : tempname(parent_dir) + open(temp_artifacts_toml, "w") do io + TOML.print(io, artifact_dict, sorted=true) + end + mv(temp_artifacts_toml, artifacts_toml; force=true) + end + + # Mark that we have used this Artifact.toml + write_env_usage(artifacts_toml, "artifact_usage.toml") + return +end + + +""" + unbind_artifact!(artifacts_toml::String, name::String; platform = nothing) + +Unbind the given `name` from an `(Julia)Artifacts.toml` file. +Silently fails if no such binding exists within the file. +""" +function unbind_artifact!(artifacts_toml::String, name::String; + platform::Union{AbstractPlatform,Nothing} = nothing) + artifact_dict = parse_toml(artifacts_toml) + if !haskey(artifact_dict, name) + return + end + + if platform === nothing + delete!(artifact_dict, name) + else + artifact_dict[name] = filter( + x -> unpack_platform(x, name, artifacts_toml) != platform, + artifact_dict[name] + ) + end + + open(artifacts_toml, "w") do io + TOML.print(io, artifact_dict, sorted=true) + end + return +end + +""" + download_artifact(tree_hash::SHA1, tarball_url::String, tarball_hash::String; + verbose::Bool = false, io::IO=stderr) + +Download/install an artifact into the artifact store. Returns `true` on success, +returns an error object on failure. + +!!! 
compat "Julia 1.8" + As of Julia 1.8 this function returns the error object rather than `false` when + failure occurs +""" +function download_artifact( + tree_hash::SHA1, + tarball_url::String, + tarball_hash::Union{String, Nothing} = nothing; + verbose::Bool = false, + quiet_download::Bool = false, + io::IO=stderr_f(), +) + if artifact_exists(tree_hash) + return true + end + + # Ensure the `artifacts` directory exists in our default depot + artifacts_dir = first(artifacts_dirs()) + mkpath(artifacts_dir) + # expected artifact path + dst = joinpath(artifacts_dir, bytes2hex(tree_hash.bytes)) + + # We download by using a temporary directory. We do this because the download may + # be corrupted or even malicious; we don't want to clobber someone else's artifact + # by trusting the tree hash that has been given to us; we will instead download it + # to a temporary directory, calculate the true tree hash, then move it to the proper + # location only after knowing what it is, and if something goes wrong in the process, + # everything should be cleaned up. + + # Temporary directory where we'll do our creation business + temp_dir = mktempdir(artifacts_dir) + + try + download_verify_unpack(tarball_url, tarball_hash, temp_dir, ignore_existence=true, verbose=verbose, + quiet_download=quiet_download, io=io) + calc_hash = SHA1(GitTools.tree_hash(temp_dir)) + + # Did we get what we expected? If not, freak out. + if calc_hash.bytes != tree_hash.bytes + msg = """ + Tree Hash Mismatch! + Expected git-tree-sha1: $(bytes2hex(tree_hash.bytes)) + Calculated git-tree-sha1: $(bytes2hex(calc_hash.bytes)) + """ + # Since tree hash calculation is rather fragile and file system dependent, + # we allow setting JULIA_PKG_IGNORE_HASHES=1 to ignore the error and move + # the artifact to the expected location and return true + ignore_hash_env_set = Base.get(ENV, "JULIA_PKG_IGNORE_HASHES", "") != "" + if ignore_hash_env_set + ignore_hash = Base.get_bool_env("JULIA_PKG_IGNORE_HASHES", false) + ignore_hash === nothing && @error( + "Invalid ENV[\"JULIA_PKG_IGNORE_HASHES\"] value", + ENV["JULIA_PKG_IGNORE_HASHES"], + ) + ignore_hash = something(ignore_hash, false) + else + # default: false except Windows users who can't symlink + ignore_hash = Sys.iswindows() && + !mktempdir(can_symlink, artifacts_dir) + end + if ignore_hash + desc = ignore_hash_env_set ? + "Environment variable \$JULIA_PKG_IGNORE_HASHES is true" : + "System is Windows and user cannot create symlinks" + msg *= "\n$desc: \ + ignoring hash mismatch and moving \ + artifact to the expected location" + @error(msg) + else + error(msg) + end + end + # Move it to the location we expected + _mv_temp_artifact_dir(temp_dir, dst) + catch err + @debug "download_artifact error" tree_hash tarball_url tarball_hash err + if isa(err, InterruptException) + rethrow(err) + end + # If something went wrong during download, return the error + return err + finally + # Always attempt to cleanup + rm(temp_dir; recursive=true, force=true) + end + return true +end + +""" + ensure_artifact_installed(name::String, artifacts_toml::String; + platform::AbstractPlatform = HostPlatform(), + pkg_uuid::Union{Base.UUID,Nothing}=nothing, + verbose::Bool = false, + quiet_download::Bool = false, + io::IO=stderr) + +Ensures an artifact is installed, downloading it via the download information stored in +`artifacts_toml` if necessary. Throws an error if unable to install. 
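+
+A minimal usage sketch (the `Artifacts.toml` path and the artifact name
+"my_dataset" here are hypothetical):
+
+    toml = joinpath(@__DIR__, "Artifacts.toml")
+    path = ensure_artifact_installed("my_dataset", toml)
+    @assert isdir(path)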
+""" +function ensure_artifact_installed(name::String, artifacts_toml::String; + platform::AbstractPlatform = HostPlatform(), + pkg_uuid::Union{Base.UUID,Nothing}=nothing, + verbose::Bool = false, + quiet_download::Bool = false, + io::IO=stderr_f()) + meta = artifact_meta(name, artifacts_toml; pkg_uuid=pkg_uuid, platform=platform) + if meta === nothing + error("Cannot locate artifact '$(name)' in '$(artifacts_toml)'") + end + + return ensure_artifact_installed(name, meta, artifacts_toml; platform=platform, + verbose=verbose, quiet_download=quiet_download, io=io) +end + +function ensure_artifact_installed(name::String, meta::Dict, artifacts_toml::String; + platform::AbstractPlatform = HostPlatform(), + verbose::Bool = false, + quiet_download::Bool = false, + io::IO=stderr_f()) + hash = SHA1(meta["git-tree-sha1"]) + + if !artifact_exists(hash) + errors = Any[] + # first try downloading from Pkg server + # TODO: only do this if Pkg server knows about this package + if (server = pkg_server()) !== nothing + url = "$server/artifact/$hash" + download_success = let url=url + @debug "Downloading artifact from Pkg server" name artifacts_toml platform url + with_show_download_info(io, name, quiet_download) do + download_artifact(hash, url; verbose=verbose, quiet_download=quiet_download, io=io) + end + end + # download_success is either `true` or an error object + if download_success === true + return artifact_path(hash) + else + @debug "Failed to download artifact from Pkg server" download_success + push!(errors, (url, download_success)) + end + end + + # If this artifact does not exist on-disk already, ensure it has download + # information, then download it! + if !haskey(meta, "download") + error("Cannot automatically install '$(name)'; no download section in '$(artifacts_toml)'") + end + + # Attempt to download from all sources + for entry in meta["download"] + url = entry["url"] + tarball_hash = entry["sha256"] + download_success = let url=url + @debug "Downloading artifact" name artifacts_toml platform url + with_show_download_info(io, name, quiet_download) do + download_artifact(hash, url, tarball_hash; verbose=verbose, quiet_download=quiet_download, io=io) + end + end + # download_success is either `true` or an error object + if download_success === true + return artifact_path(hash) + else + @debug "Failed to download artifact" download_success + push!(errors, (url, download_success)) + end + end + errmsg = """ + Unable to automatically download/install artifact '$(name)' from sources listed in '$(artifacts_toml)'. 
+        Sources attempted:
+        """
+        for (url, err) in errors
+            errmsg *= "- $(url)\n"
+            errmsg *= "    Error: $(sprint(showerror, err))\n"
+        end
+        error(errmsg)
+    else
+        return artifact_path(hash)
+    end
+end
+
+function with_show_download_info(f, io, name, quiet_download)
+    fancyprint = can_fancyprint(io)
+    if !quiet_download
+        if fancyprint
+            # print_progress_bottom(io)
+            ansi_clearline = "\e[2K"
+            ansi_movecol1 = "\e[1G"
+            ansi_moveup(n::Int) = string("\e[", n, "A")
+            print(io, "\e[S" * ansi_moveup(1) * ansi_clearline * ansi_movecol1)
+        end
+        printpkgstyle(io, :Downloading, "artifact: $name")
+    end
+    success = false
+    try
+        result = f()
+        success = result === true
+        return result
+    finally
+        if !quiet_download
+            fancyprint && print(io, "\033[1A") # move cursor up one line
+            fancyprint && print(io, "\033[2K") # clear line
+            if success
+                fancyprint && printpkgstyle(io, :Downloaded, "artifact: $name")
+            else
+                printpkgstyle(io, :Failure, "artifact: $name", color = :red)
+            end
+        end
+    end
+end
+
+"""
+    ensure_all_artifacts_installed(artifacts_toml::String;
+                                   platform = HostPlatform(),
+                                   pkg_uuid = nothing,
+                                   include_lazy = false,
+                                   verbose = false,
+                                   quiet_download = false,
+                                   io::IO=stderr)
+
+Installs all non-lazy artifacts from a given `(Julia)Artifacts.toml` file. `pkg_uuid` must
+be provided to properly support overrides from `Overrides.toml` entries in depots.
+
+If `include_lazy` is set to `true`, then lazy artifacts will be installed as well.
+
+This function is deprecated and should be replaced with the following snippet:
+
+    artifacts = select_downloadable_artifacts(artifacts_toml; platform, include_lazy)
+    for name in keys(artifacts)
+        ensure_artifact_installed(name, artifacts[name], artifacts_toml; platform=platform)
+    end
+
+!!! warning
+    This function is deprecated in Julia 1.6 and will be removed in a future version.
+    Use `select_downloadable_artifacts()` and `ensure_artifact_installed()` instead.
+"""
+function ensure_all_artifacts_installed(artifacts_toml::String;
+                                        platform::AbstractPlatform = HostPlatform(),
+                                        pkg_uuid::Union{Nothing,Base.UUID} = nothing,
+                                        include_lazy::Bool = false,
+                                        verbose::Bool = false,
+                                        quiet_download::Bool = false,
+                                        io::IO=stderr_f())
+    # This function should not be called anymore; use `select_downloadable_artifacts()` directly.
+    Base.depwarn("`ensure_all_artifacts_installed()` is deprecated; iterate over `select_downloadable_artifacts()` output with `ensure_artifact_installed()`.", :ensure_all_artifacts_installed)
+    # Collect all artifacts we're supposed to install
+    artifacts = select_downloadable_artifacts(artifacts_toml; platform, include_lazy, pkg_uuid)
+    for name in keys(artifacts)
+        # Let's try and install it!
+        ensure_artifact_installed(name, artifacts[name], artifacts_toml; platform=platform,
+                                  verbose=verbose, quiet_download=quiet_download, io=io)
+    end
+end
+
+"""
+    extract_all_hashes(artifacts_toml::String;
+                       platform = HostPlatform(),
+                       pkg_uuid = nothing,
+                       include_lazy = false)
+
+Extract all hashes from a given `(Julia)Artifacts.toml` file. `pkg_uuid` must
+be provided to properly support overrides from `Overrides.toml` entries in depots.
+
+If `include_lazy` is set to `true`, then the hashes of lazy artifacts are included as well.
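+
+A short sketch of typical use (the `Artifacts.toml` path is hypothetical):
+
+    hashes = extract_all_hashes(joinpath(@__DIR__, "Artifacts.toml"); include_lazy=true)
+    foreach(h -> println(bytes2hex(h.bytes)), hashes)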
+""" +function extract_all_hashes(artifacts_toml::String; + platform::AbstractPlatform = HostPlatform(), + pkg_uuid::Union{Nothing,Base.UUID} = nothing, + include_lazy::Bool = false) + hashes = Base.SHA1[] + if !isfile(artifacts_toml) + return hashes + end + + artifact_dict = load_artifacts_toml(artifacts_toml; pkg_uuid=pkg_uuid) + + for name in keys(artifact_dict) + # Get the metadata about this name for the requested platform + meta = artifact_meta(name, artifact_dict, artifacts_toml; platform=platform) + + # If there are no instances of this name for the desired platform, skip it + meta === nothing && continue + + # If it's a lazy one and we aren't including lazy ones, skip + if Base.get(meta, "lazy", false) && !include_lazy + continue + end + + # Otherwise, add it to the list! + push!(hashes, Base.SHA1(meta["git-tree-sha1"])) + end + + return hashes +end + +# Support `AbstractString`s, but avoid compilers needing to track backedges for callers +# of these functions in case a user defines a new type that is `<: AbstractString` +archive_artifact(hash::SHA1, tarball_path::AbstractString; kwargs...) = + archive_artifact(hash, string(tarball_path)::String; kwargs...) +bind_artifact!(artifacts_toml::AbstractString, name::AbstractString, hash::SHA1; kwargs...) = + bind_artifact!(string(artifacts_toml)::String, string(name)::String, hash; kwargs...) +unbind_artifact!(artifacts_toml::AbstractString, name::AbstractString) = + unbind_artifact!(string(artifacts_toml)::String, string(name)::String) +download_artifact(tree_hash::SHA1, tarball_url::AbstractString, args...; kwargs...) = + download_artifact(tree_hash, string(tarball_url)::String, args...; kwargs...) +ensure_artifact_installed(name::AbstractString, artifacts_toml::AbstractString; kwargs...) = + ensure_artifact_installed(string(name)::String, string(artifacts_toml)::String; kwargs...) +ensure_artifact_installed(name::AbstractString, meta::Dict, artifacts_toml::AbstractString; kwargs...) = + ensure_artifact_installed(string(name)::String, meta, string(artifacts_toml)::String; kwargs...) +ensure_all_artifacts_installed(artifacts_toml::AbstractString; kwargs...) = + ensure_all_artifacts_installed(string(artifacts_toml)::String; kwargs...) +extract_all_hashes(artifacts_toml::AbstractString; kwargs...) = + extract_all_hashes(string(artifacts_toml)::String; kwargs...) 
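+
+# A quick end-to-end sketch of the API above (the file names and the artifact
+# name "hello" are hypothetical):
+#
+#     hash = create_artifact() do dir
+#         write(joinpath(dir, "hello.txt"), "Hello, world!")
+#     end
+#     bind_artifact!("Artifacts.toml", "hello", hash)
+#     ensure_artifact_installed("hello", "Artifacts.toml")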
+
+end # module ArtifactDownloads
diff --git a/stdlib/ArtifactDownloads/src/GitTools.jl b/stdlib/ArtifactDownloads/src/GitTools.jl
new file mode 100644
index 0000000000000..c687bab278664
--- /dev/null
+++ b/stdlib/ArtifactDownloads/src/GitTools.jl
@@ -0,0 +1,353 @@
+module GitTools
+
+import SHA
+import LibGit2
+
+import ..MiniProgressBars: MiniProgressBar, start_progress, end_progress, show_progress
+import ..Utils: artifactserror, can_fancyprint, printpkgstyle, stdout_f
+
+use_cli_git() = Base.get_bool_env("JULIA_PKG_USE_CLI_GIT", false)
+
+function transfer_progress(progress::Ptr{LibGit2.TransferProgress}, p::Any)
+    progress = unsafe_load(progress)
+    @assert haskey(p, :transfer_progress)
+    bar = p[:transfer_progress]
+    @assert typeof(bar) == MiniProgressBar
+    if progress.total_deltas != 0
+        bar.header = "Resolving Deltas:"
+        bar.max = progress.total_deltas
+        bar.current = progress.indexed_deltas
+    else
+        bar.max = progress.total_objects
+        bar.current = progress.received_objects
+    end
+    show_progress(stdout_f(), bar)
+    return Cint(0)
+end
+
+const GIT_REGEX =
+    r"^(?:(?<proto>git|ssh|https)://)?(?:[\w\.\+\-:]+@)?(?<hostname>.+?)(?(<proto>)/|:)(?<path>.+?)(?:\.git)?$"
+const GIT_PROTOCOLS = Dict{String, Union{Nothing, String}}()
+const GIT_USERS = Dict{String, Union{Nothing, String}}()
+
+@deprecate setprotocol!(proto::Union{Nothing, AbstractString}) setprotocol!(protocol = proto) false
+
+function setprotocol!(;
+    domain::AbstractString="github.com",
+    protocol::Union{Nothing, AbstractString}=nothing,
+    user::Union{Nothing, AbstractString}=(protocol == "ssh" ? "git" : nothing)
+)
+    domain = lowercase(domain)
+    GIT_PROTOCOLS[domain] = protocol
+    GIT_USERS[domain] = user
+end
+
+function normalize_url(url::AbstractString)
+    # LibGit2 is fussy about trailing slash. Make sure there is none.
+    url = rstrip(url, '/')
+    m = match(GIT_REGEX, url)
+    m === nothing && return url
+
+    host = m[:hostname]
+    path = "$(m[:path]).git"
+
+    proto = get(GIT_PROTOCOLS, lowercase(host), nothing)
+
+    if proto === nothing
+        url
+    else
+        user = get(GIT_USERS, lowercase(host), nothing)
+        user = user === nothing ? "" : "$user@"
+
+        "$proto://$user$host/$path"
+    end
+end
+
+function ensure_clone(io::IO, target_path, url; kwargs...)
+    if ispath(target_path)
+        return LibGit2.GitRepo(target_path)
+    else
+        return GitTools.clone(io, url, target_path; kwargs...)
+    end
+end
+
+function checkout_tree_to_path(repo::LibGit2.GitRepo, tree::LibGit2.GitObject, path::String)
+    GC.@preserve path begin
+        opts = LibGit2.CheckoutOptions(
+            checkout_strategy = LibGit2.Consts.CHECKOUT_FORCE,
+            target_directory = Base.unsafe_convert(Cstring, path)
+        )
+        LibGit2.checkout_tree(repo, tree, options=opts)
+    end
+end
+
+function clone(io::IO, url, source_path; header=nothing, credentials=nothing, kwargs...)
+    url = String(url)::String
+    source_path = String(source_path)::String
+    @assert !isdir(source_path) || isempty(readdir(source_path))
+    url = normalize_url(url)
+    printpkgstyle(io, :Cloning, header === nothing ?
"git-repo `$url`" : header) + bar = MiniProgressBar(header = "Fetching:", color = Base.info_color()) + fancyprint = can_fancyprint(io) + callbacks = if fancyprint + LibGit2.Callbacks( + :transfer_progress => ( + @cfunction(transfer_progress, Cint, (Ptr{LibGit2.TransferProgress}, Any)), + bar, + ) + ) + else + LibGit2.Callbacks() + end + fancyprint && start_progress(io, bar) + if credentials === nothing + credentials = LibGit2.CachedCredentials() + end + try + if use_cli_git() + cmd = `git clone --quiet $url $source_path` + try + run(pipeline(cmd; stdout=devnull)) + catch err + artifactserror("The command $(cmd) failed, error: $err") + end + return LibGit2.GitRepo(source_path) + else + mkpath(source_path) + return LibGit2.clone(url, source_path; callbacks=callbacks, credentials=credentials, kwargs...) + end + catch err + rm(source_path; force=true, recursive=true) + err isa LibGit2.GitError || err isa InterruptException || rethrow() + if err isa InterruptException + artifactserror("git clone of `$url` interrupted") + elseif (err.class == LibGit2.Error.Net && err.code == LibGit2.Error.EINVALIDSPEC) || + (err.class == LibGit2.Error.Repository && err.code == LibGit2.Error.ENOTFOUND) + artifactserror("git repository not found at `$(url)`") + else + artifactserror("failed to clone from $(url), error: $err") + end + finally + Base.shred!(credentials) + fancyprint && end_progress(io, bar) + end +end + +function fetch(io::IO, repo::LibGit2.GitRepo, remoteurl=nothing; header=nothing, credentials=nothing, refspecs=[""], kwargs...) + if remoteurl === nothing + remoteurl = LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, "origin")) do remote + LibGit2.url(remote) + end + end + fancyprint = can_fancyprint(io) + remoteurl = normalize_url(remoteurl) + printpkgstyle(io, :Updating, header === nothing ? "git-repo `$remoteurl`" : header) + bar = MiniProgressBar(header = "Fetching:", color = Base.info_color()) + fancyprint = can_fancyprint(io) + callbacks = if fancyprint + LibGit2.Callbacks( + :transfer_progress => ( + @cfunction(transfer_progress, Cint, (Ptr{LibGit2.TransferProgress}, Any)), + bar, + ) + ) + else + LibGit2.Callbacks() + end + fancyprint && start_progress(io, bar) + if credentials === nothing + credentials = LibGit2.CachedCredentials() + end + try + if use_cli_git() + let remoteurl=remoteurl + cd(LibGit2.path(repo)) do + cmd = `git fetch -q $remoteurl $(only(refspecs))` + try + run(pipeline(cmd; stdout=devnull)) + catch err + artifactserror("The command $(cmd) failed, error: $err") + end + end + end + else + return LibGit2.fetch(repo; remoteurl=remoteurl, callbacks=callbacks, refspecs=refspecs, kwargs...) 
+ end + catch err + err isa LibGit2.GitError || rethrow() + if (err.class == LibGit2.Error.Repository && err.code == LibGit2.Error.ERROR) + artifactserror("Git repository not found at '$(remoteurl)'") + else + artifactserror("failed to fetch from $(remoteurl), error: $err") + end + finally + Base.shred!(credentials) + fancyprint && end_progress(io, bar) + end +end + + +# This code gratefully adapted from https://github.com/simonbyrne/GitX.jl +@enum GitMode mode_dir=0o040000 mode_normal=0o100644 mode_executable=0o100755 mode_symlink=0o120000 mode_submodule=0o160000 +Base.string(mode::GitMode) = string(UInt32(mode); base=8) +Base.print(io::IO, mode::GitMode) = print(io, string(mode)) + +function gitmode(path::AbstractString) + # Windows doesn't deal with executable permissions in quite the same way, + # `stat()` gives a different answer than we actually want, so we use + # `isexecutable()` which uses `uv_fs_access()` internally. On other + # platforms however, we just want to check via `stat()`. + function isexec(p) + @static if Sys.iswindows() + return Sys.isexecutable(p) + end + return !iszero(filemode(p) & 0o100) + end + if islink(path) + return mode_symlink + elseif isdir(path) + return mode_dir + elseif isexec(path) + return mode_executable + else + return mode_normal + end +end + +""" + blob_hash(HashType::Type, path::AbstractString) + +Calculate the git blob hash of a given path. +""" +function blob_hash(::Type{HashType}, path::AbstractString) where HashType + ctx = HashType() + if islink(path) + datalen = length(readlink(path)) + else + datalen = filesize(path) + end + + # First, the header + SHA.update!(ctx, Vector{UInt8}("blob $(datalen)\0")) + + # Next, read data in in chunks of 4KB + buff = Vector{UInt8}(undef, 4*1024) + + try + if islink(path) + SHA.update!(ctx, Vector{UInt8}(readlink(path))) + else + open(path, "r") do io + while !eof(io) + num_read = readbytes!(io, buff) + SHA.update!(ctx, buff, num_read) + end + end + end + catch e + if isa(e, InterruptException) + rethrow(e) + end + @warn("Unable to open $(path) for hashing; git-tree-sha1 likely suspect") + end + + # Finish it off and return the digest! + return SHA.digest!(ctx) +end +blob_hash(path::AbstractString) = blob_hash(SHA.SHA1_CTX, path) + +""" + contains_files(root::AbstractString) + +Helper function to determine whether a directory contains files; e.g. it is a +direct parent of a file or it contains some other directory that itself is a +direct parent of a file. This is used to exclude directories from tree hashing. +""" +function contains_files(path::AbstractString) + st = lstat(path) + ispath(st) || throw(ArgumentError("non-existent path: $(repr(path))")) + isdir(st) || return true + for p in readdir(path) + contains_files(joinpath(path, p)) && return true + end + return false +end + + +""" + tree_hash(HashType::Type, root::AbstractString) + +Calculate the git tree hash of a given path. +""" +function tree_hash(::Type{HashType}, root::AbstractString; debug_out::Union{IO,Nothing} = nothing, indent::Int=0) where HashType + entries = Tuple{String, Vector{UInt8}, GitMode}[] + for f in sort(readdir(root; join=true); by = f -> gitmode(f) == mode_dir ? f*"/" : f) + # Skip `.git` directories + if basename(f) == ".git" + continue + end + + filepath = abspath(f) + mode = gitmode(filepath) + if mode == mode_dir + # If this directory contains no files, then skip it + contains_files(filepath) || continue + + # Otherwise, hash it up! 
+ child_stream = nothing + if debug_out !== nothing + child_stream = IOBuffer() + end + hash = tree_hash(HashType, filepath; debug_out=child_stream, indent=indent+1) + if debug_out !== nothing + indent_str = "| "^indent + println(debug_out, "$(indent_str)+ [D] $(basename(filepath)) - $(bytes2hex(hash))") + print(debug_out, String(take!(child_stream))) + println(debug_out, indent_str) + end + else + hash = blob_hash(HashType, filepath) + if debug_out !== nothing + indent_str = "| "^indent + mode_str = mode == mode_normal ? "F" : "X" + println(debug_out, "$(indent_str)[$(mode_str)] $(basename(filepath)) - $(bytes2hex(hash))") + end + end + push!(entries, (basename(filepath), hash, mode)) + end + + content_size = 0 + for (n, h, m) in entries + content_size += ndigits(UInt32(m); base=8) + 1 + sizeof(n) + 1 + sizeof(h) + end + + # Return the hash of these entries + ctx = HashType() + SHA.update!(ctx, Vector{UInt8}("tree $(content_size)\0")) + for (name, hash, mode) in entries + SHA.update!(ctx, Vector{UInt8}("$(mode) $(name)\0")) + SHA.update!(ctx, hash) + end + return SHA.digest!(ctx) +end +tree_hash(root::AbstractString; debug_out::Union{IO,Nothing} = nothing) = tree_hash(SHA.SHA1_CTX, root; debug_out) + +function check_valid_HEAD(repo) + try LibGit2.head(repo) + catch err + artifactserror("invalid git HEAD ($(err.msg))") + end +end + +function git_file_stream(repo::LibGit2.GitRepo, spec::String; fakeit::Bool=false)::IO + blob = try LibGit2.GitBlob(repo, spec) + catch err + err isa LibGit2.GitError && err.code == LibGit2.Error.ENOTFOUND || rethrow() + fakeit && return devnull + end + iob = IOBuffer(LibGit2.content(blob)) + close(blob) + return iob +end + +end # module diff --git a/stdlib/ArtifactDownloads/src/MiniProgressBars.jl b/stdlib/ArtifactDownloads/src/MiniProgressBars.jl new file mode 100644 index 0000000000000..5967ce2eb4273 --- /dev/null +++ b/stdlib/ArtifactDownloads/src/MiniProgressBars.jl @@ -0,0 +1,125 @@ +module MiniProgressBars + +export MiniProgressBar, start_progress, end_progress, show_progress, print_progress_bottom + +import Printf: @sprintf + +# Until Base.format_bytes supports sigdigits +function pkg_format_bytes(bytes; binary=true, sigdigits::Integer=3) + units = binary ? Base._mem_units : Base._cnt_units + factor = binary ? 1024 : 1000 + bytes, mb = Base.prettyprint_getunits(bytes, length(units), Int64(factor)) + if mb == 1 + return string(Int(bytes), " ", Base._mem_units[mb], bytes==1 ? "" : "s") + else + return string(Base.Ryu.writefixed(Float64(bytes), sigdigits), binary ? 
" $(units[mb])" : "$(units[mb])B") + end +end + +Base.@kwdef mutable struct MiniProgressBar + max::Int = 1 + header::String = "" + color::Symbol = :nothing + width::Int = 40 + current::Int = 0 + status::String = "" # If not empty this string replaces the bar + prev::Int = 0 + has_shown::Bool = false + time_shown::Float64 = 0.0 + mode::Symbol = :percentage # :percentage :int :data + always_reprint::Bool = false + indent::Int = 4 + main::Bool = true +end + +const PROGRESS_BAR_TIME_GRANULARITY = Ref(1 / 30.0) # 30 fps +const PROGRESS_BAR_PERCENTAGE_GRANULARITY = Ref(0.1) + +function start_progress(io::IO, _::MiniProgressBar) + ansi_disablecursor = "\e[?25l" + print(io, ansi_disablecursor) +end + +function show_progress(io::IO, p::MiniProgressBar; termwidth=nothing, carriagereturn=true) + if p.max == 0 + perc = 0.0 + prev_perc = 0.0 + else + perc = p.current / p.max * 100 + prev_perc = p.prev / p.max * 100 + end + # Bail early if we are not updating the progress bar, + # Saves printing to the terminal + if !p.always_reprint && p.has_shown && !((perc - prev_perc) > PROGRESS_BAR_PERCENTAGE_GRANULARITY[]) + return + end + t = time() + if !p.always_reprint && p.has_shown && (t - p.time_shown) < PROGRESS_BAR_TIME_GRANULARITY[] + return + end + p.time_shown = t + p.prev = p.current + p.has_shown = true + + progress_text = if p.mode == :percentage + @sprintf "%2.1f %%" perc + elseif p.mode == :int + string(p.current, "/", p.max) + elseif p.mode == :data + lpad(string(pkg_format_bytes(p.current; sigdigits=1), "/", pkg_format_bytes(p.max; sigdigits=1)), 20) + else + error("Unknown mode $(p.mode)") + end + termwidth = @something termwidth displaysize(io)[2] + max_progress_width = max(0, min(termwidth - textwidth(p.header) - textwidth(progress_text) - 10 , p.width)) + n_filled = ceil(Int, max_progress_width * perc / 100) + n_left = max_progress_width - n_filled + headers = split(p.header) + to_print = sprint(; context=io) do io + print(io, " "^p.indent) + if p.main + printstyled(io, headers[1], " "; color=:green, bold=true) + length(headers) > 1 && printstyled(io, join(headers[2:end], ' '), " ") + else + print(io, p.header, " ") + end + print(io, " ") + if !isempty(p.status) + print(io, p.status) + else + printstyled(io, "━"^n_filled; color=p.color) + printstyled(io, perc >= 95 ? "━" : "╸"; color=p.color) + printstyled(io, "━"^n_left, " "; color=:light_black) + print(io, progress_text) + end + carriagereturn && print(io, "\r") + end + # Print everything in one call + print(io, to_print) +end + +function end_progress(io, p::MiniProgressBar) + ansi_enablecursor = "\e[?25h" + ansi_clearline = "\e[2K" + print(io, ansi_enablecursor * ansi_clearline) +end + +# Useful when writing a progress bar in the bottom +# makes the bottom progress bar not flicker +# prog = MiniProgressBar(...) +# prog.end = n +# for progress in 1:n +# print_progress_bottom(io) +# println("stuff") +# prog.current = progress +# showprogress(io, prog) +# end +# +function print_progress_bottom(io::IO) + ansi_clearline = "\e[2K" + ansi_movecol1 = "\e[1G" + ansi_moveup(n::Int) = string("\e[", n, "A") + print(io, "\e[S" * ansi_moveup(1) * ansi_clearline * ansi_movecol1) +end + +end diff --git a/stdlib/ArtifactDownloads/src/PlatformEngines.jl b/stdlib/ArtifactDownloads/src/PlatformEngines.jl new file mode 100644 index 0000000000000..d5167c86d241f --- /dev/null +++ b/stdlib/ArtifactDownloads/src/PlatformEngines.jl @@ -0,0 +1,711 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license
+
+# Content in this file is extracted from BinaryProvider.jl, see LICENSE.md
+
+module PlatformEngines
+
+import TOML # parsefile, print
+import Tar # several
+import SHA # sha256
+import Downloads # download
+import ..Utils: pkg_server, depots1, can_fancyprint, stderr_f
+import ..MiniProgressBars: MiniProgressBar, start_progress, end_progress, show_progress
+
+import Base.BinaryPlatforms: HostPlatform, triplet
+import p7zip_jll
+
+export verify, unpack, package, download_verify_unpack
+
+const EXE7Z_LOCK = ReentrantLock()
+const EXE7Z = Ref{String}()
+
+function exe7z()
+    # If the JLL is available, use the wrapper function defined in there
+    if p7zip_jll.is_available()
+        return p7zip_jll.p7zip()
+    end
+
+    lock(EXE7Z_LOCK) do
+        if !isassigned(EXE7Z)
+            EXE7Z[] = find7z()
+        end
+        return Cmd([EXE7Z[]])
+    end
+end
+
+function find7z()
+    name = "7z"
+    Sys.iswindows() && (name = "$name.exe")
+    for dir in (joinpath("..", "libexec"), ".")
+        path = normpath(Sys.BINDIR::String, dir, name)
+        isfile(path) && return path
+    end
+    path = Sys.which(name)
+    path !== nothing && return path
+    error("7z binary not found")
+end
+
+is_secure_url(url::AbstractString) =
+    occursin(r"^(https://|\w+://(127\.0\.0\.1|localhost)(:\d+)?($|/))"i, url)
+
+function get_server_dir(
+    url :: AbstractString,
+    server :: Union{AbstractString, Nothing} = pkg_server(),
+)
+    server === nothing && return
+    url == server || startswith(url, "$server/") || return
+    m = match(r"^\w+://([^\\/]+)(?:$|/)", server)
+    if m === nothing
+        @warn "malformed Pkg server value" server
+        return
+    end
+    isempty(Base.DEPOT_PATH) && return
+    invalid_filename_chars = [':', '/', '<', '>', '"', '/', '\\', '|', '?', '*']
+    dir = join(replace(c -> c in invalid_filename_chars ? '_' : c, collect(String(m[1]))))
+    return joinpath(depots1(), "servers", dir)
+end
+
+const AUTH_ERROR_HANDLERS = Pair{Union{String, Regex},Any}[]
+
+function handle_auth_error(url, err; verbose::Bool = false)
+    handled, should_retry = false, false
+    for (scheme, handler) in AUTH_ERROR_HANDLERS
+        occursin(scheme, url) || continue
+        handled, should_retry = handler(url, pkg_server(), err)
+        handled && break
+    end
+    handled && should_retry && return get_auth_header(url; verbose = verbose)
+    return nothing
+end
+
+"""
+    register_auth_error_handler(urlscheme::Union{AbstractString, Regex}, f)
+
+Registers `f` as the topmost handler for failures in package server authentication.
+
+A handler is only invoked if `occursin(urlscheme, url)` is true (where `url` is the URL Pkg
+is currently trying to download).
+
+`f` must be a function that takes three input arguments `(url, pkgserver, err)`, where `url` is the
+URL currently being downloaded, `pkgserver = Pkg.pkg_server()` is the current package server, and
+`err` is one of `no-auth-file`, `insecure-connection`, `malformed-file`, `no-access-token`,
+`no-refresh-key`, `insecure-refresh-url` or `token-refresh-failed`.
+
+The handler `f` needs to return a tuple of `Bool`s `(handled, should_retry)`. If `handled` is `false`,
+the next handler in the stack will be called; otherwise handling terminates, and `get_auth_header` is called again if `should_retry`
+is `true`.
+
+`register_auth_error_handler` returns a zero-arg function that can be called to deregister the handler.
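+
+A sketch of a handler that logs failures for one (hypothetical) package server
+without retrying:
+
+    handler = (url, server, err) -> begin
+        @warn "package server authentication failed" url server err
+        return (true, false)  # handled; do not retry
+    end
+    deregister = register_auth_error_handler("https://pkg.example.com", handler)
+    # ...later, to remove the handler again:
+    deregister()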
+""" +function register_auth_error_handler(urlscheme::Union{AbstractString, Regex}, @nospecialize(f)) + unique!(pushfirst!(AUTH_ERROR_HANDLERS, urlscheme => f)) + return () -> deregister_auth_error_handler(urlscheme, f) +end + +""" + deregister_auth_error_handler(urlscheme::Union{AbstractString, Regex}, f) + +Removes `f` from the stack of authentication error handlers. +""" +function deregister_auth_error_handler(urlscheme::Union{String, Regex}, @nospecialize(f)) + filter!(handler -> !(handler.first == urlscheme && handler.second === f), AUTH_ERROR_HANDLERS) + return nothing +end + +function get_auth_header(url::AbstractString; verbose::Bool = false) + server_dir = get_server_dir(url) + server_dir === nothing && return + auth_file = joinpath(server_dir, "auth.toml") + isfile(auth_file) || return handle_auth_error(url, "no-auth-file"; verbose=verbose) + # TODO: check for insecure auth file permissions + if !is_secure_url(url) + @warn "refusing to send auth info over insecure connection" url=url + return handle_auth_error(url, "insecure-connection"; verbose=verbose) + end + # parse the auth file + auth_info = try + TOML.parsefile(auth_file) + catch err + @error "malformed auth file" file=auth_file err=err + return handle_auth_error(url, "malformed-file"; verbose=verbose) + end + # check for an auth token + if !haskey(auth_info, "access_token") + @warn "auth file without access_token field" file=auth_file + return handle_auth_error(url, "no-access-token"; verbose=verbose) + end + auth_token = auth_info["access_token"]::String + auth_header = "Authorization" => "Bearer $auth_token" + # handle token expiration and refresh + expires_at = Inf + if haskey(auth_info, "expires_at") + expires_at = min(expires_at, Float64(auth_info["expires_at"])::Float64) + end + if haskey(auth_info, "expires_in") + expires_at = min(expires_at, mtime(auth_file) + Float64(auth_info["expires_in"])::Float64) + end + # if token is good until ten minutes from now, use it + time_now = time() + if expires_at ≥ time_now + 10*60 # ten minutes + return auth_header + end + if !haskey(auth_info, "refresh_url") || !haskey(auth_info, "refresh_token") + if expires_at ≤ time_now + @warn "expired auth without refresh keys" file=auth_file + end + # try it anyway since we can't refresh + return something(handle_auth_error(url, "no-refresh-key"; verbose=verbose), auth_header) + end + refresh_url = auth_info["refresh_url"]::String + if !is_secure_url(refresh_url) + @warn "ignoring insecure auth refresh URL" url=refresh_url + return something(handle_auth_error(url, "insecure-refresh-url"; verbose=verbose), auth_header) + end + verbose && @info "Refreshing expired auth token..." 
file=auth_file + tmp = tempname() + refresh_token = auth_info["refresh_token"]::String + refresh_auth = "Authorization" => "Bearer $refresh_token" + try download(refresh_url, tmp, auth_header=refresh_auth, verbose=verbose) + catch err + @warn "token refresh failure" file=auth_file url=refresh_url err=err + rm(tmp, force=true) + return handle_auth_error(url, "token-refresh-failed"; verbose=verbose) + end + auth_info = try TOML.parsefile(tmp) + catch err + @warn "discarding malformed auth file" url=refresh_url err=err + rm(tmp, force=true) + return something(handle_auth_error(url, "malformed-file"; verbose=verbose), auth_header) + end + if !haskey(auth_info, "access_token") + if haskey(auth_info, "refresh_token") + auth_info["refresh_token"] = "*"^64 + end + @warn "discarding auth file without access token" auth=auth_info + rm(tmp, force=true) + return something(handle_auth_error(url, "no-access-token"; verbose=verbose), auth_header) + end + if haskey(auth_info, "expires_in") + expires_in = auth_info["expires_in"] + if expires_in isa Number + expires_at = floor(time_now + Float64(expires_in)::Float64) + # overwrite expires_at (avoids clock skew issues) + auth_info["expires_at"] = expires_at + end + end + let auth_info = auth_info + open(tmp, write=true) do io + TOML.print(io, auth_info, sorted=true) + end + end + mv(tmp, auth_file, force=true) + access_token = auth_info["access_token"]::String + return "Authorization" => "Bearer $access_token" +end + +# based on information in this post: +# https://github.community/t5/GitHub-Actions/Have-the-CI-environment-variable-set-by-default/m-p/32358/highlight/true#M1097 +const CI_VARIABLES = [ + "APPVEYOR", + "CI", + "CI_SERVER", + "CIRCLECI", + "CONTINUOUS_INTEGRATION", + "GITHUB_ACTIONS", + "GITLAB_CI", + "JULIA_CI", + "JULIA_PKGEVAL", + "JULIA_REGISTRYCI_AUTOMERGE", + "TF_BUILD", + "TRAVIS", +] + +function get_metadata_headers(url::AbstractString) + headers = Pair{String,String}[] + server = pkg_server() + server_dir = get_server_dir(url, server) + server_dir === nothing && return headers + push!(headers, "Julia-Pkg-Protocol" => "1.0") + push!(headers, "Julia-Pkg-Server" => server) + push!(headers, "Julia-Version" => string(VERSION)) + system = triplet(HostPlatform()) + push!(headers, "Julia-System" => system) + ci_info = String[] + for var in CI_VARIABLES + val = get(ENV, var, nothing) + state = val === nothing ? "n" : + lowercase(val) in ("true", "t", "1", "yes", "y") ? "t" : + lowercase(val) in ("false", "f", "0", "no", "n") ? 
"f" : "o" + push!(ci_info, "$var=$state") + end + push!(headers, "Julia-CI-Variables" => join(ci_info, ';')) + push!(headers, "Julia-Interactive" => string(isinteractive())) + for (key, val) in ENV + m = match(r"^JULIA_PKG_SERVER_([A-Z0-9_]+)$"i, key) + m === nothing && continue + val = strip(val) + isempty(val) && continue + words = split(m.captures[1], '_', keepempty=false) + isempty(words) && continue + hdr = "Julia-" * join(map(titlecase, words), '-') + any(hdr == k for (k, v) in headers) && continue + push!(headers, hdr => val) + end + return headers +end + +function download( + url::AbstractString, + dest::AbstractString; + verbose::Bool = false, + headers::Vector{Pair{String,String}} = Pair{String,String}[], + auth_header::Union{Pair{String,String}, Nothing} = nothing, + io::IO=stderr_f(), + progress::Union{Nothing,Function} = nothing, # (total, now) -> nothing +) + if auth_header === nothing + auth_header = get_auth_header(url, verbose=verbose) + end + if auth_header !== nothing + push!(headers, auth_header) + end + for header in get_metadata_headers(url) + push!(headers, header) + end + + do_fancy = verbose && can_fancyprint(io) + progress = if !isnothing(progress) + progress + elseif do_fancy + bar = MiniProgressBar(header="Downloading", color=Base.info_color()) + start_progress(io, bar) + let bar=bar + (total, now) -> begin + bar.max = total + bar.current = now + # Downloads.download attatches the progress indicator to the header request too + # which is only ~100 bytes, and will report as 0 - 100% progress immediately + # then dip down to 0 before the actual download starts. So we only show the + # progress bar once the real download starts. + total > 1000 && show_progress(io, bar) + end + end + else + nothing + end + try + Downloads.download(url, dest; headers, progress) + finally + do_fancy && end_progress(io, bar) + end +end + +""" + download_verify( + url::AbstractString, + hash::Union{AbstractString, Nothing}, + dest::AbstractString; + verbose::Bool = false, + force::Bool = false, + quiet_download::Bool = false, + ) + +Download file located at `url`, verify it matches the given `hash`, and throw +an error if anything goes wrong. If `dest` already exists, just verify it. If +`force` is set to `true`, overwrite the given file if it exists but does not +match the given `hash`. + +This method returns `true` if the file was downloaded successfully, `false` +if an existing file was removed due to the use of `force`, and throws an error +if `force` is not set and the already-existent file fails verification, or if +`force` is set, verification fails, and then verification fails again after +redownloading the file. + +If `quiet_download` is set to `false`, this method will print to +stdout when downloading a new file. If it is set to `true` (default, and `verbose` is +set to `false`) the downloading process will be completely silent. If +`verbose` is set to `true`, messages about integrity verification will be +printed in addition to messages regarding downloading. +""" +function download_verify( + url::AbstractString, + hash::Union{AbstractString, Nothing}, + dest::AbstractString; + verbose::Bool = false, + force::Bool = false, + quiet_download::Bool = false, + progress::Union{Nothing,Function} = nothing, # (total, now) -> nothing +) + # Whether the file existed in the first place + file_existed = false + + if isfile(dest) + file_existed = true + if verbose + @info("Destination file $(dest) already exists, verifying...") + end + + # verify download, if it passes, return happy. 
If it fails, (and + # `force` is `true`, re-download!) + if hash !== nothing && verify(dest, hash; verbose=verbose) + return true + elseif !force + error("Verification failed, not overwriting $(dest)") + end + end + + # Make sure the containing folder exists + mkpath(dirname(dest)) + + # Download the file, optionally continuing + attempts = 3 + for i in 1:attempts + try + download(url, dest; verbose=verbose || !quiet_download, progress) + break + catch err + @debug "download and verify failed on attempt $i/$attempts" url dest err + # for system errors like `no space left on device` exit after first try + if err isa SystemError || i == attempts + rethrow() + else + sleep(1) + end + end + end + details = String[] + if hash !== nothing && !verify(dest, hash; verbose, details) + # If the file already existed, it's possible the initially downloaded chunk + # was bad. If verification fails after downloading, auto-delete the file + # and start over from scratch. + if file_existed + if verbose + @info("Continued download didn't work, restarting from scratch") + end + Base.rm(dest; force=true) + + # Download and verify from scratch + download(url, dest; verbose=verbose || !quiet_download) + if hash !== nothing && !verify(dest, hash; verbose, details) + @goto verification_failed + end + else + @label verification_failed + # If it didn't verify properly and we didn't resume, something is + # very wrong and we must complain mightily. + details_indented = join(map(s -> " $s", split(join(details, "\n"), '\n')), "\n") + error("Verification failed:\n" * details_indented) + end + end + + # If the file previously existed, this means we removed it (due to `force`) + # and redownloaded, so return `false`. If it didn't exist, then this means + # that we successfully downloaded it, so return `true`. + return !file_existed +end + +# TODO: can probably delete this, only affects tests +function copy_symlinks() + var = get(ENV, "BINARYPROVIDER_COPYDEREF", "") + lowercase(var) in ("true", "t", "yes", "y", "1") ? true : + lowercase(var) in ("false", "f", "no", "n", "0") ? false : nothing +end + +function unpack( + tarball_path::AbstractString, + dest::AbstractString; + verbose::Bool = false, +) + Tar.extract(`$(exe7z()) x $tarball_path -so`, dest, copy_symlinks = copy_symlinks()) +end + +""" + package(src_dir::AbstractString, tarball_path::AbstractString) + +Compress `src_dir` into a tarball located at `tarball_path`. +""" +function package(src_dir::AbstractString, tarball_path::AbstractString; io=stderr_f()) + rm(tarball_path, force=true) + cmd = `$(exe7z()) a -si -tgzip -mx9 $tarball_path` + open(pipeline(cmd, stdout=devnull, stderr=io), write=true) do io + Tar.create(src_dir, io) + end +end + +""" + download_verify_unpack( + url::AbstractString, + hash::Union{AbstractString, Nothing}, + dest::AbstractString; + tarball_path = nothing, + ignore_existence::Bool = false, + force::Bool = false, + verbose::Bool = false, + quiet_download::Bool = false, + io::IO=stderr, + ) + +Helper method to download tarball located at `url`, verify it matches the +given `hash`, then unpack it into folder `dest`. In general, the method +`install()` should be used to download and install tarballs into a `Prefix`; +this method should only be used if the extra functionality of `install()` is +undesired. + +If `tarball_path` is specified, the given `url` will be downloaded to +`tarball_path`, and it will not be removed after downloading and verification +is complete. 
If it is not specified, the tarball will be downloaded to a +temporary location, and removed after verification is complete. + +If `force` is specified, a verification failure will cause `tarball_path` to be +deleted (if it exists), the `dest` folder to be removed (if it exists) and the +tarball to be redownloaded and reverified. If the verification check is failed +a second time, an exception is raised. If `force` is not specified, a +verification failure will result in an immediate raised exception. + +If `ignore_existence` is set, the tarball is unpacked even if the destination +directory already exists. + +Returns `true` if a tarball was actually unpacked, `false` if nothing was +changed in the destination prefix. +""" +function download_verify_unpack( + url::AbstractString, + hash::Union{AbstractString, Nothing}, + dest::AbstractString; + tarball_path = nothing, + ignore_existence::Bool = false, + force::Bool = false, + verbose::Bool = false, + quiet_download::Bool = false, + io::IO=stderr_f(), + progress::Union{Nothing,Function} = nothing, # (total, now) -> nothing +) + # First, determine whether we should keep this tarball around + remove_tarball = false + if tarball_path === nothing + remove_tarball = true + + function url_ext(url) + url = basename(url) + + # Chop off urlparams + qidx = findfirst(isequal('?'), url) + if qidx !== nothing + url = url[1:qidx] + end + + # Try to detect extension + dot_idx = findlast(isequal('.'), url) + if dot_idx === nothing + return nothing + end + + return url[dot_idx+1:end] + end + + # If extension of url contains a recognized extension, use it, otherwise use ".gz" + ext = url_ext(url) + if !(ext in ["tar", "gz", "tgz", "bz2", "xz"]) + ext = "gz" + end + + # Work around windows limitations regarding tempname() + tarball_path = "$(tempname())-download.$(ext)" + tries = 0 + while isfile(tarball_path) && tries < 100 + tarball_path = "$(tempname())-download.$(ext)" + tries += 1 + end + if tries >= 100 + error("Unable to generate unused tempname! Clean up your temporary folder $(dirname(tempname())) and try again.") + end + end + + # Download the tarball; if it already existed and we needed to remove it + # then we should remove the unpacked path as well + should_delete = !download_verify(url, hash, tarball_path; force, verbose, quiet_download, progress) + if should_delete + if verbose + @info("Removing dest directory $(dest) as source tarball changed") + end + Base.rm(dest; recursive=true, force=true) + end + + # If the destination path already exists, don't bother to unpack + if !ignore_existence && isdir(dest) + if verbose + @info("Destination directory $(dest) already exists, returning") + end + + # Signify that we didn't do any unpacking + return false + end + + try + if verbose + @info("Unpacking $(tarball_path) into $(dest)...") + end + isnothing(progress) || progress(10000, 10000; status="unpacking") + open(`$(exe7z()) x $tarball_path -so`) do io + Tar.extract(io, dest, copy_symlinks = copy_symlinks()) + end + finally + if remove_tarball + Base.rm(tarball_path) + # Remove cached tarball hash, if it exists. + Base.rm(string(tarball_path, ".sha256"); force=true) + end + end + + # Signify that we did some unpacking! + return true +end + + +""" + verify(path::AbstractString, hash::AbstractString; + verbose::Bool = false, report_cache_status::Bool = false, + details::Union{Vector{String},Nothing} = nothing) + +Given a file `path` and a `hash`, calculate the SHA256 of the file and compare +it to `hash`. 
+"""
+    verify(path::AbstractString, hash::AbstractString;
+           verbose::Bool = false, report_cache_status::Bool = false,
+           hash_path::AbstractString = "\$(path).sha256",
+           details::Union{Vector{String},Nothing} = nothing)
+
+Given a file `path` and a `hash`, calculate the SHA256 of the file and compare
+it to `hash`. This method caches verification results in a `"\$(path).sha256"`
+file to accelerate reverification of files that have been previously verified.
+If no `".sha256"` file exists, a full verification will be done and the file
+will be created, with the calculated hash being stored within the `".sha256"`
+file. If a `".sha256"` file does exist, its contents are checked to ensure
+that the hash contained within matches the given `hash` parameter, and its
+modification time shows that the file located at `path` has not been modified
+since the last verification. The cache file location can be overridden with
+the `hash_path` keyword.
+
+If `report_cache_status` is set to `true`, then the return value will be a
+`Symbol` giving a granular status report on the state of the hash cache, in
+addition to the `true`/`false` signifying whether verification completed
+successfully.
+
+If `details` is provided, any pertinent detail will be pushed to it rather than logged.
+"""
+function verify(path::AbstractString, hash::AbstractString; verbose::Bool = false,
+                report_cache_status::Bool = false, hash_path::AbstractString="$(path).sha256",
+                details::Union{Vector{String},Nothing} = nothing)
+
+    # Check hash string format
+    if !occursin(r"^[0-9a-f]{64}$"i, hash)
+        msg  = "Hash value must be 64 hexadecimal characters (256 bits), "
+        if !isascii(hash)
+            msg *= "given hash value is non-ASCII"
+        elseif occursin(r"^[0-9a-f]*$"i, hash)
+            msg *= "given hash value has the wrong length ($(length(hash)))"
+        else
+            msg *= "given hash value contains non-hexadecimal characters"
+        end
+        msg *= ": $(repr(hash))"
+        error(msg)
+    end
+    hash = lowercase(hash)
+
+    # Check to see if the hash cache is consistent
+    status = :hash_consistent
+
+    # First, it must exist
+    if isfile(hash_path)
+        # Next, it must contain the same hash as what we're verifying against
+        if read(hash_path, String) == hash
+            # Next, it must be no older than the actual path
+            if stat(hash_path).mtime >= stat(path).mtime
+                # If all of that is true, then we're good!
+                if verbose
+                    @info("Hash cache is consistent, returning true")
+                end
+                status = :hash_cache_consistent
+
+                # If we're reporting our status, then report it!
+                if report_cache_status
+                    return true, status
+                else
+                    return true
+                end
+            else
+                if verbose
+                    @info("File has been modified, hash cache invalidated")
+                end
+                status = :file_modified
+            end
+        else
+            if verbose
+                @info("Verification hash mismatch, hash cache invalidated")
+            end
+            status = :hash_cache_mismatch
+        end
+    else
+        if verbose
+            @info("No hash cache found")
+        end
+        status = :hash_cache_missing
+    end
+
+    calc_hash = open(path) do file
+        bytes2hex(SHA.sha256(file))
+    end
+    @assert occursin(r"^[0-9a-f]{64}$", calc_hash)
+
+    if verbose
+        @info("Calculated hash $calc_hash for file $path")
+    end
+
+    if calc_hash != hash
+        msg  = "Hash Mismatch!\n"
+        msg *= "  Expected sha256:   $hash\n"
+        msg *= "  Calculated sha256: $calc_hash"
+        if isnothing(details)
+            @error(msg)
+        else
+            push!(details, msg)
+        end
+        if report_cache_status
+            return false, :hash_mismatch
+        else
+            return false
+        end
+    end
+
+    # Try to save a hash cache if everything worked out fine
+    try
+        open(hash_path, "w") do file
+            write(file, hash)
+        end
+    catch e
+        if isa(e, InterruptException)
+            rethrow(e)
+        end
+
+        if verbose
+            @warn("Unable to create hash cache file $(hash_path)")
+        end
+    end
+
+    if report_cache_status
+        return true, status
+    else
+        return true
+    end
+end
+
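The cache states are easiest to see by poking at a file directly; this sketch mirrors what the new tests in the third patch exercise:

```julia
import ArtifactDownloads: PlatformEngines
import SHA

path = tempname()
write(path, "some payload")
h = bytes2hex(SHA.sha256(read(path)))

PlatformEngines.verify(path, h)                              # full hash; writes "$(path).sha256"
PlatformEngines.verify(path, h; report_cache_status = true)  # (true, :hash_cache_consistent)
sleep(2)                                                     # let mtimes differ on coarse filesystems
touch(path)
PlatformEngines.verify(path, h; report_cache_status = true)  # (true, :file_modified)
```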
+# Verify the git-tree-sha1 hash of a compressed archive.
+function verify_archive_tree_hash(tar_gz::AbstractString, expected_hash::Base.SHA1)
+    # This can fail because unlike sha256 verification of the downloaded
+    # tarball, tree hash verification requires that the file can i) be
+    # decompressed and ii) is a proper archive.
+    calc_hash = try
+        Base.SHA1(open(Tar.tree_hash, `$(exe7z()) x $tar_gz -so`))
+    catch err
+        @warn "unable to decompress and read archive" exception=err
+        return false
+    end
+    if calc_hash != expected_hash
+        @warn "tarball content does not match expected git-tree-sha1"
+        return false
+    end
+    return true
+end
+
+end # module PlatformEngines
diff --git a/stdlib/ArtifactDownloads/src/Utils.jl b/stdlib/ArtifactDownloads/src/Utils.jl
new file mode 100644
index 0000000000000..0260d500e6868
--- /dev/null
+++ b/stdlib/ArtifactDownloads/src/Utils.jl
@@ -0,0 +1,141 @@
+module Utils
+
+import FileWatching, TOML, Dates
+
+struct ArtifactsError <: Exception
+    msg::String
+end
+artifactserror(msg::String...) = throw(ArtifactsError(join(msg)))
+Base.showerror(io::IO, err::ArtifactsError) = print(io, err.msg)
+
+#
+function pkg_server()
+    server = get(ENV, "JULIA_PKG_SERVER", "https://pkg.julialang.org")
+    isempty(server) && return nothing
+    startswith(server, r"\w+://") || (server = "https://$server")
+    return rstrip(server, '/')
+end
+
+can_fancyprint(io::IO) = ((io isa Base.TTY) || (io isa IOContext{IO} && io.io isa Base.TTY)) && (get(ENV, "CI", nothing) != "true")
+
+function printpkgstyle(io::IO, cmd::Symbol, text::String, ignore_indent::Bool=false; color=:green)
+    indent = textwidth(string(:Precompiling)) # "Precompiling" is the longest operation
+    ignore_indent && (indent = 0)
+    printstyled(io, lpad(string(cmd), indent), color=color, bold=true)
+    println(io, " ", text)
+end
+
+# For globally overriding in e.g. tests
+const DEFAULT_IO = Ref{Union{IO,Nothing}}(nothing)
+
+# See discussion in https://github.com/JuliaLang/julia/pull/52249
+function unstableio(@nospecialize(io::IO))
+    # Needed to prevent specialization https://github.com/JuliaLang/julia/pull/52249#discussion_r1401199265
+    _io = Base.inferencebarrier(io)
+    IOContext{IO}(
+        _io,
+        get(_io,:color,false) ? Base.ImmutableDict{Symbol,Any}(:color, true) : Base.ImmutableDict{Symbol,Any}()
+    )
+end
+stderr_f() = something(DEFAULT_IO[], unstableio(stderr))
+stdout_f() = something(DEFAULT_IO[], unstableio(stdout))
+
+function write_env_usage(source_file::AbstractString, usage_filepath::AbstractString)
+    # Don't record ghost usage
+    !isfile(source_file) && return
+
+    # Ensure that log dir exists
+    !ispath(logdir()) && mkpath(logdir())
+
+    usage_file = joinpath(logdir(), usage_filepath)
+    timestamp = Dates.now()
+
+    ## Atomically write usage file using process id locking
+    FileWatching.mkpidlock(usage_file * ".pid", stale_age = 3) do
+        usage = if isfile(usage_file)
+            TOML.parsefile(usage_file)
+        else
+            Dict{String, Any}()
+        end
+
+        # record new usage
+        usage[source_file] = [Dict("time" => timestamp)]
+
+        # keep only latest usage info
+        for k in keys(usage)
+            times = map(usage[k]) do d
+                if haskey(d, "time")
+                    Dates.DateTime(d["time"])
+                else
+                    # if there's no time entry because of a write failure be conservative and mark it as being used now
+                    @debug "Usage file `$usage_filepath` has a missing `time` entry for `$k`.
Marking as used `now()`" + Dates.now() + end + end + usage[k] = [Dict("time" => maximum(times))] + end + + open(usage_file, "w") do io + TOML.print(io, usage, sorted=true) + end + end + return +end + +depots() = Base.DEPOT_PATH +function depots1() + d = depots() + isempty(d) && artifactserror("no depots found in DEPOT_PATH") + return d[1] +end + +logdir(depot = depots1()) = joinpath(depot, "logs") + +function can_symlink(dir::AbstractString) + # guaranteed to be an empty directory + link_path = joinpath(dir, "link") + return try + symlink("target", link_path) + true + catch err + err isa Base.IOError || rethrow() + false + finally + rm(link_path, force=true) + end +end + +function set_readonly(path) + for (root, dirs, files) in walkdir(path) + for file in files + filepath = joinpath(root, file) + # `chmod` on a link would change the permissions of the target. If + # the link points to a file within the same root, it will be + # chmod'ed anyway, but we don't want to make directories read-only. + # It's better not to mess with the other cases (links to files + # outside of the root, links to non-file/non-directories, etc...) + islink(filepath) && continue + fmode = filemode(filepath) + @static if Sys.iswindows() + if Sys.isexecutable(filepath) + fmode |= 0o111 + end + end + try + chmod(filepath, fmode & (typemax(fmode) ⊻ 0o222)) + catch + end + end + end + return nothing +end +set_readonly(::Nothing) = nothing + +# See loading.jl +const TOML_CACHE = Base.TOMLCache(Base.TOML.Parser{Dates}()) +const TOML_LOCK = ReentrantLock() +# Some functions mutate the returning Dict so return a copy of the cached value here +parse_toml(toml_file::AbstractString) = + Base.invokelatest(deepcopy_toml, Base.parsed_toml(toml_file, TOML_CACHE, TOML_LOCK))::Dict{String, Any} + +end diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl index 9bca72f6c7a14..4de3ff4a42e3e 100644 --- a/stdlib/Artifacts/src/Artifacts.jl +++ b/stdlib/Artifacts/src/Artifacts.jl @@ -759,6 +759,6 @@ precompile(NamedTuple{(:pkg_uuid,)}, (Tuple{Base.UUID},)) precompile(Core.kwfunc(load_artifacts_toml), (NamedTuple{(:pkg_uuid,), Tuple{Base.UUID}}, typeof(load_artifacts_toml), String)) precompile(parse_mapping, (String, String, String)) precompile(parse_mapping, (Dict{String, Any}, String, String)) -precompile(Tuple{typeof(Artifacts._artifact_str), Module, String, Base.SubString{String}, String, Base.Dict{String, Any}, Base.SHA1, Base.BinaryPlatforms.Platform, Any}) +precompile(Tuple{typeof(Artifacts._artifact_str), Module, String, Base.SubString{String}, String, Base.Dict{String, Any}, Base.SHA1, Base.BinaryPlatforms.Platform, Val{nothing}}) end # module Artifacts diff --git a/stdlib/Makefile b/stdlib/Makefile index ebc40c9db2b12..efb5f5d513c8d 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -40,7 +40,7 @@ endef $(foreach jll,$(JLLS),$(eval $(call download-artifacts-toml,$(jll)))) -STDLIBS = Artifacts Base64 CRC32c Dates FileWatching \ +STDLIBS = Artifacts ArtifactDownloads Base64 CRC32c Dates FileWatching \ Future InteractiveUtils Libdl LibGit2 LinearAlgebra Logging \ Markdown Mmap Printf Profile Random REPL Serialization \ SharedArrays Sockets Test TOML Unicode UUIDs \ diff --git a/stdlib/Manifest.toml b/stdlib/Manifest.toml index f9fb307190838..c210eb3833c01 100644 --- a/stdlib/Manifest.toml +++ b/stdlib/Manifest.toml @@ -2,12 +2,17 @@ julia_version = "1.12.0-DEV" manifest_format = "2.0" -project_hash = "d3a1f6b706609fe0c59521e1d770be6e2b8c489d" +project_hash = "c59a70b2cde34cfe2cc3e07f856b2c91946c224c" 
[[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" version = "1.1.2" +[[deps.ArtifactDownloads]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Printf", "SHA", "TOML", "Tar", "p7zip_jll"] +uuid = "6ee59702-05f1-4877-b811-03813c0d76c5" +version = "1.11.0" + [[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" version = "1.11.0" @@ -78,10 +83,10 @@ version = "18.1.7+2" [[deps.LLVMLibUnwind_jll]] deps = ["Artifacts", "Libdl"] uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9" -version = "12.0.1+0" +version = "14.0.6+0" [[deps.LazyArtifacts]] -deps = ["Artifacts", "Pkg"] +deps = ["ArtifactDownloads", "Artifacts"] uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" version = "1.11.0" @@ -113,7 +118,7 @@ version = "1.11.0+1" [[deps.LibUV_jll]] deps = ["Artifacts", "Libdl"] uuid = "183b4373-6708-53ba-ad28-60e28bb38547" -version = "2.0.1+17" +version = "2.0.1+18" [[deps.LibUnwind_jll]] deps = ["Artifacts", "Libdl"] diff --git a/stdlib/Project.toml b/stdlib/Project.toml index cc7ba99dd4e4f..8ffc797e1ead7 100644 --- a/stdlib/Project.toml +++ b/stdlib/Project.toml @@ -1,6 +1,7 @@ [deps] ArgTools = "0dad84c5-d112-42e6-8d28-ef12dabb789f" Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +ArtifactDownloads = "6ee59702-05f1-4877-b811-03813c0d76c5" Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" CRC32c = "8bf52ea8-c179-5cab-976a-9e18b702a9bc" CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae" diff --git a/stdlib/stdlib.mk b/stdlib/stdlib.mk index b79059d3368b1..af60e90898c89 100644 --- a/stdlib/stdlib.mk +++ b/stdlib/stdlib.mk @@ -3,8 +3,8 @@ STDLIBS_WITHIN_SYSIMG := \ LinearAlgebra Sockets INDEPENDENT_STDLIBS := \ - ArgTools Base64 CRC32c Dates DelimitedFiles Distributed Downloads Future \ - InteractiveUtils JuliaSyntaxHighlighting LazyArtifacts LibGit2 LibCURL Logging \ + ArtifactDownloads ArgTools Base64 CRC32c Dates DelimitedFiles Distributed Downloads \ + Future InteractiveUtils JuliaSyntaxHighlighting LazyArtifacts LibGit2 LibCURL Logging \ Markdown Mmap NetworkOptions Profile Printf Pkg REPL Serialization SharedArrays \ SparseArrays Statistics StyledStrings SuiteSparse_jll Tar Test TOML Unicode UUIDs \ dSFMT_jll GMP_jll libLLVM_jll LLD_jll LLVMLibUnwind_jll LibUnwind_jll LibUV_jll \ diff --git a/test/choosetests.jl b/test/choosetests.jl index 96d230d185c71..630cdee0c91fa 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -35,6 +35,7 @@ const TESTNAMES = [ const INTERNET_REQUIRED_LIST = [ "Artifacts", "Downloads", + "ArtifactDownloads", "LazyArtifacts", "LibCURL", "LibGit2", From 1f1d784d69accb92d2d6907c55d8f5fc4e60c400 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Thu, 12 Sep 2024 12:02:24 -0400 Subject: [PATCH 2/3] rm LibGit2 dependency --- stdlib/ArtifactDownloads/Project.toml | 1 - .../src/ArtifactDownloads.jl | 12 +- stdlib/ArtifactDownloads/src/GitTools.jl | 353 ------------------ .../ArtifactDownloads/src/GitTreeHashTools.jl | 133 +++++++ .../ArtifactDownloads/src/PlatformEngines.jl | 4 +- stdlib/ArtifactDownloads/src/Utils.jl | 18 +- stdlib/Manifest.toml | 2 +- 7 files changed, 159 insertions(+), 364 deletions(-) delete mode 100644 stdlib/ArtifactDownloads/src/GitTools.jl create mode 100644 stdlib/ArtifactDownloads/src/GitTreeHashTools.jl diff --git a/stdlib/ArtifactDownloads/Project.toml b/stdlib/ArtifactDownloads/Project.toml index 07374040c711d..d75c274784da4 100644 --- a/stdlib/ArtifactDownloads/Project.toml +++ b/stdlib/ArtifactDownloads/Project.toml @@ -7,7 +7,6 @@ Artifacts = 
"56f22d72-fd6d-98f1-02f0-08ddc0907c33" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6" FileWatching = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" -LibGit2 = "76f85450-5226-5b5a-8eaa-529ad045b433" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" p7zip_jll = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" diff --git a/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl b/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl index cdfeed8c3bc98..5529baa7044da 100644 --- a/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl +++ b/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl @@ -2,7 +2,7 @@ module ArtifactDownloads include("Utils.jl") include("MiniProgressBars.jl") -include("GitTools.jl") +include("GitTreeHashTools.jl") include("PlatformEngines.jl") import TOML @@ -11,8 +11,8 @@ import SHA: sha256 import ..Utils: set_readonly, pkg_server, can_fancyprint, stderr_f, printpkgstyle, write_env_usage, can_symlink, parse_toml -import ..GitTools import ..PlatformEngines: package, download_verify_unpack +import ..GitTreeHashTools: git_tree_hash import Base.BinaryPlatforms: AbstractPlatform, HostPlatform, triplet import Artifacts: artifact_names, ARTIFACTS_DIR_OVERRIDE, ARTIFACT_OVERRIDES, artifact_path, @@ -45,7 +45,7 @@ function create_artifact(f::Function) f(temp_dir) # Calculate the tree hash for this temporary directory - artifact_hash = SHA1(GitTools.tree_hash(temp_dir)) + artifact_hash = SHA1(git_tree_hash(temp_dir)) # If we created a dupe, just let the temp directory get destroyed. It's got the # same contents as whatever already exists after all, so it doesn't matter. Only @@ -135,7 +135,7 @@ function verify_artifact(hash::SHA1; honor_overrides::Bool=false) end # Otherwise actually run the verification - return all(hash.bytes .== GitTools.tree_hash(artifact_path(hash))) + return all(hash.bytes .== git_tree_hash(artifact_path(hash))) end """ @@ -179,7 +179,7 @@ Writes a mapping of `name` -> `hash` within the given `(Julia)Artifacts.toml` fi `platform` is not `nothing`, this artifact is marked as platform-specific, and will be a multi-mapping. It is valid to bind multiple artifacts with the same name, but different `platform`s and `hash`'es within the same `artifacts_toml`. If `force` is set -to `true`, this will overwrite a pre-existant mapping, otherwise an error is raised. +to `true`, this will overwrite a pre-existent mapping, otherwise an error is raised. `download_info` is an optional vector that contains tuples of URLs and a hash. These URLs will be listed as possible locations where this artifact can be obtained. If `lazy` @@ -334,7 +334,7 @@ function download_artifact( try download_verify_unpack(tarball_url, tarball_hash, temp_dir, ignore_existence=true, verbose=verbose, quiet_download=quiet_download, io=io) - calc_hash = SHA1(GitTools.tree_hash(temp_dir)) + calc_hash = SHA1(git_tree_hash(temp_dir)) # Did we get what we expected? If not, freak out. 
 if calc_hash.bytes != tree_hash.bytes
diff --git a/stdlib/ArtifactDownloads/src/GitTools.jl b/stdlib/ArtifactDownloads/src/GitTools.jl
deleted file mode 100644
index c687bab278664..0000000000000
--- a/stdlib/ArtifactDownloads/src/GitTools.jl
+++ /dev/null
@@ -1,353 +0,0 @@
-module GitTools
-
-import SHA
-import LibGit2
-
-import ..MiniProgressBars: MiniProgressBar, start_progress, end_progress, show_progress
-import ..Utils: can_fancyprint, printpkgstyle, stdout_f
-
-use_cli_git() = Base.get_bool_env("JULIA_PKG_USE_CLI_GIT", false)
-
-function transfer_progress(progress::Ptr{LibGit2.TransferProgress}, p::Any)
-    progress = unsafe_load(progress)
-    @assert haskey(p, :transfer_progress)
-    bar = p[:transfer_progress]
-    @assert typeof(bar) == MiniProgressBar
-    if progress.total_deltas != 0
-        bar.header = "Resolving Deltas:"
-        bar.max = progress.total_deltas
-        bar.current = progress.indexed_deltas
-    else
-        bar.max = progress.total_objects
-        bar.current = progress.received_objects
-    end
-    show_progress(stdout_f(), bar)
-    return Cint(0)
-end
-
-const GIT_REGEX =
-    r"^(?:(?<proto>git|ssh|https)://)?(?:[\w\.\+\-:]+@)?(?<hostname>.+?)(?(<proto>)/|:)(?<path>.+?)(?:\.git)?$"
-const GIT_PROTOCOLS = Dict{String, Union{Nothing, String}}()
-const GIT_USERS = Dict{String, Union{Nothing, String}}()
-
-@deprecate setprotocol!(proto::Union{Nothing, AbstractString}) setprotocol!(protocol = proto) false
-
-function setprotocol!(;
-    domain::AbstractString="github.com",
-    protocol::Union{Nothing, AbstractString}=nothing,
-    user::Union{Nothing, AbstractString}=(protocol == "ssh" ? "git" : nothing)
-)
-    domain = lowercase(domain)
-    GIT_PROTOCOLS[domain] = protocol
-    GIT_USERS[domain] = user
-end
-
-function normalize_url(url::AbstractString)
-    # LibGit2 is fussy about trailing slash. Make sure there is none.
-    url = rstrip(url, '/')
-    m = match(GIT_REGEX, url)
-    m === nothing && return url
-
-    host = m[:hostname]
-    path = "$(m[:path]).git"
-
-    proto = get(GIT_PROTOCOLS, lowercase(host), nothing)
-
-    if proto === nothing
-        url
-    else
-        user = get(GIT_USERS, lowercase(host), nothing)
-        user = user === nothing ? "" : "$user@"
-
-        "$proto://$user$host/$path"
-    end
-end
-
-function ensure_clone(io::IO, target_path, url; kwargs...)
-    if ispath(target_path)
-        return LibGit2.GitRepo(target_path)
-    else
-        return GitTools.clone(io, url, target_path; kwargs...)
-    end
-end
-
-function checkout_tree_to_path(repo::LibGit2.GitRepo, tree::LibGit2.GitObject, path::String)
-    GC.@preserve path begin
-        opts = LibGit2.CheckoutOptions(
-            checkout_strategy = LibGit2.Consts.CHECKOUT_FORCE,
-            target_directory = Base.unsafe_convert(Cstring, path)
-        )
-        LibGit2.checkout_tree(repo, tree, options=opts)
-    end
-end
-
-function clone(io::IO, url, source_path; header=nothing, credentials=nothing, kwargs...)
-    url = String(url)::String
-    source_path = String(source_path)::String
-    @assert !isdir(source_path) || isempty(readdir(source_path))
-    url = normalize_url(url)
-    printpkgstyle(io, :Cloning, header === nothing ?
"git-repo `$url`" : header) - bar = MiniProgressBar(header = "Fetching:", color = Base.info_color()) - fancyprint = can_fancyprint(io) - callbacks = if fancyprint - LibGit2.Callbacks( - :transfer_progress => ( - @cfunction(transfer_progress, Cint, (Ptr{LibGit2.TransferProgress}, Any)), - bar, - ) - ) - else - LibGit2.Callbacks() - end - fancyprint && start_progress(io, bar) - if credentials === nothing - credentials = LibGit2.CachedCredentials() - end - try - if use_cli_git() - cmd = `git clone --quiet $url $source_path` - try - run(pipeline(cmd; stdout=devnull)) - catch err - artifactserror("The command $(cmd) failed, error: $err") - end - return LibGit2.GitRepo(source_path) - else - mkpath(source_path) - return LibGit2.clone(url, source_path; callbacks=callbacks, credentials=credentials, kwargs...) - end - catch err - rm(source_path; force=true, recursive=true) - err isa LibGit2.GitError || err isa InterruptException || rethrow() - if err isa InterruptException - artifactserror("git clone of `$url` interrupted") - elseif (err.class == LibGit2.Error.Net && err.code == LibGit2.Error.EINVALIDSPEC) || - (err.class == LibGit2.Error.Repository && err.code == LibGit2.Error.ENOTFOUND) - artifactserror("git repository not found at `$(url)`") - else - artifactserror("failed to clone from $(url), error: $err") - end - finally - Base.shred!(credentials) - fancyprint && end_progress(io, bar) - end -end - -function fetch(io::IO, repo::LibGit2.GitRepo, remoteurl=nothing; header=nothing, credentials=nothing, refspecs=[""], kwargs...) - if remoteurl === nothing - remoteurl = LibGit2.with(LibGit2.get(LibGit2.GitRemote, repo, "origin")) do remote - LibGit2.url(remote) - end - end - fancyprint = can_fancyprint(io) - remoteurl = normalize_url(remoteurl) - printpkgstyle(io, :Updating, header === nothing ? "git-repo `$remoteurl`" : header) - bar = MiniProgressBar(header = "Fetching:", color = Base.info_color()) - fancyprint = can_fancyprint(io) - callbacks = if fancyprint - LibGit2.Callbacks( - :transfer_progress => ( - @cfunction(transfer_progress, Cint, (Ptr{LibGit2.TransferProgress}, Any)), - bar, - ) - ) - else - LibGit2.Callbacks() - end - fancyprint && start_progress(io, bar) - if credentials === nothing - credentials = LibGit2.CachedCredentials() - end - try - if use_cli_git() - let remoteurl=remoteurl - cd(LibGit2.path(repo)) do - cmd = `git fetch -q $remoteurl $(only(refspecs))` - try - run(pipeline(cmd; stdout=devnull)) - catch err - artifactserror("The command $(cmd) failed, error: $err") - end - end - end - else - return LibGit2.fetch(repo; remoteurl=remoteurl, callbacks=callbacks, refspecs=refspecs, kwargs...) 
- end - catch err - err isa LibGit2.GitError || rethrow() - if (err.class == LibGit2.Error.Repository && err.code == LibGit2.Error.ERROR) - artifactserror("Git repository not found at '$(remoteurl)'") - else - artifactserror("failed to fetch from $(remoteurl), error: $err") - end - finally - Base.shred!(credentials) - fancyprint && end_progress(io, bar) - end -end - - -# This code gratefully adapted from https://github.com/simonbyrne/GitX.jl -@enum GitMode mode_dir=0o040000 mode_normal=0o100644 mode_executable=0o100755 mode_symlink=0o120000 mode_submodule=0o160000 -Base.string(mode::GitMode) = string(UInt32(mode); base=8) -Base.print(io::IO, mode::GitMode) = print(io, string(mode)) - -function gitmode(path::AbstractString) - # Windows doesn't deal with executable permissions in quite the same way, - # `stat()` gives a different answer than we actually want, so we use - # `isexecutable()` which uses `uv_fs_access()` internally. On other - # platforms however, we just want to check via `stat()`. - function isexec(p) - @static if Sys.iswindows() - return Sys.isexecutable(p) - end - return !iszero(filemode(p) & 0o100) - end - if islink(path) - return mode_symlink - elseif isdir(path) - return mode_dir - elseif isexec(path) - return mode_executable - else - return mode_normal - end -end - -""" - blob_hash(HashType::Type, path::AbstractString) - -Calculate the git blob hash of a given path. -""" -function blob_hash(::Type{HashType}, path::AbstractString) where HashType - ctx = HashType() - if islink(path) - datalen = length(readlink(path)) - else - datalen = filesize(path) - end - - # First, the header - SHA.update!(ctx, Vector{UInt8}("blob $(datalen)\0")) - - # Next, read data in in chunks of 4KB - buff = Vector{UInt8}(undef, 4*1024) - - try - if islink(path) - SHA.update!(ctx, Vector{UInt8}(readlink(path))) - else - open(path, "r") do io - while !eof(io) - num_read = readbytes!(io, buff) - SHA.update!(ctx, buff, num_read) - end - end - end - catch e - if isa(e, InterruptException) - rethrow(e) - end - @warn("Unable to open $(path) for hashing; git-tree-sha1 likely suspect") - end - - # Finish it off and return the digest! - return SHA.digest!(ctx) -end -blob_hash(path::AbstractString) = blob_hash(SHA.SHA1_CTX, path) - -""" - contains_files(root::AbstractString) - -Helper function to determine whether a directory contains files; e.g. it is a -direct parent of a file or it contains some other directory that itself is a -direct parent of a file. This is used to exclude directories from tree hashing. -""" -function contains_files(path::AbstractString) - st = lstat(path) - ispath(st) || throw(ArgumentError("non-existent path: $(repr(path))")) - isdir(st) || return true - for p in readdir(path) - contains_files(joinpath(path, p)) && return true - end - return false -end - - -""" - tree_hash(HashType::Type, root::AbstractString) - -Calculate the git tree hash of a given path. -""" -function tree_hash(::Type{HashType}, root::AbstractString; debug_out::Union{IO,Nothing} = nothing, indent::Int=0) where HashType - entries = Tuple{String, Vector{UInt8}, GitMode}[] - for f in sort(readdir(root; join=true); by = f -> gitmode(f) == mode_dir ? f*"/" : f) - # Skip `.git` directories - if basename(f) == ".git" - continue - end - - filepath = abspath(f) - mode = gitmode(filepath) - if mode == mode_dir - # If this directory contains no files, then skip it - contains_files(filepath) || continue - - # Otherwise, hash it up! 
- child_stream = nothing - if debug_out !== nothing - child_stream = IOBuffer() - end - hash = tree_hash(HashType, filepath; debug_out=child_stream, indent=indent+1) - if debug_out !== nothing - indent_str = "| "^indent - println(debug_out, "$(indent_str)+ [D] $(basename(filepath)) - $(bytes2hex(hash))") - print(debug_out, String(take!(child_stream))) - println(debug_out, indent_str) - end - else - hash = blob_hash(HashType, filepath) - if debug_out !== nothing - indent_str = "| "^indent - mode_str = mode == mode_normal ? "F" : "X" - println(debug_out, "$(indent_str)[$(mode_str)] $(basename(filepath)) - $(bytes2hex(hash))") - end - end - push!(entries, (basename(filepath), hash, mode)) - end - - content_size = 0 - for (n, h, m) in entries - content_size += ndigits(UInt32(m); base=8) + 1 + sizeof(n) + 1 + sizeof(h) - end - - # Return the hash of these entries - ctx = HashType() - SHA.update!(ctx, Vector{UInt8}("tree $(content_size)\0")) - for (name, hash, mode) in entries - SHA.update!(ctx, Vector{UInt8}("$(mode) $(name)\0")) - SHA.update!(ctx, hash) - end - return SHA.digest!(ctx) -end -tree_hash(root::AbstractString; debug_out::Union{IO,Nothing} = nothing) = tree_hash(SHA.SHA1_CTX, root; debug_out) - -function check_valid_HEAD(repo) - try LibGit2.head(repo) - catch err - artifactserror("invalid git HEAD ($(err.msg))") - end -end - -function git_file_stream(repo::LibGit2.GitRepo, spec::String; fakeit::Bool=false)::IO - blob = try LibGit2.GitBlob(repo, spec) - catch err - err isa LibGit2.GitError && err.code == LibGit2.Error.ENOTFOUND || rethrow() - fakeit && return devnull - end - iob = IOBuffer(LibGit2.content(blob)) - close(blob) - return iob -end - -end # module diff --git a/stdlib/ArtifactDownloads/src/GitTreeHashTools.jl b/stdlib/ArtifactDownloads/src/GitTreeHashTools.jl new file mode 100644 index 0000000000000..48798c89998d9 --- /dev/null +++ b/stdlib/ArtifactDownloads/src/GitTreeHashTools.jl @@ -0,0 +1,133 @@ +module GitTreeHashTools + +import SHA +import ..Utils: contains_files + +""" + git_blob_hash(HashType::Type, path::AbstractString) + +Calculate the git blob hash of a given path. +""" +function git_blob_hash(::Type{HashType}, path::AbstractString) where HashType + ctx = HashType() + if islink(path) + datalen = length(readlink(path)) + else + datalen = filesize(path) + end + + # First, the header + SHA.update!(ctx, Vector{UInt8}("blob $(datalen)\0")) + + # Next, read data in in chunks of 4KB + buff = Vector{UInt8}(undef, 4*1024) + + try + if islink(path) + SHA.update!(ctx, Vector{UInt8}(readlink(path))) + else + open(path, "r") do io + while !eof(io) + num_read = readbytes!(io, buff) + SHA.update!(ctx, buff, num_read) + end + end + end + catch e + if isa(e, InterruptException) + rethrow(e) + end + @warn("Unable to open $(path) for hashing; git-tree-sha1 likely suspect") + end + + # Finish it off and return the digest! + return SHA.digest!(ctx) +end +git_blob_hash(path::AbstractString) = git_blob_hash(SHA.SHA1_CTX, path) + +""" + git_tree_hash(HashType::Type, root::AbstractString) + +Calculate the git tree hash of a given path. +""" +function git_tree_hash(::Type{HashType}, root::AbstractString; debug_out::Union{IO,Nothing} = nothing, indent::Int=0) where HashType + entries = Tuple{String, Vector{UInt8}, GitMode}[] + for f in sort(readdir(root; join=true); by = f -> git_mode(f) == git_mode_dir ? 
f*"/" : f) + # Skip `.git` directories + if basename(f) == ".git" + continue + end + + filepath = abspath(f) + mode = git_mode(filepath) + if mode == git_mode_dir + # If this directory contains no files, then skip it + contains_files(filepath) || continue + + # Otherwise, hash it up! + child_stream = nothing + if debug_out !== nothing + child_stream = IOBuffer() + end + hash = git_tree_hash(HashType, filepath; debug_out=child_stream, indent=indent+1) + if debug_out !== nothing + indent_str = "| "^indent + println(debug_out, "$(indent_str)+ [D] $(basename(filepath)) - $(bytes2hex(hash))") + print(debug_out, String(take!(child_stream))) + println(debug_out, indent_str) + end + else + hash = git_blob_hash(HashType, filepath) + if debug_out !== nothing + indent_str = "| "^indent + mode_str = mode == git_mode_normal ? "F" : "X" + println(debug_out, "$(indent_str)[$(mode_str)] $(basename(filepath)) - $(bytes2hex(hash))") + end + end + push!(entries, (basename(filepath), hash, mode)) + end + + content_size = 0 + for (n, h, m) in entries + content_size += ndigits(UInt32(m); base=8) + 1 + sizeof(n) + 1 + sizeof(h) + end + + # Return the hash of these entries + ctx = HashType() + SHA.update!(ctx, Vector{UInt8}("tree $(content_size)\0")) + for (name, hash, mode) in entries + SHA.update!(ctx, Vector{UInt8}("$(mode) $(name)\0")) + SHA.update!(ctx, hash) + end + return SHA.digest!(ctx) +end +git_tree_hash(root::AbstractString; debug_out::Union{IO,Nothing} = nothing) = git_tree_hash(SHA.SHA1_CTX, root; debug_out) + +@enum GitMode git_mode_dir=0o040000 git_mode_normal=0o100644 git_mode_executable=0o100755 git_mode_symlink=0o120000 git_mode_submodule=0o160000 +Base.string(mode::GitMode) = string(UInt32(mode); base=8) +Base.print(io::IO, mode::GitMode) = print(io, string(mode)) + +function git_mode(path::AbstractString) + # Windows doesn't deal with executable permissions in quite the same way, + # `stat()` gives a different answer than we actually want, so we use + # `isexecutable()` which uses `uv_fs_access()` internally. On other + # platforms however, we just want to check via `stat()`. + function isexec(p) + @static if Sys.iswindows() + return Sys.isexecutable(p) + else + return !iszero(filemode(p) & 0o100) + end + end + if islink(path) + return git_mode_symlink + elseif isdir(path) + return git_mode_dir + elseif isexec(path) + return git_mode_executable + else + return git_mode_normal + end +end + +end # module diff --git a/stdlib/ArtifactDownloads/src/PlatformEngines.jl b/stdlib/ArtifactDownloads/src/PlatformEngines.jl index d5167c86d241f..534359d54dcd7 100644 --- a/stdlib/ArtifactDownloads/src/PlatformEngines.jl +++ b/stdlib/ArtifactDownloads/src/PlatformEngines.jl @@ -150,7 +150,7 @@ function get_auth_header(url::AbstractString; verbose::Bool = false) return auth_header end if !haskey(auth_info, "refresh_url") || !haskey(auth_info, "refresh_token") - if expires_at ≤ time_now + if expires_at ≤ time_now @warn "expired auth without refresh keys" file=auth_file end # try it anyway since we can't refresh @@ -283,7 +283,7 @@ function download( (total, now) -> begin bar.max = total bar.current = now - # Downloads.download attatches the progress indicator to the header request too + # Downloads.download attaches the progress indicator to the header request too # which is only ~100 bytes, and will report as 0 - 100% progress immediately # then dip down to 0 before the actual download starts. So we only show the # progress bar once the real download starts. 
diff --git a/stdlib/ArtifactDownloads/src/Utils.jl b/stdlib/ArtifactDownloads/src/Utils.jl index 0260d500e6868..5c45d8904c18d 100644 --- a/stdlib/ArtifactDownloads/src/Utils.jl +++ b/stdlib/ArtifactDownloads/src/Utils.jl @@ -8,7 +8,6 @@ end artifactserror(msg::String...) = throw(ArtifactsError(join(msg))) Base.showerror(io::IO, err::ArtifactsError) = print(io, err.msg) -# function pkg_server() server = get(ENV, "JULIA_PKG_SERVER", "https://pkg.julialang.org") isempty(server) && return nothing @@ -131,6 +130,23 @@ function set_readonly(path) end set_readonly(::Nothing) = nothing +""" + contains_files(root::AbstractString) + +Helper function to determine whether a directory contains files; e.g. it is a +direct parent of a file or it contains some other directory that itself is a +direct parent of a file. This is used to exclude directories from tree hashing. +""" +function contains_files(path::AbstractString) + st = lstat(path) + ispath(st) || throw(ArgumentError("non-existent path: $(repr(path))")) + isdir(st) || return true + for p in readdir(path) + contains_files(joinpath(path, p)) && return true + end + return false +end + # See loading.jl const TOML_CACHE = Base.TOMLCache(Base.TOML.Parser{Dates}()) const TOML_LOCK = ReentrantLock() diff --git a/stdlib/Manifest.toml b/stdlib/Manifest.toml index c210eb3833c01..85e3dc2b5cac6 100644 --- a/stdlib/Manifest.toml +++ b/stdlib/Manifest.toml @@ -9,7 +9,7 @@ uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" version = "1.1.2" [[deps.ArtifactDownloads]] -deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Printf", "SHA", "TOML", "Tar", "p7zip_jll"] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "Printf", "SHA", "TOML", "Tar", "p7zip_jll"] uuid = "6ee59702-05f1-4877-b811-03813c0d76c5" version = "1.11.0" From 43eeed4952136676caefe3cd534c7b647037fc39 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Fri, 13 Sep 2024 16:11:37 -0400 Subject: [PATCH 3/3] Add a few basic tests Again shamelessly ripped from Pkg - this only covers part of the functionality moved here (PlatformEngines). Moving the `Pkg.Artifacts` tests will be more involved, since they were written to also test interaction with a lot of other Pkg functionality. --- .../src/ArtifactDownloads.jl | 3 - .../ArtifactDownloads/test/platformengines.jl | 282 ++++++++++++++++++ stdlib/ArtifactDownloads/test/runtests.jl | 14 + 3 files changed, 296 insertions(+), 3 deletions(-) create mode 100644 stdlib/ArtifactDownloads/test/platformengines.jl create mode 100644 stdlib/ArtifactDownloads/test/runtests.jl diff --git a/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl b/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl index 5529baa7044da..4e94aa409c1b5 100644 --- a/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl +++ b/stdlib/ArtifactDownloads/src/ArtifactDownloads.jl @@ -156,9 +156,6 @@ function archive_artifact(hash::SHA1, tarball_path::String; honor_overrides::Boo error("Unable to archive artifact $(bytes2hex(hash.bytes)): does not exist!") end - # TODO: We may not need this... 
- # (removes `PlatformEngines.package`) - # Package it up package(artifact_path(hash), tarball_path) diff --git a/stdlib/ArtifactDownloads/test/platformengines.jl b/stdlib/ArtifactDownloads/test/platformengines.jl new file mode 100644 index 0000000000000..5ee1830fbbe7e --- /dev/null +++ b/stdlib/ArtifactDownloads/test/platformengines.jl @@ -0,0 +1,282 @@ +module PlatformEngineTests + +import ..ArtifactDownloads: PlatformEngines # ensure we are using the correct ArtifactDownloads +import ..ArtifactDownloads.Utils: depots1 + +using Test, SHA, Tar + +function list_tarball_files(tarball_path::AbstractString) + names = String[] + Tar.list(`$(PlatformEngines.exe7z()) x $tarball_path -so`) do hdr + push!(names, hdr.path) + end + return names +end + +@testset "Packaging" begin + # Gotta set this guy up beforehand + tarball_path = nothing + tarball_hash = nothing + + mktempdir() do prefix + # Create random files + mkpath(joinpath(prefix, "bin")) + mkpath(joinpath(prefix, "lib")) + mkpath(joinpath(prefix, "etc")) + bar_path = joinpath(prefix, "bin", "bar.sh") + open(bar_path, "w") do f + write(f, "#!/bin/sh\n") + write(f, "echo yolo\n") + end + baz_path = joinpath(prefix, "lib", "baz.so") + open(baz_path, "w") do f + write(f, "this is not an actual .so\n") + end + + qux_path = joinpath(prefix, "etc", "qux.conf") + open(qux_path, "w") do f + write(f, "use_julia=true\n") + end + + # Next, package it up as a .tar.gz file + mktempdir() do output_dir + tarball_path = joinpath(output_dir, "foo.tar.gz") + PlatformEngines.package(prefix, tarball_path) + @test isfile(tarball_path) + + # Test that we can inspect the contents of the tarball + contents = list_tarball_files(tarball_path) + @test "bin/bar.sh" in contents + @test "lib/baz.so" in contents + @test "etc/qux.conf" in contents + end + end + +end + + +@testset "Verification" begin + mktempdir() do prefix + foo_path = joinpath(prefix, "foo") + open(foo_path, "w") do file + write(file, "test") + end + foo_hash = bytes2hex(sha256("test")) + + # Check that verifying with the right hash works + @test_logs (:info, r"No hash cache found") match_mode=:any begin + ret, status = PlatformEngines.verify(foo_path, foo_hash; verbose=true, report_cache_status=true) + @test ret == true + @test status == :hash_cache_missing + end + + # Check that it created a .sha256 file + @test isfile("$(foo_path).sha256") + + # Check that it verifies the second time around properly + @test_logs (:info, r"Hash cache is consistent") match_mode=:any begin + ret, status = PlatformEngines.verify(foo_path, foo_hash; verbose=true, report_cache_status=true) + @test ret == true + @test status == :hash_cache_consistent + end + + # Sleep for imprecise filesystems + sleep(2) + + # Get coverage of messing with different parts of the verification chain + touch(foo_path) + @test_logs (:info, r"File has been modified") match_mode=:any begin + ret, status = PlatformEngines.verify(foo_path, foo_hash; verbose=true, report_cache_status=true) + @test ret == true + @test status == :file_modified + end + + # Ensure that we print an error when verification fails + rm("$(foo_path).sha256"; force=true) + @test_logs (:error, r"Hash Mismatch!") match_mode=:any begin + @test !PlatformEngines.verify(foo_path, "0"^64; verbose=true) + end + + # Ensure that incorrect lengths cause an exception + @test_throws ErrorException PlatformEngines.verify(foo_path, "0"^65; verbose=true) + + # Ensure that messing with the hash file works properly + touch(foo_path) + @test PlatformEngines.verify(foo_path, foo_hash; 
verbose=true) + open("$(foo_path).sha256", "w") do file + write(file, "this is not the right hash") + end + @test_logs (:info, r"hash cache invalidated") match_mode=:any begin + ret, status = PlatformEngines.verify(foo_path, foo_hash; verbose=true, report_cache_status=true) + @test ret == true + @test status == :hash_cache_mismatch + end + + # Ensure that messing with the actual file works properly + open("$(foo_path)", "w") do file + write(file, "this is not the right content") + end + + # Delete hash cache file to force re-verification + rm("$(foo_path).sha256"; force=true) + @test_logs (:error, r"Hash Mismatch!") match_mode=:any begin + ret, status = PlatformEngines.verify(foo_path, foo_hash; verbose=true, report_cache_status=true) + @test ret == false + @test status == :hash_mismatch + end + end +end + + +const socrates_urls = [ + "https://github.com/staticfloat/small_bin/raw/f1a92f5eafbd30a0c6a8efb6947485b0f6d1bec3/socrates.tar.gz" => + "e65d2f13f2085f2c279830e863292312a72930fee5ba3c792b14c33ce5c5cc58", + "https://github.com/staticfloat/small_bin/raw/f1a92f5eafbd30a0c6a8efb6947485b0f6d1bec3/socrates.tar.bz2" => + "13fc17b97be41763b02cbb80e9d048302cec3bd3d446c2ed6e8210bddcd3ac76", + "https://github.com/staticfloat/small_bin/raw/f1a92f5eafbd30a0c6a8efb6947485b0f6d1bec3/socrates.tar.xz" => + "61bcf109fcb749ee7b6a570a6057602c08c836b6f81091eab7aa5f5870ec6475", +] +const socrates_hash = "adcbcf15674eafe8905093183d9ab997cbfba9056fc7dde8bfa5a22dfcfb4967" + +@testset "Downloading" begin + for (url, hash) in socrates_urls + mktempdir() do prefix + tarball_path = joinpath(prefix, "download_target.tar$(splitext(url)[2])") + + target_dir = joinpath(prefix, "target") + PlatformEngines.download_verify_unpack(url, hash, target_dir; tarball_path=tarball_path, verbose=true) + + # Test downloading a second time, to get the "already exists" path + PlatformEngines.download_verify_unpack(url, hash, target_dir; tarball_path=tarball_path, verbose=true) + + # And a third time, after corrupting it, to get the "redownloading" path + open(tarball_path, "w") do io + println(io, "corruptify") + end + PlatformEngines.download_verify_unpack(url, hash, target_dir; tarball_path=tarball_path, verbose=true, force=true) + + # Test that it has the contents we expect + socrates_path = joinpath(target_dir, "bin", "socrates") + @test isfile(socrates_path) + unpacked_hash = open(socrates_path) do f + bytes2hex(sha256(f)) + end + @test unpacked_hash == socrates_hash + end + end +end + +const collapse_url = "https://github.com/staticfloat/small_bin/raw/master/collapse_the_symlink/collapse_the_symlink.tar.gz" +const collapse_hash = "956c1201405f64d3465cc28cb0dec9d63c11a08cad28c381e13bb22e1fc469d3" +@testset "Copyderef unpacking" begin + withenv("BINARYPROVIDER_COPYDEREF" => "true") do + mktempdir() do prefix + target_dir = joinpath(prefix, "target") + PlatformEngines.download_verify_unpack(collapse_url, collapse_hash, target_dir; verbose=true) + + # Test that we get the files we expect + @test isfile(joinpath(target_dir, "collapse_the_symlink", "foo")) + @test isfile(joinpath(target_dir, "collapse_the_symlink", "foo.1")) + @test isfile(joinpath(target_dir, "collapse_the_symlink", "foo.1.1")) + + # Test that these are definitely not links + @test !islink(joinpath(target_dir, "collapse_the_symlink", "foo")) + @test !islink(joinpath(target_dir, "collapse_the_symlink", "foo.1.1")) + + # Test that broken symlinks get transparently dropped + @test !ispath(joinpath(target_dir, "collapse_the_symlink", "broken")) + end + end +end + 
+@testset "Download GitHub API #88" begin + mktempdir() do tmp + PlatformEngines.download("https://api.github.com/repos/JuliaPackaging/BinaryProvider.jl/tarball/c2a4fc38f29eb81d66e3322e585d0199722e5d71", joinpath(tmp, "BinaryProvider"); verbose=true) + @test isfile(joinpath(tmp, "BinaryProvider")) + end +end + +@testset "Authentication Header Hooks" begin + @test PlatformEngines.get_auth_header("https://foo.bar/baz") == nothing + + old = nothing + haskey(ENV, "JULIA_PKG_SERVER") && (old = ENV["JULIA_PKG_SERVER"]) + + push!(Base.DEPOT_PATH, ".") + + ENV["JULIA_PKG_SERVER"] = "" + + function test_server_dir(url, server, ::Nothing) + observed = PlatformEngines.get_server_dir(url, server) + expected = nothing + @test observed === expected + end + function test_server_dir(url, server, expected_directory::AbstractString) + observed = PlatformEngines.get_server_dir(url, server) + expected = joinpath(depots1(), "servers", expected_directory) + @debug "" url server expected_directory observed expected + if observed != expected + @error "Test failure" url server expected_directory observed expected + end + + @test observed == expected + + # Test for Windows drive letter shenanigans + @test startswith(observed, depots1()) + @test startswith(observed, joinpath(depots1(), "servers")) + end + + @testset "get_server_dir" begin + test_server_dir("https://foo.bar/baz/a", nothing, nothing) + test_server_dir("https://foo.bar/baz/a", "https://bar", nothing) + test_server_dir("https://foo.bar/baz/a", "foo.bar", nothing) + test_server_dir("https://foo.bar/bazx", "https://foo.bar/baz", nothing) + + for host in ["localhost", "foo", "foo.bar", "foo.bar.baz"] + for protocol in ["http", "https"] + for port in [("", ""), (":1234", "_1234")] + port_original, port_transformed = port + + for server_suffix in ["", "/hello", "/hello/world"] + server = "$(protocol)://$(host)$(port_original)$(server_suffix)" + for url_suffix in ["/", "/foo", "/foo/bar", "/foo/bar/baz"] + url = "$(server)$(url_suffix)" + test_server_dir(url, server, "$(host)$(port_transformed)") + end + end + end + end + end + end + + called = 0 + dispose = PlatformEngines.register_auth_error_handler("https://foo.bar/baz", function (url, svr, err) + called += 1 + return true, called < 3 + end) + + @test PlatformEngines.get_auth_header("https://foo.bar/baz") == nothing + @test called == 0 + + ENV["JULIA_PKG_SERVER"] = "https://foo.bar" + + @test PlatformEngines.get_auth_header("https://foo.bar/baz") == nothing + @test called == 3 + + dispose() + + @test PlatformEngines.get_auth_header("https://foo.bar/baz") == nothing + @test called == 3 + + dispose() + + ENV["JULIA_PKG_SERVER"] = "https://foo.bar/baz" + + @test PlatformEngines.get_auth_header("https://foo.bar/baz/a") == nothing + @test called == 3 + + old === nothing ? delete!(ENV, "JULIA_PKG_SERVER") : (ENV["JULIA_PKG_SERVER"] = old) + pop!(Base.DEPOT_PATH) +end + +end # module diff --git a/stdlib/ArtifactDownloads/test/runtests.jl b/stdlib/ArtifactDownloads/test/runtests.jl new file mode 100644 index 0000000000000..099517f81949d --- /dev/null +++ b/stdlib/ArtifactDownloads/test/runtests.jl @@ -0,0 +1,14 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module ArtifactDownloadsTests + +import ArtifactDownloads +using Test + +@testset "ArtifactDownloads" begin + @testset "platformengines.jl" begin + include("platformengines.jl") + end +end + +end
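With `LazyArtifacts` now depending on `ArtifactDownloads` rather than `Pkg` (see the Manifest change in the first patch), the downstream call pattern should reduce to something like the following sketch; the `Artifacts.toml` path and artifact name are hypothetical, and `ensure_artifact_installed` is not currently exported:

```julia
import ArtifactDownloads

artifacts_toml = joinpath(@__DIR__, "Artifacts.toml")  # hypothetical project file
path = ArtifactDownloads.ensure_artifact_installed("socrates", artifacts_toml)
@assert isdir(path)
```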