From 829fb7cf58c379ba2d3821950fb3f79f3116d2d4 Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 2 Jul 2024 16:31:10 -0400 Subject: [PATCH 01/57] TOML: Improve type-stability This changes the output of the TOML parser to provide specialize `Vector{T}` less aggressively, so that combinatorially expensive types like `Vector{Vector{Float64}}` or `Vector{Union{Float64,Int64}}` are instead returned as `Vector{Any}` Vectors of homogeneous leaf types, like `Vector{Float64}` are still supported as before. This change makes the TOML parser fully type-stable, except for its dynamic usage of Dates. Co-authored-by: Gabriel Baraldi --- base/loading.jl | 1 - base/toml_parser.jl | 100 ++++++++++++++++++++----------------- stdlib/TOML/test/values.jl | 2 +- 3 files changed, 54 insertions(+), 49 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 4dc735f0099d8..e0a2563dd0178 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -269,7 +269,6 @@ struct TOMLCache{Dates} d::Dict{String, CachedTOMLDict} end TOMLCache(p::TOML.Parser) = TOMLCache(p, Dict{String, CachedTOMLDict}()) -# TODO: Delete this converting constructor once Pkg stops using it TOMLCache(p::TOML.Parser, d::Dict{String, Dict{String, Any}}) = TOMLCache(p, convert(Dict{String, CachedTOMLDict}, d)) const TOML_CACHE = TOMLCache(TOML.Parser{nothing}()) diff --git a/base/toml_parser.jl b/base/toml_parser.jl index cc1455f61928b..4d07cfed05d8a 100644 --- a/base/toml_parser.jl +++ b/base/toml_parser.jl @@ -84,9 +84,6 @@ mutable struct Parser{Dates} # Filled in in case we are parsing a file to improve error messages filepath::Union{String, Nothing} - - # Optionally populate with the Dates stdlib to change the type of Date types returned - Dates::Union{Module, Nothing} # TODO: remove once Pkg is updated end function Parser{Dates}(str::String; filepath=nothing) where {Dates} @@ -106,8 +103,7 @@ function Parser{Dates}(str::String; filepath=nothing) where {Dates} IdSet{Any}(), # static_arrays IdSet{TOMLDict}(), # defined_tables root, - filepath, - nothing + filepath ) startup(l) return l @@ -495,8 +491,10 @@ function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String}, d = d::TOMLDict key = dotted_keys[i] d = get!(TOMLDict, d, key) - if d isa Vector + if d isa Vector{Any} d = d[end] + elseif d isa Vector + return ParserError(ErrKeyAlreadyHasValue) end check && @try check_allowed_add_key(l, d, i == length(dotted_keys)) end @@ -537,7 +535,7 @@ function parse_array_table(l)::Union{Nothing, ParserError} end d = @try recurse_dict!(l, l.root, @view(table_key[1:end-1]), false) k = table_key[end] - old = get!(() -> [], d, k) + old = get!(() -> Any[], d, k) if old isa Vector if old in l.static_arrays return ParserError(ErrAddArrayToStaticArray) @@ -546,7 +544,7 @@ function parse_array_table(l)::Union{Nothing, ParserError} return ParserError(ErrArrayTreatedAsDictionary) end d_new = TOMLDict() - push!(old, d_new) + push!(old::Vector{Any}, d_new) push!(l.defined_tables, d_new) l.active_table = d_new @@ -668,41 +666,20 @@ end # Array # ######### -function push!!(v::Vector, el) - # Since these types are typically non-inferable, they are a big invalidation risk, - # and since it's used by the package-loading infrastructure the cost of invalidation - # is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather - # than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there - # is no ambiguity about what types of objects will be created. - T = eltype(v) - t = typeof(el) - if el isa T || t === T - push!(v, el::T) - return v - elseif T === Union{} - out = Vector{t}(undef, 1) - out[1] = el - return out - else - if T isa Union - newT = Any - else - newT = Union{T, typeof(el)} - end - new = Array{newT}(undef, length(v)) - copy!(new, v) - return push!(new, el) +function copyto_typed!(a::Vector{T}, b::Vector) where T + for i in 1:length(b) + a[i] = b[i]::T end + return nothing end -function parse_array(l::Parser)::Err{Vector} +function parse_array(l::Parser{Dates})::Err{Vector} where Dates skip_ws_nl(l) - array = Vector{Union{}}() + array = Vector{Any}() empty_array = accept(l, ']') while !empty_array v = @try parse_value(l) - # TODO: Worth to function barrier this? - array = push!!(array, v) + array = push!(array, v) # There can be an arbitrary number of newlines and comments before a value and before the closing bracket. skip_ws_nl(l) comma = accept(l, ',') @@ -712,8 +689,40 @@ function parse_array(l::Parser)::Err{Vector} return ParserError(ErrExpectedCommaBetweenItemsArray) end end - push!(l.static_arrays, array) - return array + # check for static type throughout array + T = !isempty(array) ? typeof(array[1]) : Union{} + for el in array + if typeof(el) != T + T = Any + break + end + end + if T === Any + new = array + elseif T === String + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Bool + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Int64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === UInt64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Float64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Union{} + new = Any[] + elseif (T === TOMLDict) || (T == BigInt) || (T === UInt128) || (T === Int128) || (T <: Vector) || + (T === Dates.Date) || (T === Dates.Time) || (T === Dates.DateTime) + # do nothing, leave as Vector{Any} + new = array + else @assert false end + push!(l.static_arrays, new) + return new end @@ -1025,10 +1034,9 @@ function parse_datetime(l) end function try_return_datetime(p::Parser{Dates}, year, month, day, h, m, s, ms) where Dates - if Dates !== nothing || p.Dates !== nothing - mod = Dates !== nothing ? Dates : p.Dates + if Dates !== nothing try - return mod.DateTime(year, month, day, h, m, s, ms) + return Dates.DateTime(year, month, day, h, m, s, ms) catch ex ex isa ArgumentError && return ParserError(ErrParsingDateTime) rethrow() @@ -1039,10 +1047,9 @@ function try_return_datetime(p::Parser{Dates}, year, month, day, h, m, s, ms) wh end function try_return_date(p::Parser{Dates}, year, month, day) where Dates - if Dates !== nothing || p.Dates !== nothing - mod = Dates !== nothing ? Dates : p.Dates + if Dates !== nothing try - return mod.Date(year, month, day) + return Dates.Date(year, month, day) catch ex ex isa ArgumentError && return ParserError(ErrParsingDateTime) rethrow() @@ -1062,10 +1069,9 @@ function parse_local_time(l::Parser) end function try_return_time(p::Parser{Dates}, h, m, s, ms) where Dates - if Dates !== nothing || p.Dates !== nothing - mod = Dates !== nothing ? Dates : p.Dates + if Dates !== nothing try - return mod.Time(h, m, s, ms) + return Dates.Time(h, m, s, ms) catch ex ex isa ArgumentError && return ParserError(ErrParsingDateTime) rethrow() diff --git a/stdlib/TOML/test/values.jl b/stdlib/TOML/test/values.jl index 4fc49d47fc98d..53be1b04708b3 100644 --- a/stdlib/TOML/test/values.jl +++ b/stdlib/TOML/test/values.jl @@ -172,6 +172,6 @@ end @testset "Array" begin @test testval("[1,2,3]", Int64[1,2,3]) @test testval("[1.0, 2.0, 3.0]", Float64[1.0, 2.0, 3.0]) - @test testval("[1.0, 2.0, 3]", Union{Int64, Float64}[1.0, 2.0, Int64(3)]) + @test testval("[1.0, 2.0, 3]", Any[1.0, 2.0, Int64(3)]) @test testval("[1.0, 2, \"foo\"]", Any[1.0, Int64(2), "foo"]) end From 9e07d9100b3259c20cad99b952daaa6138eeaf3e Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Sat, 31 Aug 2024 13:57:42 -0400 Subject: [PATCH 02/57] Move lets to testsets and add codeunits tests --- test/strings/types.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/test/strings/types.jl b/test/strings/types.jl index dbcf65b1d843b..d1e89e0e85196 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -302,8 +302,7 @@ end ## Cstring tests ## -# issue #13974: comparison against pointers -let +@testset "issue #13974: comparison against pointers" begin str = String("foobar") ptr = pointer(str) cstring = Cstring(ptr) @@ -324,10 +323,15 @@ let @test C_NULL != cstring end -# issue #31381: eltype(Cstring) != Cchar -let +@testset "issue #31381: eltype(Cstring) != Cchar" begin s = Cstring(C_NULL) @test eltype(Cstring) == Cchar @test eltype(s) == Cchar @test pointer(s) isa Ptr{Cchar} end + +@testset "Codeunits" begin + s = "I'm a string!" + @test codeunit(s) == UInt8 + @test codeunit(s, Int8(1)) == codeunit(s, 1) +end From 66349d411efd36211524da1741df129fd504b658 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Sat, 31 Aug 2024 14:17:04 -0400 Subject: [PATCH 03/57] Few more missing AnnotatedStrings tests --- test/strings/annotated.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl index 90aaadd6ede24..ee53c3d5846eb 100644 --- a/test/strings/annotated.jl +++ b/test/strings/annotated.jl @@ -5,14 +5,22 @@ @test str == Base.AnnotatedString(str.string, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[]) @test length(str) == 11 @test ncodeunits(str) == 11 + @test codeunits(str) == codeunits("some string") + @test codeunit(str) == UInt8 + @test codeunit(str, 1) == codeunit("some string", 1) + @test firstindex(str) == firstindex("some string") @test convert(Base.AnnotatedString, str) === str @test eltype(str) == Base.AnnotatedChar{eltype(str.string)} @test first(str) == Base.AnnotatedChar(first(str.string), Pair{Symbol, Any}[]) @test str[1:4] isa SubString{typeof(str)} @test str[1:4] == Base.AnnotatedString("some") + big_byte_str = Base.AnnotatedString("आख") + @test_throws StringIndexError big_byte_str[5] @test "a" * str == Base.AnnotatedString("asome string") @test str * "a" == Base.AnnotatedString("some stringa") @test str * str == Base.AnnotatedString("some stringsome string") + @test cmp(str, "some stringy thingy") == -1 + @test cmp("some stringy thingy", str) == 1 @test str[3:4] == SubString("me") @test SubString("me") == str[3:4] Base.annotate!(str, 1:4, :thing => 0x01) From 6f04ee0ff34f727dfd44199d6e1132d2235ebedd Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 5 Sep 2024 21:54:11 -0300 Subject: [PATCH 04/57] [LLVMLibUnwindJLL] Update llvmlibunwind to 14.0.6 (#48140) --- deps/checksums/llvmunwind | 32 ++++ deps/llvmunwind.version | 2 +- ...ibunwind-revert-monorepo-requirement.patch | 156 ------------------ deps/unwind.mk | 49 +++--- stdlib/LLVMLibUnwind_jll/Project.toml | 2 +- 5 files changed, 59 insertions(+), 182 deletions(-) delete mode 100644 deps/patches/llvm-libunwind-revert-monorepo-requirement.patch diff --git a/deps/checksums/llvmunwind b/deps/checksums/llvmunwind index e69de29bb2d1d..a90d28717dd85 100644 --- a/deps/checksums/llvmunwind +++ b/deps/checksums/llvmunwind @@ -0,0 +1,32 @@ +LLVMLibUnwind.v14.0.6+0.aarch64-apple-darwin.tar.gz/md5/d8584e0e3dc26ea7404d3719cea9e233 +LLVMLibUnwind.v14.0.6+0.aarch64-apple-darwin.tar.gz/sha512/7a0396eaace91b9b4d013c209605d468a7ff9b99ede9fdd57602539a6fa6f3ea84a440f32840056a1234df3ef1896739ea0820fee72b4f208096c553fc54adb9 +LLVMLibUnwind.v14.0.6+0.aarch64-linux-gnu.tar.gz/md5/d6edea561b61173d05aa79936e49f6b7 +LLVMLibUnwind.v14.0.6+0.aarch64-linux-gnu.tar.gz/sha512/9fbe29ec6a33c719bc9a4dd19911ceded9622269c042192d339a6cf45aa8209ad64c424167c094ca01293438af5930f091acba0538b3fe640a746297f5cc8cb3 +LLVMLibUnwind.v14.0.6+0.aarch64-linux-musl.tar.gz/md5/3ec68c87e4bddd024ee0ca6adc2b3b96 +LLVMLibUnwind.v14.0.6+0.aarch64-linux-musl.tar.gz/sha512/be3cd9d5510c2693dee1494c36c479d32311ff83f5b2d31c08508a3dd370788961ce46e9025afe148a0febd05942fd294370a357dd717bee353d8a108617f6de +LLVMLibUnwind.v14.0.6+0.armv6l-linux-gnueabihf.tar.gz/md5/8ca5a926d69124225d485d679232a54f +LLVMLibUnwind.v14.0.6+0.armv6l-linux-gnueabihf.tar.gz/sha512/353f540b342bc54877e7a41fe65c9eeac525fd91bf4cddbe1b3ec2ed93c3751beaf8316a4d31530502b067100b160301262e10cbe4407db3abf1ceb5d9a74eb2 +LLVMLibUnwind.v14.0.6+0.armv6l-linux-musleabihf.tar.gz/md5/4e5b576958f2a2e708eb5918ceef0de0 +LLVMLibUnwind.v14.0.6+0.armv6l-linux-musleabihf.tar.gz/sha512/2e98c472d3ee25c2e062efa4eb21ac9cfc49b26ea9d99ad4a8e7660c4c09f121d31193bd161f54ea332ce94785d601897311e9e6668adb1e25e2b666e0d5bb3f +LLVMLibUnwind.v14.0.6+0.armv7l-linux-gnueabihf.tar.gz/md5/1c81a886e799663ce8d04400c5b516a9 +LLVMLibUnwind.v14.0.6+0.armv7l-linux-gnueabihf.tar.gz/sha512/236b78b9a17eaae74ab07349ac8dde16c3abbd48e0d075abd1c195d60efff48e2fbf799554df114ea3d3dba937e0369430a2788bde2a1201126e026ef6cdac42 +LLVMLibUnwind.v14.0.6+0.armv7l-linux-musleabihf.tar.gz/md5/0371f43ebcb571d0a635739252b88986 +LLVMLibUnwind.v14.0.6+0.armv7l-linux-musleabihf.tar.gz/sha512/605318ae3737e26ff89d6291311a7db3bc3ec7c8d1f2e72ae40fd3d9df0754ee2ebfb77687122605f26d76d62effb85157bc39982814920d5af46c124e71a5ff +LLVMLibUnwind.v14.0.6+0.i686-linux-gnu.tar.gz/md5/cd3f1cdf404b6102754ced4bd3a890f6 +LLVMLibUnwind.v14.0.6+0.i686-linux-gnu.tar.gz/sha512/65fe2c5b1e04da1e1d8111a0b0083fa0fa9447eaea7af7a018c09fe6d5506566c491bbad296a7be8c488ca3495016ae16a6879d69f057f8866d94910147dee03 +LLVMLibUnwind.v14.0.6+0.i686-linux-musl.tar.gz/md5/abac9b416d2ba5abcf5ce849f43ffa96 +LLVMLibUnwind.v14.0.6+0.i686-linux-musl.tar.gz/sha512/fed677ed6f103c56eb9dd4578fa37a56ed2a4bc803aa1997c5af19762a623d2f82db1f72f429448d66fcef3b37af2104e6cb782f023aaabef086a921a862b042 +LLVMLibUnwind.v14.0.6+0.i686-w64-mingw32.tar.gz/md5/4c71ffd7c8cabb1c0ed6290b193883c5 +LLVMLibUnwind.v14.0.6+0.i686-w64-mingw32.tar.gz/sha512/6b1421a3268170467225112167cdb33fec962181993a2dad5594d4ee0623ac88ee0588cdc7d0656dc1cb9129ef96f621a97a224731cd161134d7d63c8fd32c16 +LLVMLibUnwind.v14.0.6+0.powerpc64le-linux-gnu.tar.gz/md5/06faf505f0dc354afcd01113cfc57af2 +LLVMLibUnwind.v14.0.6+0.powerpc64le-linux-gnu.tar.gz/sha512/1f9dfbd403e2ce121e126c217baede178cb1323012bb5e3cd1f778ff51e4216aed9dd69036e2baffbd60a6f5ae438ddaba6c13809459e94bb00be3f7bfc8c30e +LLVMLibUnwind.v14.0.6+0.x86_64-apple-darwin.tar.gz/md5/516a11d99306e3f214968a7951b07a06 +LLVMLibUnwind.v14.0.6+0.x86_64-apple-darwin.tar.gz/sha512/885738599bbd96f20083f9b9368ce3f243bd5868d3ac9a45189de6cb40b6664a6dcdaece159989e504670231db8c2addfa8d544003eb0cdabba960e4ab6a4470 +LLVMLibUnwind.v14.0.6+0.x86_64-linux-gnu.tar.gz/md5/d851b90ea3f9664774316169fc494e21 +LLVMLibUnwind.v14.0.6+0.x86_64-linux-gnu.tar.gz/sha512/a1f529454f0881baaa508481ba97ecffb040fa92141b4cbc72278adcf8b84f0766fa918aea7fb99ce690c4fd80c36fec365987625db42f4e7bb36ad24ce177d0 +LLVMLibUnwind.v14.0.6+0.x86_64-linux-musl.tar.gz/md5/dc4e86eb2effe1f6cb0d0ceda635f226 +LLVMLibUnwind.v14.0.6+0.x86_64-linux-musl.tar.gz/sha512/c52de384853890f9df81aa9e422c1ba3fde12b2ae9c7b60b9ecdc6d0c88eab495dd336af2b6cd2c31d6eddcd0a213954eadbc7884bc39ce2039cec672eac32fe +LLVMLibUnwind.v14.0.6+0.x86_64-unknown-freebsd.tar.gz/md5/8477e3624c73a820d8ab82a53e1e10fa +LLVMLibUnwind.v14.0.6+0.x86_64-unknown-freebsd.tar.gz/sha512/32ce031245a5b59a779cd77fa3c9bf05ee59e48c913b75d4964bea49f37da232c59a42ad993f7b5edc88322148c1d7394984349682bfce3b69d33a51756ac8e3 +LLVMLibUnwind.v14.0.6+0.x86_64-w64-mingw32.tar.gz/md5/7be93eccbdb0aff427c43af651073d66 +LLVMLibUnwind.v14.0.6+0.x86_64-w64-mingw32.tar.gz/sha512/89a61a81ec664c72107ac09e717200b00434350bf77064267180bc0c101a59e0ee8c8af4dd6fe75eacdeb14e82743c138b2fc558ca08550d8796b8db93f89da4 diff --git a/deps/llvmunwind.version b/deps/llvmunwind.version index 7d13af9a158f7..9c2a91c566ba2 100644 --- a/deps/llvmunwind.version +++ b/deps/llvmunwind.version @@ -2,4 +2,4 @@ LLVMUNWIND_JLL_NAME := LLVMLibUnwind ## source build -LLVMUNWIND_VER := 12.0.1 +LLVMUNWIND_VER := 14.0.6 diff --git a/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch b/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch deleted file mode 100644 index 4e3897dfb9801..0000000000000 --- a/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch +++ /dev/null @@ -1,156 +0,0 @@ -Upstream commit 8c03fdf34a659925a3f09c8f54016e47ea1c7519 changed the build such -that it requires living inside the monorepo with libcxx available, only so that -it can reuse a CMake file to simplify some build steps. This patch is a revert -of that commit applied only to libunwind. - ---- -diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt -index 570b8db90653..a383d7d77d6f 100644 ---- a/libunwind/CMakeLists.txt -+++ b/libunwind/CMakeLists.txt -@@ -1,7 +1,3 @@ --if (NOT IS_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/../libcxx") -- message(FATAL_ERROR "libunwind requires being built in a monorepo layout with libcxx available") --endif() -- - #=============================================================================== - # Setup Project - #=============================================================================== -@@ -15,31 +11,103 @@ set(CMAKE_MODULE_PATH - ${CMAKE_MODULE_PATH} - ) - --set(LIBUNWIND_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) --set(LIBUNWIND_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) --set(LIBUNWIND_LIBCXX_PATH "${CMAKE_CURRENT_LIST_DIR}/../libcxx" CACHE PATH -- "Specify path to libc++ source.") -- - if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_BUILD) - project(libunwind LANGUAGES C CXX ASM) - -+ # Rely on llvm-config. -+ set(CONFIG_OUTPUT) -+ if(NOT LLVM_CONFIG_PATH) -+ find_program(LLVM_CONFIG_PATH "llvm-config") -+ endif() -+ if (DEFINED LLVM_PATH) -+ set(LLVM_INCLUDE_DIR ${LLVM_INCLUDE_DIR} CACHE PATH "Path to llvm/include") -+ set(LLVM_PATH ${LLVM_PATH} CACHE PATH "Path to LLVM source tree") -+ set(LLVM_MAIN_SRC_DIR ${LLVM_PATH}) -+ set(LLVM_CMAKE_PATH "${LLVM_PATH}/cmake/modules") -+ elseif(LLVM_CONFIG_PATH) -+ message(STATUS "Found LLVM_CONFIG_PATH as ${LLVM_CONFIG_PATH}") -+ set(CONFIG_COMMAND ${LLVM_CONFIG_PATH} "--includedir" "--prefix" "--src-root") -+ execute_process(COMMAND ${CONFIG_COMMAND} -+ RESULT_VARIABLE HAD_ERROR -+ OUTPUT_VARIABLE CONFIG_OUTPUT) -+ if (NOT HAD_ERROR) -+ string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";" -+ CONFIG_OUTPUT ${CONFIG_OUTPUT}) -+ else() -+ string(REPLACE ";" " " CONFIG_COMMAND_STR "${CONFIG_COMMAND}") -+ message(STATUS "${CONFIG_COMMAND_STR}") -+ message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}") -+ endif() -+ -+ list(GET CONFIG_OUTPUT 0 INCLUDE_DIR) -+ list(GET CONFIG_OUTPUT 1 LLVM_OBJ_ROOT) -+ list(GET CONFIG_OUTPUT 2 MAIN_SRC_DIR) -+ -+ set(LLVM_INCLUDE_DIR ${INCLUDE_DIR} CACHE PATH "Path to llvm/include") -+ set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree") -+ set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree") -+ set(LLVM_LIT_PATH "${LLVM_PATH}/utils/lit/lit.py") -+ -+ # --cmakedir is supported since llvm r291218 (4.0 release) -+ execute_process( -+ COMMAND ${LLVM_CONFIG_PATH} --cmakedir -+ RESULT_VARIABLE HAD_ERROR -+ OUTPUT_VARIABLE CONFIG_OUTPUT -+ ERROR_QUIET) -+ if(NOT HAD_ERROR) -+ string(STRIP "${CONFIG_OUTPUT}" LLVM_CMAKE_PATH_FROM_LLVM_CONFIG) -+ file(TO_CMAKE_PATH "${LLVM_CMAKE_PATH_FROM_LLVM_CONFIG}" LLVM_CMAKE_PATH) -+ else() -+ file(TO_CMAKE_PATH "${LLVM_BINARY_DIR}" LLVM_BINARY_DIR_CMAKE_STYLE) -+ set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm") -+ endif() -+ else() -+ message(WARNING "UNSUPPORTED LIBUNWIND CONFIGURATION DETECTED: " -+ "llvm-config not found and LLVM_MAIN_SRC_DIR not defined. " -+ "Reconfigure with -DLLVM_CONFIG=path/to/llvm-config " -+ "or -DLLVM_PATH=path/to/llvm-source-root.") -+ endif() -+ -+ if (EXISTS ${LLVM_CMAKE_PATH}) -+ list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}") -+ include("${LLVM_CMAKE_PATH}/AddLLVM.cmake") -+ include("${LLVM_CMAKE_PATH}/HandleLLVMOptions.cmake") -+ else() -+ message(WARNING "Not found: ${LLVM_CMAKE_PATH}") -+ endif() -+ - set(PACKAGE_NAME libunwind) - set(PACKAGE_VERSION 12.0.1) - set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") - set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org") - -- # Add the CMake module path of libcxx so we can reuse HandleOutOfTreeLLVM.cmake -- set(LIBUNWIND_LIBCXX_CMAKE_PATH "${LIBUNWIND_LIBCXX_PATH}/cmake/Modules") -- list(APPEND CMAKE_MODULE_PATH "${LIBUNWIND_LIBCXX_CMAKE_PATH}") -+ if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) -+ set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) -+ else() -+ # Seek installed Lit. -+ find_program(LLVM_LIT "lit.py" ${LLVM_MAIN_SRC_DIR}/utils/lit -+ DOC "Path to lit.py") -+ endif() - -- # In a standalone build, we don't have llvm to automatically generate the -- # llvm-lit script for us. So we need to provide an explicit directory that -- # the configurator should write the script into. -- set(LIBUNWIND_STANDALONE_BUILD 1) -- set(LLVM_LIT_OUTPUT_DIR "${LIBUNWIND_BINARY_DIR}/bin") -+ if (LLVM_LIT) -+ # Define the default arguments to use with 'lit', and an option for the user -+ # to override. -+ set(LIT_ARGS_DEFAULT "-sv") -+ if (MSVC OR XCODE) -+ set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") -+ endif() -+ set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") -+ -+ # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools. -+ if (WIN32 AND NOT CYGWIN) -+ set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools") -+ endif() -+ else() -+ set(LLVM_INCLUDE_TESTS OFF) -+ endif() - -- # Find the LLVM sources and simulate LLVM CMake options. -- include(HandleOutOfTreeLLVM) -+ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}) -+ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}) - else() - set(LLVM_LIT "${CMAKE_SOURCE_DIR}/utils/lit/lit.py") - endif() -@@ -85,8 +153,6 @@ set(LIBUNWIND_TEST_COMPILER_FLAGS "" CACHE STRING - "Additional compiler flags for test programs.") - set(LIBUNWIND_TEST_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test/lit.site.cfg.in" CACHE STRING - "The Lit testing configuration to use when running the tests.") --set(LIBUNWIND_TEST_PARAMS "" CACHE STRING -- "A list of parameters to run the Lit test suite with.") - - if (NOT LIBUNWIND_ENABLE_SHARED AND NOT LIBUNWIND_ENABLE_STATIC) - message(FATAL_ERROR "libunwind must be built as either a shared or static library.") -@@ -113,6 +179,9 @@ set(CMAKE_MODULE_PATH - "${CMAKE_CURRENT_SOURCE_DIR}/cmake" - ${CMAKE_MODULE_PATH}) - -+set(LIBUNWIND_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -+set(LIBUNWIND_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) -+ - if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE) - set(LIBUNWIND_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) - set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++) diff --git a/deps/unwind.mk b/deps/unwind.mk index 079e4d69b04a3..3951bbf36e22f 100644 --- a/deps/unwind.mk +++ b/deps/unwind.mk @@ -88,40 +88,41 @@ check-unwind: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-checked LLVMUNWIND_OPTS := $(CMAKE_COMMON) \ -DCMAKE_BUILD_TYPE=MinSizeRel \ -DLIBUNWIND_ENABLE_PEDANTIC=OFF \ - -DLLVM_PATH=$(SRCCACHE)/$(LLVM_SRC_DIR)/llvm + -DLIBUNWIND_INCLUDE_DOCS=OFF \ + -DLIBUNWIND_INCLUDE_TESTS=OFF \ + -DLIBUNWIND_INSTALL_HEADERS=ON \ + -DLIBUNWIND_ENABLE_ASSERTIONS=OFF \ + -DLLVM_CONFIG_PATH=$(build_depsbindir)/llvm-config \ + -DLLVM_PATH=$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/llvm -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE) - $(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/libunwind-$(LLVMUNWIND_VER).src.tar.xz +$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE) + $(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/llvm-project-$(LLVMUNWIND_VER).src.tar.xz -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz +$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz $(JLCHECKSUM) $< cd $(dir $<) && $(TAR) xf $< - mv $(SRCCACHE)/libunwind-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) + mv $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER) echo 1 > $@ -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted - cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch +$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted + cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch echo 1 > $@ -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied - cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch +$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-prologue-epilogue.patch-applied + cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch echo 1 > $@ -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-revert-monorepo-requirement.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied - cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-revert-monorepo-requirement.patch +$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-force-dwarf.patch-applied + cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch echo 1 > $@ -$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-revert-monorepo-requirement.patch-applied - cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch - echo 1 > $@ - -checksum-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz +checksum-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz $(JLCHECKSUM) $< -$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied +$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied mkdir -p $(dir $@) cd $(dir $@) && \ - $(CMAKE) $(dir $<) $(LLVMUNWIND_OPTS) + $(CMAKE) $(dir $<)/libunwind $(LLVMUNWIND_OPTS) echo 1 > $@ $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured @@ -131,7 +132,7 @@ $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled: $(BUILDDIR)/llvmunwind- $(eval $(call staged-install, \ llvmunwind,llvmunwind-$(LLVMUNWIND_VER), \ MAKE_INSTALL,,, \ - cp -fR $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/include/* $(build_includedir))) + cp -fR $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/* $(build_includedir))) clean-llvmunwind: -rm -f $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled @@ -139,14 +140,14 @@ clean-llvmunwind: -$(MAKE) -C $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER) clean distclean-llvmunwind: - rm -rf $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz \ + rm -rf $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz \ $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) \ $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER) -get-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz -extract-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted -configure-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured -compile-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled +get-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz +extract-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted +configure-llvmunwind: $(BUILDDIR)/llvm-project-$(LLVMUNWIND_VER)/build-configured +compile-llvmunwind: $(BUILDDIR)/llvm-project-$(LLVMUNWIND_VER)/build-compiled fastcheck-llvmunwind: check-llvmunwind check-llvmunwind: # no test/check provided by Makefile diff --git a/stdlib/LLVMLibUnwind_jll/Project.toml b/stdlib/LLVMLibUnwind_jll/Project.toml index 36c24111d4d31..0cb0fe5440066 100644 --- a/stdlib/LLVMLibUnwind_jll/Project.toml +++ b/stdlib/LLVMLibUnwind_jll/Project.toml @@ -1,6 +1,6 @@ name = "LLVMLibUnwind_jll" uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9" -version = "12.0.1+0" +version = "14.0.6+0" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" From 8547b0250046cf20f762bfd910bf180b757c69a6 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Sat, 7 Sep 2024 06:06:49 +1200 Subject: [PATCH 05/57] Add `JL_DATA_TYPE` for `jl_line_info_node_t` and `jl_code_info_t` (#55698) --- src/julia.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/julia.h b/src/julia.h index efdd6a1b08bf7..abb8a57ff13b0 100644 --- a/src/julia.h +++ b/src/julia.h @@ -234,6 +234,7 @@ JL_DLLEXPORT extern const jl_callptr_t jl_f_opaque_closure_call_addr; JL_DLLEXPORT extern const jl_callptr_t jl_fptr_wait_for_compiled_addr; typedef struct _jl_line_info_node_t { + JL_DATA_TYPE struct _jl_module_t *module; jl_value_t *method; // may contain a jl_symbol, jl_method_t, or jl_method_instance_t jl_sym_t *file; @@ -281,6 +282,7 @@ typedef union __jl_purity_overrides_t { // This type describes a single function body typedef struct _jl_code_info_t { + JL_DATA_TYPE // ssavalue-indexed arrays of properties: jl_array_t *code; // Any array of statements jl_debuginfo_t *debuginfo; // Table of edge data for each statement From 0c9c1e25b7d017250fd5b64255c683b74a168a9b Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Fri, 6 Sep 2024 16:10:50 -0300 Subject: [PATCH 06/57] Canonicalize names of nested functions by keeping a more fine grained counter -- per (module, method name) pair (#53719) As mentioned in https://github.com/JuliaLang/julia/pull/53716, we've been noticing that `precompile` statements lists from one version of our codebase often don't apply cleanly in a slightly different version. That's because a lot of nested and anonymous function names have a global numeric suffix which is incremented every time a new name is generated, and these numeric suffixes are not very stable across codebase changes. To solve this, this PR makes the numeric suffixes a bit more fine grained: every pair of (module, top-level/outermost function name) will have its own counter, which should make nested function names a bit more stable across different versions. This PR applies @JeffBezanson's idea of making the symbol name changes directly in `current-julia-module-counter`. Here is an example: ```Julia julia> function foo(x) function bar(y) return x + y end end foo (generic function with 1 method) julia> f = foo(42) (::var"#bar#foo##0"{Int64}) (generic function with 1 method) ``` --- doc/src/manual/functions.md | 4 +- src/ast.c | 44 ++++++++++++++++-- src/datatype.c | 14 +++--- src/flisp/flisp.h | 2 +- src/julia-syntax.scm | 92 ++++++++++++++++++++++++------------- src/julia_internal.h | 22 ++++++++- test/show.jl | 63 +++++++++++++++++++++++++ 7 files changed, 193 insertions(+), 48 deletions(-) diff --git a/doc/src/manual/functions.md b/doc/src/manual/functions.md index 9a91ea7467750..be81fe529ef7d 100644 --- a/doc/src/manual/functions.md +++ b/doc/src/manual/functions.md @@ -292,12 +292,12 @@ syntaxes: ```jldoctest julia> x -> x^2 + 2x - 1 -#1 (generic function with 1 method) +#2 (generic function with 1 method) julia> function (x) x^2 + 2x - 1 end -#3 (generic function with 1 method) +#5 (generic function with 1 method) ``` Each statement creates a function taking one argument `x` and returning the value of the polynomial `x^2 + diff --git a/src/ast.c b/src/ast.c index 26b95225fbf1c..ea1de429a946c 100644 --- a/src/ast.c +++ b/src/ast.c @@ -7,6 +7,7 @@ #include #include #include + #ifdef _OS_WINDOWS_ #include #endif @@ -215,11 +216,46 @@ static value_t fl_nothrow_julia_global(fl_context_t *fl_ctx, value_t *args, uint decode_restriction_value(pku) : jl_atomic_load_relaxed(&b->value)) != NULL ? fl_ctx->T : fl_ctx->F; } -static value_t fl_current_module_counter(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT +// Used to generate a unique suffix for a given symbol (e.g. variable or type name) +// first argument contains a stack of method definitions seen so far by `closure-convert` in flisp. +// if the top of the stack is non-NIL, we use it to augment the suffix so that it becomes +// of the form $top_level_method_name##$counter, where `counter` is the smallest integer +// such that the resulting name is not already defined in the current module's bindings. +// If the top of the stack is NIL, we simply return the current module's counter. +// This ensures that precompile statements are a bit more stable across different versions +// of a codebase. see #53719 +static value_t fl_module_unique_name(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) { + argcount(fl_ctx, "julia-module-unique-name", nargs, 1); jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx); - assert(ctx->module); - return fixnum(jl_module_next_counter(ctx->module)); + jl_module_t *m = ctx->module; + assert(m != NULL); + // Get the outermost function name from the `parsed_method_stack` top + char *funcname = NULL; + value_t parsed_method_stack = args[0]; + if (parsed_method_stack != fl_ctx->NIL) { + value_t bottom_stack_symbol = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "last")), parsed_method_stack); + funcname = tosymbol(fl_ctx, bottom_stack_symbol, "julia-module-unique-name")->name; + } + size_t sz = funcname != NULL ? strlen(funcname) + 32 : 32; // 32 is enough for the suffix + char *buf = (char*)alloca(sz); + if (funcname != NULL && strchr(funcname, '#') == NULL) { + for (int i = 0; ; i++) { + snprintf(buf, sz, "%s##%d", funcname, i); + jl_sym_t *sym = jl_symbol(buf); + JL_LOCK(&m->lock); + if (jl_get_module_binding(m, sym, 0) == NULL) { // make sure this name is not already taken + jl_get_module_binding(m, sym, 1); // create the binding + JL_UNLOCK(&m->lock); + return symbol(fl_ctx, buf); + } + JL_UNLOCK(&m->lock); + } + } + else { + snprintf(buf, sz, "%d", jl_module_next_counter(m)); + } + return symbol(fl_ctx, buf); } static int jl_is_number(jl_value_t *v) @@ -252,7 +288,7 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m static const builtinspec_t julia_flisp_ast_ext[] = { { "defined-julia-global", fl_defined_julia_global }, // TODO: can we kill this safepoint { "nothrow-julia-global", fl_nothrow_julia_global }, - { "current-julia-module-counter", fl_current_module_counter }, + { "current-julia-module-counter", fl_module_unique_name }, { "julia-scalar?", fl_julia_scalar }, { NULL, NULL } }; diff --git a/src/datatype.c b/src/datatype.c index 1157c1d425cb2..c78b00fdd2245 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -20,23 +20,21 @@ extern "C" { // allocating TypeNames ----------------------------------------------------------- -static int is10digit(char c) JL_NOTSAFEPOINT -{ - return (c >= '0' && c <= '9'); -} - static jl_sym_t *jl_demangle_typename(jl_sym_t *s) JL_NOTSAFEPOINT { char *n = jl_symbol_name(s); if (n[0] != '#') return s; - char *end = strrchr(n, '#'); + char *end = strchr(&n[1], '#'); + // handle `#f...##...#...` + if (end != NULL && end[1] == '#') + end = strchr(&end[2], '#'); int32_t len; - if (end == n || end == n+1) + if (end == NULL || end == n+1) len = strlen(n) - 1; else len = (end-n) - 1; // extract `f` from `#f#...` - if (is10digit(n[1])) + if (isdigit(n[1]) || is_canonicalized_anonfn_typename(n)) return _jl_symbol(n, len+1); return _jl_symbol(&n[1], len); } diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h index 669753a9f5302..f8dd1cfd81ed0 100644 --- a/src/flisp/flisp.h +++ b/src/flisp/flisp.h @@ -158,7 +158,7 @@ value_t fl_cons(fl_context_t *fl_ctx, value_t a, value_t b) JL_NOTSAFEPOINT; value_t fl_list2(fl_context_t *fl_ctx, value_t a, value_t b) JL_NOTSAFEPOINT; value_t fl_listn(fl_context_t *fl_ctx, size_t n, ...) JL_NOTSAFEPOINT; value_t symbol(fl_context_t *fl_ctx, const char *str) JL_NOTSAFEPOINT; -char *symbol_name(fl_context_t *fl_ctx, value_t v); +char *symbol_name(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT; int fl_is_keyword_name(const char *str, size_t len); value_t alloc_vector(fl_context_t *fl_ctx, size_t n, int init); size_t llength(value_t v); diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index a2d3ffdd66f67..6815921375184 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -225,6 +225,19 @@ (if lb (list lb ub) (list ub)) (if lb (list lb '(core Any)) '()))))) +(define (is-method? x) + (if (and (pair? x) (eq? (car x) 'method)) + (let ((name (cadr x))) + (if (and (pair? name) (eq? (car name) 'globalref)) + (let ((name (caddr name))) + (if (symbol? name) + #t + #f)) + (if (symbol? name) + #t + #f))) + #f)) + (define (method-expr-name m) (let ((name (cadr m))) (cond ((globalref? name) (caddr name)) @@ -372,7 +385,7 @@ (generator (if (expr-contains-p if-generated? body (lambda (x) (not (function-def? x)))) (let* ((gen (generated-version body)) (nongen (non-generated-version body)) - (gname (symbol (string (gensy) "#" (current-julia-module-counter)))) + (gname (symbol (string (gensy) "#" (current-julia-module-counter '())))) (gf (make-generator-function gname names anames gen))) (set! body (insert-after-meta nongen @@ -512,7 +525,7 @@ "" "#") (or und '_) "#" - (string (current-julia-module-counter))))))) + (string (current-julia-module-counter '()))))))) ;; this is a hack: nest these statements inside a call so they get closure ;; converted together, allowing all needed types to be defined before any methods. `(call (core ifelse) (false) (false) (block @@ -1251,7 +1264,7 @@ (list a))) ;; TODO: always use a specific special name like #anon# or _, then ignore ;; this as a local variable name. - (name (symbol (string "#" (current-julia-module-counter))))) + (name (symbol (string "#" (current-julia-module-counter '()))))) (expand-forms `(block (local ,name) (function @@ -3537,9 +3550,9 @@ f(x) = yt(x) (define (clear-capture-bits vinfos) (map vinfo:not-capt vinfos)) -(define (convert-lambda lam fname interp capt-sp opaq) +(define (convert-lambda lam fname interp capt-sp opaq parsed-method-stack) (let ((body (add-box-inits-to-body - lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq (table) (vinfo-to-table (car (lam:vinfo lam))))))) + lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq parsed-method-stack (table) (vinfo-to-table (car (lam:vinfo lam))))))) `(lambda ,(lam:args lam) (,(clear-capture-bits (car (lam:vinfo lam))) () @@ -3614,7 +3627,7 @@ f(x) = yt(x) ;; declared types. ;; when doing this, the original value needs to be preserved, to ;; ensure the expression `a=b` always returns exactly `b`. -(define (convert-assignment var rhs0 fname lam interp opaq globals locals) +(define (convert-assignment var rhs0 fname lam interp opaq parsed-method-stack globals locals) (cond ((symbol? var) (let* ((vi (get locals var #f)) @@ -3632,7 +3645,7 @@ f(x) = yt(x) (equal? rhs0 '(the_exception))) rhs0 (make-ssavalue))) - (rhs (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq (table) locals) #t lam)) + (rhs (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq parsed-method-stack (table) locals) #t lam)) (ex (cond (closed `(call (core setfield!) ,(if interp `($ ,var) @@ -3916,17 +3929,17 @@ f(x) = yt(x) (define (toplevel-preserving? e) (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally trycatchelse)))) -(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table))) +(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq parsed-method-stack (globals (table)) (locals (table))) (if toplevel (map (lambda (x) (let ((tl (lift-toplevel (cl-convert x fname lam namemap defined (and toplevel (toplevel-preserving? x)) - interp opaq globals locals)))) + interp opaq parsed-method-stack globals locals)))) (if (null? (cdr tl)) (car tl) `(block ,@(cdr tl) ,(car tl))))) exprs) - (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq globals locals)) exprs))) + (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq parsed-method-stack globals locals)) exprs))) (define (prepare-lambda! lam) ;; mark all non-arguments as assigned, since locals that are never assigned @@ -3935,11 +3948,17 @@ f(x) = yt(x) (list-tail (car (lam:vinfo lam)) (length (lam:args lam)))) (lambda-optimize-vars! lam)) -(define (cl-convert e fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table))) +;; must start with a hash and second character must be numeric +(define (anon-function-name? str) + (and (>= (string-length str) 2) + (char=? (string.char str 0) #\#) + (char-numeric? (string.char str 1)))) + +(define (cl-convert- e fname lam namemap defined toplevel interp opaq parsed-method-stack (globals (table)) (locals (table))) (if (and (not lam) (not (and (pair? e) (memq (car e) '(lambda method macro opaque_closure))))) (if (atom? e) e - (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals))) + (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))) (cond ((symbol? e) (define (new-undef-var name) @@ -3958,7 +3977,7 @@ f(x) = yt(x) (val (if (equal? typ '(core Any)) val `(call (core typeassert) ,val - ,(cl-convert typ fname lam namemap defined toplevel interp opaq globals locals))))) + ,(cl-convert typ fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))))) `(block ,@(if (eq? box access) '() `((= ,access ,box))) ,undefcheck @@ -3990,8 +4009,8 @@ f(x) = yt(x) e) ((=) (let ((var (cadr e)) - (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq globals locals))) - (convert-assignment var rhs fname lam interp opaq globals locals))) + (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))) + (convert-assignment var rhs fname lam interp opaq parsed-method-stack globals locals))) ((local-def) ;; make new Box for local declaration of defined variable (let ((vi (get locals (cadr e) #f))) (if (and vi (vinfo:asgn vi) (vinfo:capt vi)) @@ -4043,7 +4062,7 @@ f(x) = yt(x) cvs))) `(new_opaque_closure ,(cadr e) ,(or (caddr e) '(call (core apply_type) (core Union))) ,(or (cadddr e) '(core Any)) ,allow-partial - (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs))) + (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs) parsed-method-stack)) ,@var-exprs)))) ((method) (let* ((name (method-expr-name e)) @@ -4057,7 +4076,7 @@ f(x) = yt(x) (sp-inits (if (or short (not (eq? (car sig) 'block))) '() (map-cl-convert (butlast (cdr sig)) - fname lam namemap defined toplevel interp opaq globals locals))) + fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))) (sig (and sig (if (eq? (car sig) 'block) (last sig) sig)))) @@ -4084,22 +4103,22 @@ f(x) = yt(x) ;; anonymous functions with keyword args generate global ;; functions that refer to the type of a local function (rename-sig-types sig namemap) - fname lam namemap defined toplevel interp opaq globals locals) + fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals) ,(let ((body (add-box-inits-to-body lam2 - (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq (table) + (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq parsed-method-stack (table) (vinfo-to-table (car (lam:vinfo lam2))))))) `(lambda ,(cadr lam2) (,(clear-capture-bits (car vis)) ,@(cdr vis)) ,body))))) (else - (let* ((exprs (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f))) + (let* ((exprs (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f parsed-method-stack))) (top-stmts (cdr exprs)) (newlam (compact-and-renumber (linearize (car exprs)) 'none 0))) `(toplevel-butfirst (block ,@sp-inits - (method ,(cadr e) ,(cl-convert sig fname lam namemap defined toplevel interp opaq globals locals) + (method ,(cadr e) ,(cl-convert sig fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals) ,(julia-bq-macro newlam))) ,@top-stmts)))) @@ -4108,9 +4127,11 @@ f(x) = yt(x) (type-name (or (get namemap name #f) (and name (symbol (string (if (= (string.char (string name) 0) #\#) - "" - "#") - name "#" (current-julia-module-counter)))))) + (if (anon-function-name? (string name)) + (string "#" (current-julia-module-counter parsed-method-stack)) + name) + (string "#" name)) + "#" (current-julia-module-counter parsed-method-stack)))))) (alldefs (expr-find-all (lambda (ex) (and (length> ex 2) (eq? (car ex) 'method) (not (eq? ex e)) @@ -4202,12 +4223,12 @@ f(x) = yt(x) (append (map (lambda (gs tvar) (make-assignment gs `(call (core TypeVar) ',tvar (core Any)))) closure-param-syms closure-param-names) - `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq globals locals) + `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals) ,(convert-lambda lam2 (if iskw (caddr (lam:args lam2)) (car (lam:args lam2))) - #f closure-param-names #f))))))) + #f closure-param-names #f parsed-method-stack))))))) (mk-closure ;; expression to make the closure (let* ((var-exprs (map (lambda (v) (let ((cv (assq v (cadr (lam:vinfo lam))))) @@ -4241,7 +4262,7 @@ f(x) = yt(x) (begin (put! defined name #t) `(toplevel-butfirst - ,(convert-assignment name mk-closure fname lam interp opaq globals locals) + ,(convert-assignment name mk-closure fname lam interp opaq parsed-method-stack globals locals) ,@typedef ,@(map (lambda (v) `(moved-local ,v)) moved-vars) ,@sp-inits @@ -4255,14 +4276,14 @@ f(x) = yt(x) (table) (table) (null? (cadr e)) ;; only toplevel thunks have 0 args - interp opaq globals (vinfo-to-table (car (lam:vinfo e)))))) + interp opaq parsed-method-stack globals (vinfo-to-table (car (lam:vinfo e)))))) `(lambda ,(cadr e) (,(clear-capture-bits (car (lam:vinfo e))) () ,@(cddr (lam:vinfo e))) (block ,@body)))) ;; remaining `::` expressions are type assertions ((|::|) - (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq globals locals)) + (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)) ;; remaining `decl` expressions are only type assertions if the ;; argument is global or a non-symbol. ((decl) @@ -4280,13 +4301,20 @@ f(x) = yt(x) (globaldecl ,ref ,(caddr e)) (null))) `(call (core typeassert) ,@(cdr e)))) - fname lam namemap defined toplevel interp opaq globals locals)))) + fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))) ;; `with-static-parameters` expressions can be removed now; used only by analyze-vars ((with-static-parameters) - (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq globals locals)) + (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)) (else (cons (car e) - (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals)))))))) + (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))))))) + +;; wrapper for `cl-convert-` +(define (cl-convert e fname lam namemap defined toplevel interp opaq (parsed-method-stack '()) (globals (table)) (locals (table))) + (if (is-method? e) + (let ((name (method-expr-name e))) + (cl-convert- e fname lam namemap defined toplevel interp opaq (cons name parsed-method-stack) globals locals)) + (cl-convert- e fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))) (define (closure-convert e) (cl-convert e #f #f (table) (table) #f #f #f)) diff --git a/src/julia_internal.h b/src/julia_internal.h index d9e1a078c8a03..ac955e2bcb3a6 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -13,6 +13,7 @@ #include "support/strtod.h" #include "gc-alloc-profiler.h" #include "support/rle.h" +#include #include #include #include @@ -947,12 +948,31 @@ STATIC_INLINE jl_ptr_kind_union_t jl_walk_binding_inplace(jl_binding_t **bnd, jl } #endif +STATIC_INLINE int is10digit(char c) JL_NOTSAFEPOINT +{ + return (c >= '0' && c <= '9'); +} + STATIC_INLINE int is_anonfn_typename(char *name) { if (name[0] != '#' || name[1] == '#') return 0; char *other = strrchr(name, '#'); - return other > &name[1] && other[1] > '0' && other[1] <= '9'; + return other > &name[1] && is10digit(other[1]); +} + +// Returns true for typenames of anounymous functions that have been canonicalized (i.e. +// we mangled the name of the outermost enclosing function in their name). +STATIC_INLINE int is_canonicalized_anonfn_typename(char *name) JL_NOTSAFEPOINT +{ + char *delim = strchr(&name[1], '#'); + if (delim == NULL) + return 0; + if (delim[1] != '#') + return 0; + if (!is10digit(delim[2])) + return 0; + return 1; } // Each tuple can exist in one of 4 Vararg states: diff --git a/test/show.jl b/test/show.jl index 63663152d9d91..65c65111606c5 100644 --- a/test/show.jl +++ b/test/show.jl @@ -755,6 +755,69 @@ end @test startswith(sprint(show, typeof(x->x), context = :module=>@__MODULE__), "var\"") +# PR 53719 +module M53719 + f = x -> x + 1 + function foo(x) + function bar(y) + function baz(z) + return x + y + z + end + return baz + end + return bar + end + function foo2(x) + function bar2(y) + return z -> x + y + z + end + return bar2 + end + lambda1 = (x)->begin + function foo(y) + return x + y + end + return foo + end + lambda2 = (x)->begin + y -> x + y + end +end + +@testset "PR 53719 function names" begin + # M53719.f should be printed as var"#[0-9]+" + @test occursin(r"var\"#[0-9]+", sprint(show, M53719.f, context = :module=>M53719)) + # M53719.foo(1) should be printed as var"#bar" + @test occursin(r"var\"#bar", sprint(show, M53719.foo(1), context = :module=>M53719)) + # M53719.foo(1)(2) should be printed as var"#baz" + @test occursin(r"var\"#baz", sprint(show, M53719.foo(1)(2), context = :module=>M53719)) + # M53719.foo2(1) should be printed as var"#bar2" + @test occursin(r"var\"#bar2", sprint(show, M53719.foo2(1), context = :module=>M53719)) + # M53719.foo2(1)(2) should be printed as var"#foo2##[0-9]+" + @test occursin(r"var\"#foo2##[0-9]+", sprint(show, M53719.foo2(1)(2), context = :module=>M53719)) + # M53719.lambda1(1) should be printed as var"#foo" + @test occursin(r"var\"#foo", sprint(show, M53719.lambda1(1), context = :module=>M53719)) + # M53719.lambda2(1) should be printed as var"#[0-9]+" + @test occursin(r"var\"#[0-9]+", sprint(show, M53719.lambda2(1), context = :module=>M53719)) +end + +@testset "PR 53719 function types" begin + # typeof(M53719.f) should be printed as var"#[0-9]+#[0-9]+" + @test occursin(r"var\"#[0-9]+#[0-9]+", sprint(show, typeof(M53719.f), context = :module=>M53719)) + #typeof(M53719.foo(1)) should be printed as var"#bar#foo##[0-9]+" + @test occursin(r"var\"#bar#foo##[0-9]+", sprint(show, typeof(M53719.foo(1)), context = :module=>M53719)) + #typeof(M53719.foo(1)(2)) should be printed as var"#baz#foo##[0-9]+" + @test occursin(r"var\"#baz#foo##[0-9]+", sprint(show, typeof(M53719.foo(1)(2)), context = :module=>M53719)) + #typeof(M53719.foo2(1)) should be printed as var"#bar2#foo2##[0-9]+" + @test occursin(r"var\"#bar2#foo2##[0-9]+", sprint(show, typeof(M53719.foo2(1)), context = :module=>M53719)) + #typeof(M53719.foo2(1)(2)) should be printed as var"#foo2##[0-9]+#foo2##[0-9]+" + @test occursin(r"var\"#foo2##[0-9]+#foo2##[0-9]+", sprint(show, typeof(M53719.foo2(1)(2)), context = :module=>M53719)) + #typeof(M53719.lambda1(1)) should be printed as var"#foo#[0-9]+" + @test occursin(r"var\"#foo#[0-9]+", sprint(show, typeof(M53719.lambda1(1)), context = :module=>M53719)) + #typeof(M53719.lambda2(1)) should be printed as var"#[0-9]+#[0-9]+" + @test occursin(r"var\"#[0-9]+#[0-9]+", sprint(show, typeof(M53719.lambda2(1)), context = :module=>M53719)) +end + #test methodshow.jl functions @test Base.inbase(Base) @test !Base.inbase(LinearAlgebra) From ac5479946115253f87de1daee467fbc9e3d8e5d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= <765740+giordano@users.noreply.github.com> Date: Fri, 6 Sep 2024 23:17:22 +0100 Subject: [PATCH 07/57] Use `uv_available_parallelism` inside `jl_effective_threads` (#55592) --- deps/checksums/libuv | 68 +++++++++++++++++------------------ deps/libuv.version | 2 +- src/sys.c | 23 +++--------- stdlib/LibUV_jll/Project.toml | 2 +- 4 files changed, 40 insertions(+), 55 deletions(-) diff --git a/deps/checksums/libuv b/deps/checksums/libuv index c2c86a9767463..6887c3fe62f41 100644 --- a/deps/checksums/libuv +++ b/deps/checksums/libuv @@ -1,34 +1,34 @@ -LibUV.v2.0.1+17.aarch64-apple-darwin.tar.gz/md5/f176c76e5e2096dea8443302cf9550b8 -LibUV.v2.0.1+17.aarch64-apple-darwin.tar.gz/sha512/4301b13953a08a758b86e30af3261fd9a291ce3829b4d98e71e2a2c040e322e284c5a6eb4bc7189cc352f4b1cf7041e2cfd3380d511d88c151df3101ad74594e -LibUV.v2.0.1+17.aarch64-linux-gnu.tar.gz/md5/c81515783363702a1bd4b65fd6d7f36b -LibUV.v2.0.1+17.aarch64-linux-gnu.tar.gz/sha512/011429365337f5a45e56ca7a42709866bb994c747a1170d870f5f3ddfff2d36138866ee9278ac01172bc71bde8dee404bcb9cae9c7b44222bf1cc883659df269 -LibUV.v2.0.1+17.aarch64-linux-musl.tar.gz/md5/e74d5ea4912dd326b2705638faa7b805 -LibUV.v2.0.1+17.aarch64-linux-musl.tar.gz/sha512/a26a9f2c9051816230324071c502321f7af3885d581a400615858a93a4cd457226048d15b0e7f6a73d12659763c705b5ab519e9f5b35c6d886b9fd5babbfe352 -LibUV.v2.0.1+17.armv6l-linux-gnueabihf.tar.gz/md5/6df38bcf5d0a61dee63d16b73d0c9a24 -LibUV.v2.0.1+17.armv6l-linux-gnueabihf.tar.gz/sha512/d5354a6532061de0a58965ce0e427bde52f9ae0ee39a98e1a33de4c414fddcba9ba139ddf91be7321a4ccc97bbf7a8a8357ff10cf60f83c0a6bff7d839d6d7a8 -LibUV.v2.0.1+17.armv6l-linux-musleabihf.tar.gz/md5/6f02a24cfbfae3032fadceaea1faed39 -LibUV.v2.0.1+17.armv6l-linux-musleabihf.tar.gz/sha512/7fd107eb9a5ea84b488ea02e4fbedc9fe13bb11be859986a47af38f40ad775dd9f738c790878a3503437bcac1eb26ad9fe26f4aa0d3cb45c980b4c5abc9aec99 -LibUV.v2.0.1+17.armv7l-linux-gnueabihf.tar.gz/md5/96b09dec72f7e9b7409fa2920e67c866 -LibUV.v2.0.1+17.armv7l-linux-gnueabihf.tar.gz/sha512/6a0f79fc15c944fabba5c65180b665bc9769c6ff25863e330049f48b3a4394b448492f5a9a76bb7f8dbd3ce44dfc6f9ccdc2c71c42e1c749e88070fe99b1db69 -LibUV.v2.0.1+17.armv7l-linux-musleabihf.tar.gz/md5/f44e4b2521a813181f943895bdb0dd3c -LibUV.v2.0.1+17.armv7l-linux-musleabihf.tar.gz/sha512/cda1413dca817f772e8b343db0c6de0ef6b8f269e9a6a2ef3403c2582aeab554f46281bbb1eb4659c259198ef47fe26aab648a281e66f80aaf2f2cda0a23ac05 -LibUV.v2.0.1+17.i686-linux-gnu.tar.gz/md5/1f231d89cf9c04515d2d107a5d786cc8 -LibUV.v2.0.1+17.i686-linux-gnu.tar.gz/sha512/089cb8a372cdee0cbc0e78fc201611bb9bafd99af9a78e09d6097a6b70e7c4aa001ebd86f944b0a885c072093c529bf86bcaa32bde4fc1934407a858c1a5a764 -LibUV.v2.0.1+17.i686-linux-musl.tar.gz/md5/01cfc2a9e2536dbd330267917abb19ce -LibUV.v2.0.1+17.i686-linux-musl.tar.gz/sha512/72f3588cb464a60e61f8998242aaa2abdf93df920a2feba5e1d66ef0f2498488df0ec415cbb499d7f75c47bdfc7e3a2fdda6a94383492e0ad13e464eb1314ff8 -LibUV.v2.0.1+17.i686-w64-mingw32.tar.gz/md5/8ba829adad6a45dd71d5f25a7f958e59 -LibUV.v2.0.1+17.i686-w64-mingw32.tar.gz/sha512/dff3b1cfe54e583f8e9536ed02e56180b2cdb391bd108559ed97b2cafa743ebade9ddf04580912436e2efab747e09c79b99e33187ed67a27c5d38113373e1cec -LibUV.v2.0.1+17.powerpc64le-linux-gnu.tar.gz/md5/af0e43d9d0aa91dd82b63220d96991ef -LibUV.v2.0.1+17.powerpc64le-linux-gnu.tar.gz/sha512/9fabe3089e4fc60e910770c32d36300ce8ef36c28e8cc9c72fbecba6eb80285ee8174e84e4452fb4ce90ee7c7f94e99b03fce47d8c579bd614bfffd553f93666 -LibUV.v2.0.1+17.x86_64-apple-darwin.tar.gz/md5/871040e874eedae54553d8f1c91b9133 -LibUV.v2.0.1+17.x86_64-apple-darwin.tar.gz/sha512/d5eee08b65e4bb8b444c61ac277bec9ef944b9279dd7f0732b3cd91d47788c77938e5db71e019e01bbe7785a75df95faf14368764f700c6b7a6b9e4d96d6b4c2 -LibUV.v2.0.1+17.x86_64-linux-gnu.tar.gz/md5/d2d186952c6d017fe33f6a6bea63a3ea -LibUV.v2.0.1+17.x86_64-linux-gnu.tar.gz/sha512/15501534bf5721e6bb668aabe6dc6375349f7a284e28df0609d00982e7e456908bd6868722391afa7f44a5c82faedc8cf544f69a0e4fb9fb0d529b3ae3d44d78 -LibUV.v2.0.1+17.x86_64-linux-musl.tar.gz/md5/271d4d40a1ae53ed5b2376e5936cfcf9 -LibUV.v2.0.1+17.x86_64-linux-musl.tar.gz/sha512/1956f059ed01f66b72349d6561b04e6a89b7257c0f838d7fbdd2cee79bd126bb46b93bf944a042b5a6a235762a7a0cdd117207342dd55a0c58653a70b4a38d48 -LibUV.v2.0.1+17.x86_64-unknown-freebsd.tar.gz/md5/62fe8523948914fbe7e28bf0b8d73594 -LibUV.v2.0.1+17.x86_64-unknown-freebsd.tar.gz/sha512/e6486888028c96975f74bc9313cba9706f6bf2be085aa776c44cbb2886753b2eee62469a0be92eb0542df1d0f51db3b34c7ba5e46842e16c6ff1d20e11b75322 -LibUV.v2.0.1+17.x86_64-w64-mingw32.tar.gz/md5/541601cf27a1d28d25b9ffb00f5aed16 -LibUV.v2.0.1+17.x86_64-w64-mingw32.tar.gz/sha512/58fd5b54694dbddc2abaccfa7291955c75818cdedbd5d9057025bc3e3e6511812e7b03f71cd839c70cd39e77c5ba3e19a07dccd579e2148a2d212fa3829a9a76 -libuv-c57e7f06cbe697ca8ea9215ce054a608c451b193.tar.gz/md5/2b81dbf80d7a9fd10806d1705dbccb7f -libuv-c57e7f06cbe697ca8ea9215ce054a608c451b193.tar.gz/sha512/6796960a58031fa2bc9d72c8f35dff9f213e8d36bbc21ddb65f59bb4dfb074fc18414aece5a046a882dd08b42d5bd32277560c07a1298f68ab8bcd8aadcbf50b +LibUV.v2.0.1+18.aarch64-apple-darwin.tar.gz/md5/f176c76e5e2096dea8443302cf9550b8 +LibUV.v2.0.1+18.aarch64-apple-darwin.tar.gz/sha512/4301b13953a08a758b86e30af3261fd9a291ce3829b4d98e71e2a2c040e322e284c5a6eb4bc7189cc352f4b1cf7041e2cfd3380d511d88c151df3101ad74594e +LibUV.v2.0.1+18.aarch64-linux-gnu.tar.gz/md5/c81515783363702a1bd4b65fd6d7f36b +LibUV.v2.0.1+18.aarch64-linux-gnu.tar.gz/sha512/011429365337f5a45e56ca7a42709866bb994c747a1170d870f5f3ddfff2d36138866ee9278ac01172bc71bde8dee404bcb9cae9c7b44222bf1cc883659df269 +LibUV.v2.0.1+18.aarch64-linux-musl.tar.gz/md5/e74d5ea4912dd326b2705638faa7b805 +LibUV.v2.0.1+18.aarch64-linux-musl.tar.gz/sha512/a26a9f2c9051816230324071c502321f7af3885d581a400615858a93a4cd457226048d15b0e7f6a73d12659763c705b5ab519e9f5b35c6d886b9fd5babbfe352 +LibUV.v2.0.1+18.armv6l-linux-gnueabihf.tar.gz/md5/6df38bcf5d0a61dee63d16b73d0c9a24 +LibUV.v2.0.1+18.armv6l-linux-gnueabihf.tar.gz/sha512/d5354a6532061de0a58965ce0e427bde52f9ae0ee39a98e1a33de4c414fddcba9ba139ddf91be7321a4ccc97bbf7a8a8357ff10cf60f83c0a6bff7d839d6d7a8 +LibUV.v2.0.1+18.armv6l-linux-musleabihf.tar.gz/md5/6f02a24cfbfae3032fadceaea1faed39 +LibUV.v2.0.1+18.armv6l-linux-musleabihf.tar.gz/sha512/7fd107eb9a5ea84b488ea02e4fbedc9fe13bb11be859986a47af38f40ad775dd9f738c790878a3503437bcac1eb26ad9fe26f4aa0d3cb45c980b4c5abc9aec99 +LibUV.v2.0.1+18.armv7l-linux-gnueabihf.tar.gz/md5/96b09dec72f7e9b7409fa2920e67c866 +LibUV.v2.0.1+18.armv7l-linux-gnueabihf.tar.gz/sha512/6a0f79fc15c944fabba5c65180b665bc9769c6ff25863e330049f48b3a4394b448492f5a9a76bb7f8dbd3ce44dfc6f9ccdc2c71c42e1c749e88070fe99b1db69 +LibUV.v2.0.1+18.armv7l-linux-musleabihf.tar.gz/md5/f44e4b2521a813181f943895bdb0dd3c +LibUV.v2.0.1+18.armv7l-linux-musleabihf.tar.gz/sha512/cda1413dca817f772e8b343db0c6de0ef6b8f269e9a6a2ef3403c2582aeab554f46281bbb1eb4659c259198ef47fe26aab648a281e66f80aaf2f2cda0a23ac05 +LibUV.v2.0.1+18.i686-linux-gnu.tar.gz/md5/1f231d89cf9c04515d2d107a5d786cc8 +LibUV.v2.0.1+18.i686-linux-gnu.tar.gz/sha512/089cb8a372cdee0cbc0e78fc201611bb9bafd99af9a78e09d6097a6b70e7c4aa001ebd86f944b0a885c072093c529bf86bcaa32bde4fc1934407a858c1a5a764 +LibUV.v2.0.1+18.i686-linux-musl.tar.gz/md5/01cfc2a9e2536dbd330267917abb19ce +LibUV.v2.0.1+18.i686-linux-musl.tar.gz/sha512/72f3588cb464a60e61f8998242aaa2abdf93df920a2feba5e1d66ef0f2498488df0ec415cbb499d7f75c47bdfc7e3a2fdda6a94383492e0ad13e464eb1314ff8 +LibUV.v2.0.1+18.i686-w64-mingw32.tar.gz/md5/8c6599aab9ed4c46e52f03683aac664e +LibUV.v2.0.1+18.i686-w64-mingw32.tar.gz/sha512/13f0565f7244a8bcf1ab43fac91a856dc86d214877033a3cefee8c2179c1a275dfd7dda32e9017763acac2ba42ab6799934a58f5feaa38fb6cf2253dd713f57a +LibUV.v2.0.1+18.powerpc64le-linux-gnu.tar.gz/md5/af0e43d9d0aa91dd82b63220d96991ef +LibUV.v2.0.1+18.powerpc64le-linux-gnu.tar.gz/sha512/9fabe3089e4fc60e910770c32d36300ce8ef36c28e8cc9c72fbecba6eb80285ee8174e84e4452fb4ce90ee7c7f94e99b03fce47d8c579bd614bfffd553f93666 +LibUV.v2.0.1+18.x86_64-apple-darwin.tar.gz/md5/871040e874eedae54553d8f1c91b9133 +LibUV.v2.0.1+18.x86_64-apple-darwin.tar.gz/sha512/d5eee08b65e4bb8b444c61ac277bec9ef944b9279dd7f0732b3cd91d47788c77938e5db71e019e01bbe7785a75df95faf14368764f700c6b7a6b9e4d96d6b4c2 +LibUV.v2.0.1+18.x86_64-linux-gnu.tar.gz/md5/d2d186952c6d017fe33f6a6bea63a3ea +LibUV.v2.0.1+18.x86_64-linux-gnu.tar.gz/sha512/15501534bf5721e6bb668aabe6dc6375349f7a284e28df0609d00982e7e456908bd6868722391afa7f44a5c82faedc8cf544f69a0e4fb9fb0d529b3ae3d44d78 +LibUV.v2.0.1+18.x86_64-linux-musl.tar.gz/md5/271d4d40a1ae53ed5b2376e5936cfcf9 +LibUV.v2.0.1+18.x86_64-linux-musl.tar.gz/sha512/1956f059ed01f66b72349d6561b04e6a89b7257c0f838d7fbdd2cee79bd126bb46b93bf944a042b5a6a235762a7a0cdd117207342dd55a0c58653a70b4a38d48 +LibUV.v2.0.1+18.x86_64-unknown-freebsd.tar.gz/md5/62fe8523948914fbe7e28bf0b8d73594 +LibUV.v2.0.1+18.x86_64-unknown-freebsd.tar.gz/sha512/e6486888028c96975f74bc9313cba9706f6bf2be085aa776c44cbb2886753b2eee62469a0be92eb0542df1d0f51db3b34c7ba5e46842e16c6ff1d20e11b75322 +LibUV.v2.0.1+18.x86_64-w64-mingw32.tar.gz/md5/ae103f24b6e1830cdbe02143826fe551 +LibUV.v2.0.1+18.x86_64-w64-mingw32.tar.gz/sha512/f814085c135815947f342ff24fa0e1015e283ccece84a5b8dd5ccec0f5928a129e5fd79100a33b131376ad696f70b5acadcc5a02a7e6544635ecf7e18003ba1c +libuv-af4172ec713ee986ba1a989b9e33993a07c60c9e.tar.gz/md5/c1a7d3c74ef3999052f3bfe426264353 +libuv-af4172ec713ee986ba1a989b9e33993a07c60c9e.tar.gz/sha512/a3f16863b711ddeeb5ab8d135d7df7a4be19cc2b9821fc78c8cd3ba421231d39b7d8bd9965321455094fda01584842a58f60612d93082b4fe32210b8aa44d999 diff --git a/deps/libuv.version b/deps/libuv.version index db6afd9f1bb51..ebfc63927d9db 100644 --- a/deps/libuv.version +++ b/deps/libuv.version @@ -6,4 +6,4 @@ LIBUV_JLL_NAME := LibUV ## source build LIBUV_VER := 2 LIBUV_BRANCH=julia-uv2-1.48.0 -LIBUV_SHA1=c57e7f06cbe697ca8ea9215ce054a608c451b193 +LIBUV_SHA1=af4172ec713ee986ba1a989b9e33993a07c60c9e diff --git a/src/sys.c b/src/sys.c index 107a8f7637763..712d232da363a 100644 --- a/src/sys.c +++ b/src/sys.c @@ -478,25 +478,10 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT JL_DLLEXPORT int jl_effective_threads(void) JL_NOTSAFEPOINT { - int cpu = jl_cpu_threads(); - int masksize = uv_cpumask_size(); - if (masksize < 0 || jl_running_under_rr(0)) - return cpu; - uv_thread_t tid = uv_thread_self(); - char *cpumask = (char *)calloc(masksize, sizeof(char)); - int err = uv_thread_getaffinity(&tid, cpumask, masksize); - if (err) { - free(cpumask); - jl_safe_printf("WARNING: failed to get thread affinity (%s %d)\n", uv_err_name(err), - err); - return cpu; - } - int n = 0; - for (size_t i = 0; i < masksize; i++) { - n += cpumask[i]; - } - free(cpumask); - return n < cpu ? n : cpu; + // We want the more conservative estimate of the two. + int cpu_threads = jl_cpu_threads(); + int available_parallelism = uv_available_parallelism(); + return available_parallelism < cpu_threads ? available_parallelism : cpu_threads; } diff --git a/stdlib/LibUV_jll/Project.toml b/stdlib/LibUV_jll/Project.toml index 0c1ad8f25f848..fb03c6b996048 100644 --- a/stdlib/LibUV_jll/Project.toml +++ b/stdlib/LibUV_jll/Project.toml @@ -1,6 +1,6 @@ name = "LibUV_jll" uuid = "183b4373-6708-53ba-ad28-60e28bb38547" -version = "2.0.1+17" +version = "2.0.1+18" [deps] Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb" From fa1c6b211e2b13dd46221aac0c02791aa8ba34d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= <765740+giordano@users.noreply.github.com> Date: Sat, 7 Sep 2024 13:04:14 +0100 Subject: [PATCH 08/57] [LinearAlgebra] Initialise number of BLAS threads with `jl_effective_threads` (#55574) This is a safer estimate than `Sys.CPU_THREADS` to avoid oversubscribing the machine when running distributed applications, or when the Julia process is constrained by external controls (`taskset`, `cgroups`, etc.). Fix #55572 --- NEWS.md | 2 ++ stdlib/LinearAlgebra/src/LinearAlgebra.jl | 4 ++-- test/threads.jl | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 95a8a51c67ac8..c12cc3c64300c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -128,6 +128,8 @@ Standard library changes between different eigendecomposition algorithms ([#49355]). * Added a generic version of the (unblocked) pivoted Cholesky decomposition (callable via `cholesky[!](A, RowMaximum())`) ([#54619]). +* The number of default BLAS threads now respects process affinity, instead of + using total number of logical threads available on the system ([#55574]). #### Logging diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index be59516f086ab..27d4255fb656b 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -843,9 +843,9 @@ function __init__() # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "GOTO_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS") @static if Sys.isapple() && Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "aarch64" - BLAS.set_num_threads(max(1, Sys.CPU_THREADS)) + BLAS.set_num_threads(max(1, @ccall(jl_effective_threads()::Cint))) else - BLAS.set_num_threads(max(1, Sys.CPU_THREADS ÷ 2)) + BLAS.set_num_threads(max(1, @ccall(jl_effective_threads()::Cint) ÷ 2)) end end end diff --git a/test/threads.jl b/test/threads.jl index 2832f2a0e972c..6265368c2ac79 100644 --- a/test/threads.jl +++ b/test/threads.jl @@ -359,3 +359,18 @@ end @test jl_setaffinity(0, mask, cpumasksize) == 0 end end + +# Make sure default number of BLAS threads respects CPU affinity: issue #55572. +@testset "LinearAlgebra number of default threads" begin + if AFFINITY_SUPPORTED + allowed_cpus = findall(uv_thread_getaffinity()) + cmd = addenv(`$(Base.julia_cmd()) --startup-file=no -E 'using LinearAlgebra; BLAS.get_num_threads()'`, + # Remove all variables which could affect the default number of threads + "OPENBLAS_NUM_THREADS"=>nothing, + "GOTO_NUM_THREADS"=>nothing, + "OMP_NUM_THREADS"=>nothing) + for n in 1:min(length(allowed_cpus), 8) # Cap to 8 to avoid too many tests on large systems + @test readchomp(setcpuaffinity(cmd, allowed_cpus[1:n])) == string(max(1, n ÷ 2)) + end + end +end From e95eeddb30737962791f51e637dabdfe38c1a9bc Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Sat, 7 Sep 2024 09:43:58 -0400 Subject: [PATCH 09/57] Artifacts: Improve type-stability (#55707) This improves Artifacts.jl to make `artifact"..."` fully type-stable, so that it can be used with `--trim`. This is a requirement for JLL support w/ trimmed executables. Dependent on https://github.com/JuliaLang/julia/pull/55016 --------- Co-authored-by: Gabriel Baraldi --- base/loading.jl | 1 - base/toml_parser.jl | 100 ++++++++++++++++-------------- stdlib/Artifacts/src/Artifacts.jl | 30 +++++---- stdlib/TOML/test/values.jl | 2 +- 4 files changed, 68 insertions(+), 65 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 4dc735f0099d8..e0a2563dd0178 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -269,7 +269,6 @@ struct TOMLCache{Dates} d::Dict{String, CachedTOMLDict} end TOMLCache(p::TOML.Parser) = TOMLCache(p, Dict{String, CachedTOMLDict}()) -# TODO: Delete this converting constructor once Pkg stops using it TOMLCache(p::TOML.Parser, d::Dict{String, Dict{String, Any}}) = TOMLCache(p, convert(Dict{String, CachedTOMLDict}, d)) const TOML_CACHE = TOMLCache(TOML.Parser{nothing}()) diff --git a/base/toml_parser.jl b/base/toml_parser.jl index cc1455f61928b..4d07cfed05d8a 100644 --- a/base/toml_parser.jl +++ b/base/toml_parser.jl @@ -84,9 +84,6 @@ mutable struct Parser{Dates} # Filled in in case we are parsing a file to improve error messages filepath::Union{String, Nothing} - - # Optionally populate with the Dates stdlib to change the type of Date types returned - Dates::Union{Module, Nothing} # TODO: remove once Pkg is updated end function Parser{Dates}(str::String; filepath=nothing) where {Dates} @@ -106,8 +103,7 @@ function Parser{Dates}(str::String; filepath=nothing) where {Dates} IdSet{Any}(), # static_arrays IdSet{TOMLDict}(), # defined_tables root, - filepath, - nothing + filepath ) startup(l) return l @@ -495,8 +491,10 @@ function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String}, d = d::TOMLDict key = dotted_keys[i] d = get!(TOMLDict, d, key) - if d isa Vector + if d isa Vector{Any} d = d[end] + elseif d isa Vector + return ParserError(ErrKeyAlreadyHasValue) end check && @try check_allowed_add_key(l, d, i == length(dotted_keys)) end @@ -537,7 +535,7 @@ function parse_array_table(l)::Union{Nothing, ParserError} end d = @try recurse_dict!(l, l.root, @view(table_key[1:end-1]), false) k = table_key[end] - old = get!(() -> [], d, k) + old = get!(() -> Any[], d, k) if old isa Vector if old in l.static_arrays return ParserError(ErrAddArrayToStaticArray) @@ -546,7 +544,7 @@ function parse_array_table(l)::Union{Nothing, ParserError} return ParserError(ErrArrayTreatedAsDictionary) end d_new = TOMLDict() - push!(old, d_new) + push!(old::Vector{Any}, d_new) push!(l.defined_tables, d_new) l.active_table = d_new @@ -668,41 +666,20 @@ end # Array # ######### -function push!!(v::Vector, el) - # Since these types are typically non-inferable, they are a big invalidation risk, - # and since it's used by the package-loading infrastructure the cost of invalidation - # is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather - # than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there - # is no ambiguity about what types of objects will be created. - T = eltype(v) - t = typeof(el) - if el isa T || t === T - push!(v, el::T) - return v - elseif T === Union{} - out = Vector{t}(undef, 1) - out[1] = el - return out - else - if T isa Union - newT = Any - else - newT = Union{T, typeof(el)} - end - new = Array{newT}(undef, length(v)) - copy!(new, v) - return push!(new, el) +function copyto_typed!(a::Vector{T}, b::Vector) where T + for i in 1:length(b) + a[i] = b[i]::T end + return nothing end -function parse_array(l::Parser)::Err{Vector} +function parse_array(l::Parser{Dates})::Err{Vector} where Dates skip_ws_nl(l) - array = Vector{Union{}}() + array = Vector{Any}() empty_array = accept(l, ']') while !empty_array v = @try parse_value(l) - # TODO: Worth to function barrier this? - array = push!!(array, v) + array = push!(array, v) # There can be an arbitrary number of newlines and comments before a value and before the closing bracket. skip_ws_nl(l) comma = accept(l, ',') @@ -712,8 +689,40 @@ function parse_array(l::Parser)::Err{Vector} return ParserError(ErrExpectedCommaBetweenItemsArray) end end - push!(l.static_arrays, array) - return array + # check for static type throughout array + T = !isempty(array) ? typeof(array[1]) : Union{} + for el in array + if typeof(el) != T + T = Any + break + end + end + if T === Any + new = array + elseif T === String + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Bool + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Int64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === UInt64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Float64 + new = Array{T}(undef, length(array)) + copyto_typed!(new, array) + elseif T === Union{} + new = Any[] + elseif (T === TOMLDict) || (T == BigInt) || (T === UInt128) || (T === Int128) || (T <: Vector) || + (T === Dates.Date) || (T === Dates.Time) || (T === Dates.DateTime) + # do nothing, leave as Vector{Any} + new = array + else @assert false end + push!(l.static_arrays, new) + return new end @@ -1025,10 +1034,9 @@ function parse_datetime(l) end function try_return_datetime(p::Parser{Dates}, year, month, day, h, m, s, ms) where Dates - if Dates !== nothing || p.Dates !== nothing - mod = Dates !== nothing ? Dates : p.Dates + if Dates !== nothing try - return mod.DateTime(year, month, day, h, m, s, ms) + return Dates.DateTime(year, month, day, h, m, s, ms) catch ex ex isa ArgumentError && return ParserError(ErrParsingDateTime) rethrow() @@ -1039,10 +1047,9 @@ function try_return_datetime(p::Parser{Dates}, year, month, day, h, m, s, ms) wh end function try_return_date(p::Parser{Dates}, year, month, day) where Dates - if Dates !== nothing || p.Dates !== nothing - mod = Dates !== nothing ? Dates : p.Dates + if Dates !== nothing try - return mod.Date(year, month, day) + return Dates.Date(year, month, day) catch ex ex isa ArgumentError && return ParserError(ErrParsingDateTime) rethrow() @@ -1062,10 +1069,9 @@ function parse_local_time(l::Parser) end function try_return_time(p::Parser{Dates}, h, m, s, ms) where Dates - if Dates !== nothing || p.Dates !== nothing - mod = Dates !== nothing ? Dates : p.Dates + if Dates !== nothing try - return mod.Time(h, m, s, ms) + return Dates.Time(h, m, s, ms) catch ex ex isa ArgumentError && return ParserError(ErrParsingDateTime) rethrow() diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl index bfc884cc30634..9bca72f6c7a14 100644 --- a/stdlib/Artifacts/src/Artifacts.jl +++ b/stdlib/Artifacts/src/Artifacts.jl @@ -175,13 +175,11 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any} end end - overrides = Dict{Symbol,Any}( - # Overrides by UUID - :UUID => overrides_uuid, - - # Overrides by hash - :hash => overrides_hash - ) + overrides = Dict{Symbol,Any}() + # Overrides by UUID + overrides[:UUID] = overrides_uuid + # Overrides by hash + overrides[:hash] = overrides_hash ARTIFACT_OVERRIDES[] = overrides return overrides @@ -351,7 +349,7 @@ function process_overrides(artifact_dict::Dict, pkg_uuid::Base.UUID) # If we've got a platform-specific friend, override all hashes: artifact_dict_name = artifact_dict[name] - if isa(artifact_dict_name, Array) + if isa(artifact_dict_name, Vector{Any}) for entry in artifact_dict_name entry = entry::Dict{String,Any} hash = SHA1(entry["git-tree-sha1"]::String) @@ -544,7 +542,7 @@ function jointail(dir, tail) end end -function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, @nospecialize(lazyartifacts)) +function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, ::Val{LazyArtifacts}) where LazyArtifacts pkg = Base.PkgId(__module__) if pkg.uuid !== nothing # Process overrides for this UUID, if we know what it is @@ -563,11 +561,11 @@ function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dic # If not, try determining what went wrong: meta = artifact_meta(name, artifact_dict, artifacts_toml; platform) if meta !== nothing && get(meta, "lazy", false) - if lazyartifacts isa Module && isdefined(lazyartifacts, :ensure_artifact_installed) - if nameof(lazyartifacts) in (:Pkg, :Artifacts) + if LazyArtifacts isa Module && isdefined(LazyArtifacts, :ensure_artifact_installed) + if nameof(LazyArtifacts) in (:Pkg, :Artifacts) Base.depwarn("using Pkg instead of using LazyArtifacts is deprecated", :var"@artifact_str", force=true) end - return jointail(lazyartifacts.ensure_artifact_installed(string(name), meta, artifacts_toml; platform), path_tail) + return jointail(LazyArtifacts.ensure_artifact_installed(string(name), meta, artifacts_toml; platform), path_tail) end error("Artifact $(repr(name)) is a lazy artifact; package developers must call `using LazyArtifacts` in $(__module__) before using lazy artifacts.") end @@ -699,10 +697,10 @@ macro artifact_str(name, platform=nothing) # Check if the user has provided `LazyArtifacts`, and thus supports lazy artifacts # If not, check to see if `Pkg` or `Pkg.Artifacts` has been imported. - lazyartifacts = nothing + LazyArtifacts = nothing for module_name in (:LazyArtifacts, :Pkg, :Artifacts) if isdefined(__module__, module_name) - lazyartifacts = GlobalRef(__module__, module_name) + LazyArtifacts = GlobalRef(__module__, module_name) break end end @@ -714,7 +712,7 @@ macro artifact_str(name, platform=nothing) platform = HostPlatform() artifact_name, artifact_path_tail, hash = artifact_slash_lookup(name, artifact_dict, artifacts_toml, platform) return quote - Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), $(lazyartifacts))::String + Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), Val($(LazyArtifacts)))::String end else if platform === nothing @@ -723,7 +721,7 @@ macro artifact_str(name, platform=nothing) return quote local platform = $(esc(platform)) local artifact_name, artifact_path_tail, hash = artifact_slash_lookup($(esc(name)), $(artifact_dict), $(artifacts_toml), platform) - Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, $(lazyartifacts))::String + Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, Val($(LazyArtifacts)))::String end end end diff --git a/stdlib/TOML/test/values.jl b/stdlib/TOML/test/values.jl index 4fc49d47fc98d..53be1b04708b3 100644 --- a/stdlib/TOML/test/values.jl +++ b/stdlib/TOML/test/values.jl @@ -172,6 +172,6 @@ end @testset "Array" begin @test testval("[1,2,3]", Int64[1,2,3]) @test testval("[1.0, 2.0, 3.0]", Float64[1.0, 2.0, 3.0]) - @test testval("[1.0, 2.0, 3]", Union{Int64, Float64}[1.0, 2.0, Int64(3)]) + @test testval("[1.0, 2.0, 3]", Any[1.0, 2.0, Int64(3)]) @test testval("[1.0, 2, \"foo\"]", Any[1.0, Int64(2), "foo"]) end From 8cae8d138b8105e5aa3b96cbe40713a97690e7d4 Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Sun, 8 Sep 2024 00:09:16 +0530 Subject: [PATCH 10/57] Remove redundant conversion in structured matrix broadcasting (#55695) The additional construction is unnecessary, as we are already constructing a `Matrix`. Performance: ```julia julia> using LinearAlgebra julia> U = UpperTriangular(rand(1000,1000)); julia> L = LowerTriangular(rand(1000,1000)); julia> @btime $U .+ $L; 1.956 ms (6 allocations: 15.26 MiB) # nightly 1.421 ms (3 allocations: 7.63 MiB) # This PR ``` --- stdlib/LinearAlgebra/src/structuredbroadcast.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/LinearAlgebra/src/structuredbroadcast.jl b/stdlib/LinearAlgebra/src/structuredbroadcast.jl index 3c60b37959f91..0c06f84116fc7 100644 --- a/stdlib/LinearAlgebra/src/structuredbroadcast.jl +++ b/stdlib/LinearAlgebra/src/structuredbroadcast.jl @@ -96,7 +96,7 @@ structured_broadcast_alloc(bc, ::Type{UnitLowerTriangular}, ::Type{ElType}, n) w structured_broadcast_alloc(bc, ::Type{UnitUpperTriangular}, ::Type{ElType}, n) where {ElType} = UnitUpperTriangular(Array{ElType}(undef, n, n)) structured_broadcast_alloc(bc, ::Type{Matrix}, ::Type{ElType}, n) where {ElType} = - Matrix(Array{ElType}(undef, n, n)) + Array{ElType}(undef, n, n) # A _very_ limited list of structure-preserving functions known at compile-time. This list is # derived from the formerly-implemented `broadcast` methods in 0.6. Note that this must From 4f0a333d9d76df76a6383ed2113e66c789d5ecee Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Sat, 7 Sep 2024 18:16:22 -0400 Subject: [PATCH 11/57] [Profile] fix threading issue (#55704) I forgot about the existence of threads, so had hard-coded this to only support one thread. Clearly that is not sufficient though, so use the semaphore here as it is intended to be used. Fixes #55703 --------- Co-authored-by: Ian Butterworth --- src/signals-unix.c | 24 ++++++++++-------------- stdlib/Profile/test/runtests.jl | 3 ++- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/signals-unix.c b/src/signals-unix.c index d0885b6bdee3f..f99eca31730b6 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -426,6 +426,7 @@ pthread_mutex_t in_signal_lock; // shared with jl_delete_thread static bt_context_t *signal_context; // protected by in_signal_lock static int exit_signal_cond = -1; static int signal_caught_cond = -1; +static int signals_inflight = 0; int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) { @@ -438,7 +439,7 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) pthread_mutex_unlock(&in_signal_lock); return 0; } - if (jl_atomic_load(&ptls2->signal_request) != 0) { + while (signals_inflight) { // something is wrong, or there is already a usr2 in flight elsewhere // try to wait for it to finish or wait for timeout struct pollfd event = {signal_caught_cond, POLLIN, 0}; @@ -450,25 +451,16 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) pthread_mutex_unlock(&in_signal_lock); return 0; } - } - // check for any stale signal_caught_cond events - struct pollfd event = {signal_caught_cond, POLLIN, 0}; - do { - err = poll(&event, 1, 0); - } while (err == -1 && errno == EINTR); - if (err == -1) { - pthread_mutex_unlock(&in_signal_lock); - return 0; - } - if ((event.revents & POLLIN) != 0) { // consume it before continuing eventfd_t got; do { err = read(signal_caught_cond, &got, sizeof(eventfd_t)); } while (err == -1 && errno == EINTR); if (err != sizeof(eventfd_t)) abort(); - assert(got == 1); (void) got; + assert(signals_inflight >= got); + signals_inflight -= got; } + signals_inflight++; sig_atomic_t request = jl_atomic_exchange(&ptls2->signal_request, 1); assert(request == 0 || request == -1); request = 1; @@ -485,6 +477,7 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) if (err == -1) { // not ready after timeout: try to cancel this request if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) { + signals_inflight--; pthread_mutex_unlock(&in_signal_lock); return 0; } @@ -494,7 +487,9 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx) err = read(signal_caught_cond, &got, sizeof(eventfd_t)); } while (err == -1 && errno == EINTR); if (err != sizeof(eventfd_t)) abort(); - assert(got == 1); (void) got; + assert(signals_inflight >= got); + signals_inflight -= got; + signals_inflight++; // Now the other thread is waiting on exit_signal_cond (verify that here by // checking it is 0, and add an acquire barrier for good measure) request = jl_atomic_load_acquire(&ptls2->signal_request); @@ -521,6 +516,7 @@ static void jl_try_deliver_sigint(void) jl_safepoint_enable_sigint(); jl_wake_libuv(); pthread_mutex_lock(&in_signal_lock); + signals_inflight++; jl_atomic_store_release(&ptls2->signal_request, 2); // This also makes sure `sleep` is aborted. pthread_kill(ptls2->system_id, SIGUSR2); diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 32d628130c4ac..1769cbd12da3e 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -168,7 +168,8 @@ let cmd = Base.julia_cmd() println("done") print(Profile.len_data()) """ - p = open(`$cmd -e $script`) + # use multiple threads here to ensure that profiling works with threading + p = open(`$cmd -t2 -e $script`) t = Timer(120) do t # should be under 10 seconds, so give it 2 minutes then report failure println("KILLING debuginfo registration test BY PROFILE TEST WATCHDOG\n") From 8173750700d21a6c35b47e76075eb84a9cb8a393 Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Sun, 8 Sep 2024 12:12:38 +0200 Subject: [PATCH 12/57] delete flaky ranges/`TwicePrecision` test (#55712) Fixes #55710 --- test/ranges.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/test/ranges.jl b/test/ranges.jl index 16b2c6bf7b77b..86cd1c3f2345c 100644 --- a/test/ranges.jl +++ b/test/ranges.jl @@ -292,15 +292,10 @@ end rand_twiceprecision(::Type{T}) where {T<:Number} = Base.TwicePrecision{T}(rand(widen(T))) - rand_twiceprecision_is_ok(::Type{T}) where {T<:Number} = @test !iszero(rand_twiceprecision(T).lo) - # For this test the `BigFloat` mantissa needs to be just a bit # larger than the `Float64` mantissa setprecision(BigFloat, 70) do n = 10 - @testset "rand twiceprecision is ok" for T ∈ (Float32, Float64), i ∈ 1:n - rand_twiceprecision_is_ok(T) - end @testset "twiceprecision roundtrip is not lossy 1" for i ∈ 1:n twiceprecision_roundtrip_is_not_lossy(Float64, rand(BigFloat)) end From 95c643a689293eb91a47cc83c41533a94c3677cc Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Sun, 8 Sep 2024 18:12:38 +0530 Subject: [PATCH 13/57] Avoid stack overflow in triangular eigvecs (#55497) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a stack overflow in ```julia julia> using LinearAlgebra, StaticArrays julia> U = UpperTriangular(SMatrix{2,2}(1:4)) 2×2 UpperTriangular{Int64, SMatrix{2, 2, Int64, 4}} with indices SOneTo(2)×SOneTo(2): 1 3 ⋅ 4 julia> eigvecs(U) Warning: detected a stack overflow; program state may be corrupted, so further execution might be unreliable. ERROR: StackOverflowError: Stacktrace: [1] eigvecs(A::UpperTriangular{Float32, SMatrix{2, 2, Float32, 4}}) (repeats 79984 times) @ LinearAlgebra ~/.julia/juliaup/julia-nightly/share/julia/stdlib/v1.12/LinearAlgebra/src/triangular.jl:2749 ``` After this, ```julia julia> eigvecs(U) 2×2 Matrix{Float32}: 1.0 1.0 0.0 1.0 ``` --- stdlib/LinearAlgebra/src/triangular.jl | 8 ++++++++ stdlib/LinearAlgebra/test/triangular.jl | 16 ++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl index 71473e0dc1174..df0d0d4fd0d8b 100644 --- a/stdlib/LinearAlgebra/src/triangular.jl +++ b/stdlib/LinearAlgebra/src/triangular.jl @@ -2782,6 +2782,14 @@ end # Generic eigensystems eigvals(A::AbstractTriangular) = diag(A) +# fallback for unknown types +function eigvecs(A::AbstractTriangular{<:BlasFloat}) + if istriu(A) + eigvecs(UpperTriangular(Matrix(A))) + else # istril(A) + eigvecs(LowerTriangular(Matrix(A))) + end +end function eigvecs(A::AbstractTriangular{T}) where T TT = promote_type(T, Float32) if TT <: BlasFloat diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl index 8748d11bd1da4..a09d0092e9f39 100644 --- a/stdlib/LinearAlgebra/test/triangular.jl +++ b/stdlib/LinearAlgebra/test/triangular.jl @@ -1198,6 +1198,22 @@ end end end +@testset "eigvecs for AbstractTriangular" begin + S = SizedArrays.SizedArray{(3,3)}(reshape(1:9,3,3)) + for T in (UpperTriangular, UnitUpperTriangular, + LowerTriangular, UnitLowerTriangular) + U = T(S) + V = eigvecs(U) + λ = eigvals(U) + @test U * V ≈ V * Diagonal(λ) + + MU = MyTriangular(U) + V = eigvecs(U) + λ = eigvals(U) + @test MU * V ≈ V * Diagonal(λ) + end +end + @testset "(l/r)mul! and (l/r)div! for generic triangular" begin @testset for T in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular) M = MyTriangular(T(rand(4,4))) From 527fa1c1cd4335074e775bde686dbcbdf921c177 Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Mon, 9 Sep 2024 01:17:29 -0400 Subject: [PATCH 14/57] builtins: add `Core.throw_methoderror` (#55705) This allows us to simulate/mark calls that are known-to-fail. Required for https://github.com/JuliaLang/julia/pull/54972/ --- base/compiler/abstractinterpretation.jl | 13 +++++++++++++ .../compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl | 1 + base/compiler/tfuncs.jl | 4 ++++ src/builtin_proto.h | 1 + src/builtins.c | 9 +++++++++ src/gf.c | 4 ++-- src/julia_internal.h | 2 +- src/staticdata.c | 2 +- 8 files changed, 32 insertions(+), 4 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 8623a32ddbb2b..83a39ce10d891 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -2226,6 +2226,17 @@ function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::Ab return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()) end +function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState) + exct = if length(argtypes) == 1 + ArgumentError + elseif !isvarargtype(argtypes[2]) + MethodError + else + tmerge(𝕃ᵢ, MethodError, ArgumentError) + end + return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()) +end + # call where the function is known exactly function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, @@ -2246,6 +2257,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), return abstract_applicable(interp, argtypes, sv, max_methods) elseif f === throw return abstract_throw(interp, argtypes, sv) + elseif f === Core.throw_methoderror + return abstract_throw_methoderror(interp, argtypes, sv) end rt = abstract_call_builtin(interp, f, arginfo, sv) ft = popfirst!(argtypes) diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl index f74cb90e6ab51..6967efe495be1 100644 --- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl +++ b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl @@ -1213,6 +1213,7 @@ escape_builtin!(::typeof(Core.donotdelete), _...) = false # not really safe, but `ThrownEscape` will be imposed later escape_builtin!(::typeof(isdefined), _...) = false escape_builtin!(::typeof(throw), _...) = false +escape_builtin!(::typeof(Core.throw_methoderror), _...) = false function escape_builtin!(::typeof(ifelse), astate::AnalysisState, pc::Int, args::Vector{Any}) length(args) == 4 || return false diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index 0c57c04a6ddea..64a93bd07c2fa 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -89,6 +89,7 @@ function add_tfunc(@nospecialize(f::Builtin), minarg::Int, maxarg::Int, @nospeci end add_tfunc(throw, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0) +add_tfunc(Core.throw_methoderror, 1, INT_INF, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0) # the inverse of typeof_tfunc # returns (type, isexact, isconcrete, istype) @@ -2313,6 +2314,7 @@ const _CONSISTENT_BUILTINS = Any[ (<:), typeassert, throw, + Core.throw_methoderror, setfield!, donotdelete ] @@ -2335,6 +2337,7 @@ const _EFFECT_FREE_BUILTINS = [ (<:), typeassert, throw, + Core.throw_methoderror, getglobal, compilerbarrier, ] @@ -2350,6 +2353,7 @@ const _INACCESSIBLEMEM_BUILTINS = Any[ isa, nfields, throw, + Core.throw_methoderror, tuple, typeassert, typeof, diff --git a/src/builtin_proto.h b/src/builtin_proto.h index 8b97c46df72da..7fbd555758675 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -69,6 +69,7 @@ DECLARE_BUILTIN(svec); DECLARE_BUILTIN(swapfield); DECLARE_BUILTIN(swapglobal); DECLARE_BUILTIN(throw); +DECLARE_BUILTIN(throw_methoderror); DECLARE_BUILTIN(tuple); DECLARE_BUILTIN(typeassert); DECLARE_BUILTIN(typeof); diff --git a/src/builtins.c b/src/builtins.c index 152836bcab6a9..75c4d02c898b2 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -580,6 +580,14 @@ JL_CALLABLE(jl_f_throw) return jl_nothing; } +JL_CALLABLE(jl_f_throw_methoderror) +{ + JL_NARGSV(throw_methoderror, 1); + size_t world = jl_get_tls_world_age(); + jl_method_error(args[0], &args[1], nargs, world); + return jl_nothing; +} + JL_CALLABLE(jl_f_ifelse) { JL_NARGS(ifelse, 3, 3); @@ -2437,6 +2445,7 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin_func("_compute_sparams", jl_f__compute_sparams); add_builtin_func("_svec_ref", jl_f__svec_ref); add_builtin_func("current_scope", jl_f_current_scope); + add_builtin_func("throw_methoderror", jl_f_throw_methoderror); // builtin types add_builtin("Any", (jl_value_t*)jl_any_type); diff --git a/src/gf.c b/src/gf.c index 95bab0d0f832e..970cb62b8a862 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2335,7 +2335,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method JL_GC_POP(); } -static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, size_t world) +static void JL_NORETURN jl_method_error_bare(jl_value_t *f, jl_value_t *args, size_t world) { if (jl_methoderror_type) { jl_value_t *e = jl_new_struct_uninit(jl_methoderror_type); @@ -2360,7 +2360,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, // not reached } -void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world) +void JL_NORETURN jl_method_error(jl_value_t *f, jl_value_t **args, size_t na, size_t world) { jl_value_t *argtup = jl_f_tuple(NULL, args, na - 1); JL_GC_PUSH1(&argtup); diff --git a/src/julia_internal.h b/src/julia_internal.h index ac955e2bcb3a6..f00667d016796 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -705,7 +705,7 @@ int jl_valid_type_param(jl_value_t *v); JL_DLLEXPORT jl_value_t *jl_apply_2va(jl_value_t *f, jl_value_t **args, uint32_t nargs); -void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world); +void JL_NORETURN jl_method_error(jl_value_t *F, jl_value_t **args, size_t na, size_t world); JL_DLLEXPORT jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...); JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t); diff --git a/src/staticdata.c b/src/staticdata.c index 6dfe5e91a9c55..b991dfe8f37f3 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -497,7 +497,7 @@ static htable_t relocatable_ext_cis; // (reverse of fptr_to_id) // This is a manually constructed dual of the fvars array, which would be produced by codegen for Julia code, for C. static const jl_fptr_args_t id_to_fptrs[] = { - &jl_f_throw, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa, + &jl_f_throw, &jl_f_throw_methoderror, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa, &jl_f_typeassert, &jl_f__apply_iterate, &jl_f__apply_pure, &jl_f__call_latest, &jl_f__call_in_world, &jl_f__call_in_world_total, &jl_f_isdefined, &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call, From 5272dad1f8f688b24a4cd85ce02b1d57cc7840b7 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 9 Sep 2024 10:01:16 -0400 Subject: [PATCH 15/57] Small missing tests for Irrationals (#55657) Looks like a bunch of methods for `Irrational`s are tested but not picked up by coverage... --- test/numbers.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/numbers.jl b/test/numbers.jl index 34e775f9b2eea..fc3dc2c06bb7c 100644 --- a/test/numbers.jl +++ b/test/numbers.jl @@ -1158,6 +1158,8 @@ end end @testset "Irrationals compared with Rationals and Floats" begin + @test pi != Float64(pi) + @test Float64(pi) != pi @test Float64(pi,RoundDown) < pi @test Float64(pi,RoundUp) > pi @test !(Float64(pi,RoundDown) > pi) @@ -1176,6 +1178,7 @@ end @test nextfloat(big(pi)) > pi @test !(prevfloat(big(pi)) > pi) @test !(nextfloat(big(pi)) < pi) + @test big(typeof(pi)) == BigFloat @test 2646693125139304345//842468587426513207 < pi @test !(2646693125139304345//842468587426513207 > pi) From 169e9e8de1b1429e8efbacfd6274c2a5399989bf Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 9 Sep 2024 12:10:33 -0300 Subject: [PATCH 16/57] Implement faster thread local rng for scheduler (#55501) Implement optimal uniform random number generator using the method proposed in https://github.com/swiftlang/swift/pull/39143 based on OpenSSL's implementation of it in https://github.com/openssl/openssl/blob/1d2cbd9b5a126189d5e9bc78a3bdb9709427d02b/crypto/rand/rand_uniform.c#L13-L99 This PR also fixes some bugs found while developing it. This is a replacement for https://github.com/JuliaLang/julia/pull/50203 and fixes the issues found by @IanButterworth with both rngs C rng image New scheduler rng image ~On my benchmarks the julia implementation seems to be almost 50% faster than the current implementation.~ With oscars suggestion of removing the debiasing this is now almost 5x faster than the original implementation. And almost fully branchless We might want to backport the two previous commits since they technically fix bugs. --------- Co-authored-by: Valentin Churavy --- base/partr.jl | 55 ++++++++++++++++++++++++++++++++++++++- src/ccall.cpp | 32 +++++++++++++++++++++++ src/jl_exported_funcs.inc | 2 ++ src/julia_threads.h | 2 ++ src/scheduler.c | 9 ------- src/threading.c | 12 +++++++++ 6 files changed, 102 insertions(+), 10 deletions(-) diff --git a/base/partr.jl b/base/partr.jl index 8c95e3668ee74..6053a584af5ba 100644 --- a/base/partr.jl +++ b/base/partr.jl @@ -20,7 +20,60 @@ const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)] const heaps_lock = [SpinLock(), SpinLock()] -cong(max::UInt32) = iszero(max) ? UInt32(0) : ccall(:jl_rand_ptls, UInt32, (UInt32,), max) + UInt32(1) +""" + cong(max::UInt32) + +Return a random UInt32 in the range `1:max` except if max is 0, in that case return 0. +""" +cong(max::UInt32) = iszero(max) ? UInt32(0) : rand_ptls(max) + UInt32(1) #TODO: make sure users don't use 0 and remove this check + +get_ptls_rng() = ccall(:jl_get_ptls_rng, UInt64, ()) + +set_ptls_rng(seed::UInt64) = ccall(:jl_set_ptls_rng, Cvoid, (UInt64,), seed) + +""" + rand_ptls(max::UInt32) + +Return a random UInt32 in the range `0:max-1` using the thread-local RNG +state. Max must be greater than 0. +""" +Base.@assume_effects :removable :inaccessiblememonly :notaskstate function rand_ptls(max::UInt32) + rngseed = get_ptls_rng() + val, seed = rand_uniform_max_int32(max, rngseed) + set_ptls_rng(seed) + return val % UInt32 +end + +# This implementation is based on OpenSSLs implementation of rand_uniform +# https://github.com/openssl/openssl/blob/1d2cbd9b5a126189d5e9bc78a3bdb9709427d02b/crypto/rand/rand_uniform.c#L13-L99 +# Comments are vendored from their implementation as well. +# For the original developer check the PR to swift https://github.com/apple/swift/pull/39143. + +# Essentially it boils down to incrementally generating a fixed point +# number on the interval [0, 1) and multiplying this number by the upper +# range limit. Once it is certain what the fractional part contributes to +# the integral part of the product, the algorithm has produced a definitive +# result. +""" + rand_uniform_max_int32(max::UInt32, seed::UInt64) + +Return a random UInt32 in the range `0:max-1` using the given seed. +Max must be greater than 0. +""" +Base.@assume_effects :total function rand_uniform_max_int32(max::UInt32, seed::UInt64) + if max == UInt32(1) + return UInt32(0), seed + end + # We are generating a fixed point number on the interval [0, 1). + # Multiplying this by the range gives us a number on [0, upper). + # The high word of the multiplication result represents the integral part + # This is not completely unbiased as it's missing the fractional part of the original implementation but it's good enough for our purposes + seed = UInt64(69069) * seed + UInt64(362437) + prod = (UInt64(max)) * (seed % UInt32) # 64 bit product + i = prod >> 32 % UInt32 # integral part + return i % UInt32, seed +end + function multiq_sift_up(heap::taskheap, idx::Int32) diff --git a/src/ccall.cpp b/src/ccall.cpp index 36808e13fdbf9..7ab8cfa974d6f 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -22,6 +22,8 @@ TRANSFORMED_CCALL_STAT(jl_cpu_wake); TRANSFORMED_CCALL_STAT(jl_gc_safepoint); TRANSFORMED_CCALL_STAT(jl_get_ptls_states); TRANSFORMED_CCALL_STAT(jl_threadid); +TRANSFORMED_CCALL_STAT(jl_get_ptls_rng); +TRANSFORMED_CCALL_STAT(jl_set_ptls_rng); TRANSFORMED_CCALL_STAT(jl_get_tls_world_age); TRANSFORMED_CCALL_STAT(jl_get_world_counter); TRANSFORMED_CCALL_STAT(jl_gc_enable_disable_finalizers_internal); @@ -1692,6 +1694,36 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) ai.decorateInst(tid); return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt); } + else if (is_libjulia_func(jl_get_ptls_rng)) { + ++CCALL_STAT(jl_get_ptls_rng); + assert(lrt == getInt64Ty(ctx.builder.getContext())); + assert(!isVa && !llvmcall && nccallargs == 0); + JL_GC_POP(); + Value *ptls_p = get_current_ptls(ctx); + const int rng_offset = offsetof(jl_tls_states_t, rngseed); + Value *rng_ptr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, rng_offset / sizeof(int8_t))); + setName(ctx.emission_context, rng_ptr, "rngseed_ptr"); + LoadInst *rng_value = ctx.builder.CreateAlignedLoad(getInt64Ty(ctx.builder.getContext()), rng_ptr, Align(sizeof(void*))); + setName(ctx.emission_context, rng_value, "rngseed"); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + ai.decorateInst(rng_value); + return mark_or_box_ccall_result(ctx, rng_value, retboxed, rt, unionall, static_rt); + } + else if (is_libjulia_func(jl_set_ptls_rng)) { + ++CCALL_STAT(jl_set_ptls_rng); + assert(lrt == getVoidTy(ctx.builder.getContext())); + assert(!isVa && !llvmcall && nccallargs == 1); + JL_GC_POP(); + Value *ptls_p = get_current_ptls(ctx); + const int rng_offset = offsetof(jl_tls_states_t, rngseed); + Value *rng_ptr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, rng_offset / sizeof(int8_t))); + setName(ctx.emission_context, rng_ptr, "rngseed_ptr"); + assert(argv[0].V->getType() == getInt64Ty(ctx.builder.getContext())); + auto store = ctx.builder.CreateAlignedStore(argv[0].V, rng_ptr, Align(sizeof(void*))); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe); + ai.decorateInst(store); + return ghostValue(ctx, jl_nothing_type); + } else if (is_libjulia_func(jl_get_tls_world_age)) { bool toplevel = !(ctx.linfo && jl_is_method(ctx.linfo->def.method)); if (!toplevel) { // top level code does not see a stable world age during execution diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 7f1636ad9ad80..7abf2b055bb8c 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -452,6 +452,8 @@ XX(jl_test_cpu_feature) \ XX(jl_threadid) \ XX(jl_threadpoolid) \ + XX(jl_get_ptls_rng) \ + XX(jl_set_ptls_rng) \ XX(jl_throw) \ XX(jl_throw_out_of_memory_error) \ XX(jl_too_few_args) \ diff --git a/src/julia_threads.h b/src/julia_threads.h index 7c6de1896ca13..b697a0bf030ed 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -18,6 +18,8 @@ extern "C" { JL_DLLEXPORT int16_t jl_threadid(void); JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT; +JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT; // JULIA_ENABLE_THREADING may be controlled by altering JULIA_THREADS in Make.user diff --git a/src/scheduler.c b/src/scheduler.c index bd7da13aa42e3..bb2f85b52283f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -84,15 +84,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT; -// parallel task runtime -// --- - -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) -{ - jl_ptls_t ptls = jl_current_task->ptls; - return cong(max, &ptls->rngseed); -} - // initialize the threading infrastructure // (called only by the main thread) void jl_init_threadinginfra(void) diff --git a/src/threading.c b/src/threading.c index 2f3719b89fac3..44b1192528531 100644 --- a/src/threading.c +++ b/src/threading.c @@ -314,6 +314,18 @@ JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT return -1; // everything else uses threadpool -1 (does not belong to any threadpool) } +// get thread local rng +JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT +{ + return jl_current_task->ptls->rngseed; +} + +// get thread local rng +JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT +{ + jl_current_task->ptls->rngseed = new_seed; +} + jl_ptls_t jl_init_threadtls(int16_t tid) { #ifndef _OS_WINDOWS_ From 1463c9991c4a75730e4d6822313180ca323241c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1ll=20Haraldsson?= Date: Mon, 9 Sep 2024 17:57:18 +0000 Subject: [PATCH 17/57] Add precompile signatures to Markdown to reduce latency. (#55715) Fixes #55706 that is seemingly a 4472x regression, not just 16x (was my first guess, based on CondaPkg, also fixes or greatly mitigates https://github.com/JuliaPy/CondaPkg.jl/issues/145), and large part of 3x regression for PythonCall. --------- Co-authored-by: Kristoffer Carlsson --- stdlib/Markdown/src/Markdown.jl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/stdlib/Markdown/src/Markdown.jl b/stdlib/Markdown/src/Markdown.jl index b9ff56297fe51..1832e3a6a6956 100644 --- a/stdlib/Markdown/src/Markdown.jl +++ b/stdlib/Markdown/src/Markdown.jl @@ -122,4 +122,25 @@ import Base.Docs: catdoc catdoc(md::MD...) = MD(md...) +if Base.generating_output() + # workload to reduce latency + md""" + # H1 + ## H2 + ### H3 + **bold text** + *italicized text* + > blockquote + 1. First item + 2. Second item + 3. Third item + - First item + - Second item + - Third item + `code` + Horizontal Rule + --- + """ +end + end From 68feddc7c89b7f328eb72e336c7dbe51ac47b626 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> Date: Mon, 9 Sep 2024 14:23:38 -0400 Subject: [PATCH 18/57] Fix invalidations for FileIO (#55593) Fixes https://github.com/JuliaIO/FileIO.jl/issues/396 --- base/strings/io.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/base/strings/io.jl b/base/strings/io.jl index acbd945c8e137..754e058cd2f54 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -365,7 +365,8 @@ function _join_preserve_annotations(iterator, args...) # in nature, we extract an `AnnotatedString`, otherwise we just extract # a plain `String` from `io`. if isconcretetype(et) || !isempty(io.annotations) - read(seekstart(io), AnnotatedString{String}) + seekstart(io) + read(io, AnnotatedString{String}) else String(take!(io.io)) end From 88c90caaaff52963eb640ac2978e7b921e2dabdb Mon Sep 17 00:00:00 2001 From: Zentrik Date: Mon, 9 Sep 2024 21:19:30 +0100 Subject: [PATCH 19/57] Fix various issues with PGO+LTO makefile (#55581) This fixes various issues with the PGO+LTO makefile - `USECCACHE` doesn't work throwing an error at https://github.com/JuliaLang/julia/blob/eb5587dac02d1f6edf486a71b95149139cc5d9f7/Make.inc#L734 This is because setting `CC` and `CCX` by passing them as arguments to `make` prevents `Make.inc` from prepending these variables with `ccache` as `Make.inc` doesn't use override. To workaround this I instead set `USECLANG` and add the toolchain to the `PATH`. - To deal with similar issues for the other make flags, I pass them as environment variables which can be edited in `Make.inc`. - I add a way to build in one go by creating the `all` target, now you can just run `make` and a PGO+LTO build that profiles Julia's build will be generated. - I workaround `PROFRAW_FILES` not being reevaluated after `stage1` builds, this caused the generation of `PROFILE_FILE` to run an outdated command if `stage1` was built and affected the profraw files. This is important when building in one go. - I add a way to run rules like `binary-dist` which are not defined in this makefile with the correct toolchain which for example prevents `make binary-dist` from unnecessarily rebuilding `sys.ji`. - Include `-Wl,--undefined-version` till https://github.com/JuliaLang/julia/issues/54533 gets fixed. These changes need to be copied to the PGO+LTO+BOLT makefile and some to the BOLT makefile in a later pr. --------- Co-authored-by: Zentrik --- contrib/pgo-lto/Makefile | 42 +++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/contrib/pgo-lto/Makefile b/contrib/pgo-lto/Makefile index 896d41ac2e286..ddd86f5d5b39a 100644 --- a/contrib/pgo-lto/Makefile +++ b/contrib/pgo-lto/Makefile @@ -8,7 +8,6 @@ STAGE0_TOOLS:=$(STAGE0_BUILD)/usr/tools/ PROFILE_DIR:=$(CURDIR)/profiles PROFILE_FILE:=$(PROFILE_DIR)/merged.prof -PROFRAW_FILES:=$(wildcard $(PROFILE_DIR)/*.profraw) JULIA_ROOT:=$(CURDIR)/../.. LLVM_CXXFILT:=$(STAGE0_TOOLS)llvm-cxxfilt @@ -26,15 +25,16 @@ AFTER_STAGE1_MESSAGE:='You can now optionally collect more profiling data for us Note that running extensive scripts may result in counter overflows, which can be detected by running $\ `make top`. Afterwards run `make stage2`.' -TOOLCHAIN_FLAGS = $\ - "CC=$(STAGE0_TOOLS)clang" $\ - "CXX=$(STAGE0_TOOLS)clang++" $\ - "LD=$(STAGE0_TOOLS)ld.lld" $\ - "AR=$(STAGE0_TOOLS)llvm-ar" $\ - "RANLIB=$(STAGE0_TOOLS)llvm-ranlib" $\ - "CFLAGS+=$(PGO_CFLAGS)" $\ - "CXXFLAGS+=$(PGO_CXXFLAGS)" $\ - "LDFLAGS+=$(PGO_LDFLAGS)" +STAGE1_FLAGS:=LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-generate=$(PROFILE_DIR)" $\ + CFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)" $\ + CXXFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)" +STAGE2_FLAGS:=LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe" $\ + CFLAGS="-fprofile-use=$(PROFILE_FILE)" $\ + CXXFLAGS="-fprofile-use=$(PROFILE_FILE)" + +COMMON_FLAGS:=USECLANG=1 USE_BINARYBUILDER_LLVM=0 + +all: stage2 # Default target as first in file $(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD): $(MAKE) -C $(JULIA_ROOT) O=$@ configure @@ -48,26 +48,20 @@ stage0: | $(STAGE0_BUILD) touch $@ $(STAGE1_BUILD): stage0 -stage1: PGO_CFLAGS:=-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE) -stage1: PGO_CXXFLAGS:=-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE) -stage1: PGO_LDFLAGS:=-fuse-ld=lld -flto=thin -fprofile-generate=$(PROFILE_DIR) -stage1: export USE_BINARYBUILDER_LLVM=0 stage1: | $(STAGE1_BUILD) - $(MAKE) -C $(STAGE1_BUILD) $(TOOLCHAIN_FLAGS) && touch $@ + @echo "--- Build Julia Stage 1 - with instrumentation" + PATH=$(STAGE0_TOOLS):$$PATH $(STAGE1_FLAGS) $(MAKE) -C $(STAGE1_BUILD) $(COMMON_FLAGS) && touch $@ @echo $(AFTER_STAGE1_MESSAGE) -stage2: PGO_CFLAGS:=-fprofile-use=$(PROFILE_FILE) -stage2: PGO_CXXFLAGS:=-fprofile-use=$(PROFILE_FILE) -stage2: PGO_LDFLAGS:=-fuse-ld=lld -flto=thin -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe -stage2: export USE_BINARYBUILDER_LLVM=0 stage2: $(PROFILE_FILE) | $(STAGE2_BUILD) - $(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) && touch $@ + @echo "--- Build Julia Stage 2 - PGO + LTO optimised" + PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) && touch $@ -install: stage2 - $(MAKE) -C $(STAGE2_BUILD) USE_BINARYBUILDER_LLVM=0 install +.DEFAULT: stage2 + PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) $@ -$(PROFILE_FILE): stage1 $(PROFRAW_FILES) - $(LLVM_PROFDATA) merge -output=$@ $(PROFRAW_FILES) +$(PROFILE_FILE): stage1 $(wildcard $(PROFILE_DIR)/*.profraw) + $(LLVM_PROFDATA) merge -output=$@ $(PROFILE_DIR)/*.profraw # show top 50 functions top: $(PROFILE_FILE) From 99b8868ff563eded17376b3f90ffc1637222f2c6 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 9 Sep 2024 21:01:08 -0400 Subject: [PATCH 20/57] Fix `pkgdir` for extensions (#55720) Fixes https://github.com/JuliaLang/julia/issues/55719 --------- Co-authored-by: Max Horn <241512+fingolfin@users.noreply.github.com> --- base/loading.jl | 16 +++++++++++++++- test/loading.jl | 19 ++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index e0a2563dd0178..fe86a8c198461 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -508,6 +508,8 @@ package root. To get the root directory of the package that implements the current module the form `pkgdir(@__MODULE__)` can be used. +If an extension module is given, the root of the parent package is returned. + ```julia-repl julia> pkgdir(Foo) "/path/to/Foo.jl" @@ -525,7 +527,19 @@ function pkgdir(m::Module, paths::String...) rootmodule = moduleroot(m) path = pathof(rootmodule) path === nothing && return nothing - return joinpath(dirname(dirname(path)), paths...) + original = path + path, base = splitdir(dirname(path)) + if base == "src" + # package source in `../src/Foo.jl` + elseif base == "ext" + # extension source in `../ext/FooExt.jl` + elseif basename(path) == "ext" + # extension source in `../ext/FooExt/FooExt.jl` + path = dirname(path) + else + error("Unexpected path structure for module source: $original") + end + return joinpath(path, paths...) end function get_pkgversion_from_path(path) diff --git a/test/loading.jl b/test/loading.jl index fe6f800276547..8db8405ef2a83 100644 --- a/test/loading.jl +++ b/test/loading.jl @@ -1034,6 +1034,16 @@ end end @testset "Extensions" begin + test_ext = """ + function test_ext(parent::Module, ext::Symbol) + _ext = Base.get_extension(parent, ext) + _ext isa Module || error("expected extension \$ext to be loaded") + _pkgdir = pkgdir(_ext) + _pkgdir == pkgdir(parent) != nothing || error("unexpected extension \$ext pkgdir path: \$_pkgdir") + _pkgversion = pkgversion(_ext) + _pkgversion == pkgversion(parent) || error("unexpected extension \$ext version: \$_pkgversion") + end + """ depot_path = mktempdir() try proj = joinpath(@__DIR__, "project", "Extensions", "HasDepWithExtensions.jl") @@ -1044,6 +1054,7 @@ end cmd = """ $load_distr begin + $ew $test_ext $ew push!(empty!(DEPOT_PATH), $(repr(depot_path))) using HasExtensions $ew using HasExtensions @@ -1051,6 +1062,7 @@ end $ew HasExtensions.ext_loaded && error("ext_loaded set") using HasDepWithExtensions $ew using HasDepWithExtensions + $ew test_ext(HasExtensions, :Extension) $ew Base.get_extension(HasExtensions, :Extension).extvar == 1 || error("extvar in Extension not set") $ew HasExtensions.ext_loaded || error("ext_loaded not set") $ew HasExtensions.ext_folder_loaded && error("ext_folder_loaded set") @@ -1102,13 +1114,14 @@ end test_ext_proj = """ begin + $test_ext using HasExtensions using ExtDep - Base.get_extension(HasExtensions, :Extension) isa Module || error("expected extension to load") + test_ext(HasExtensions, :Extension) using ExtDep2 - Base.get_extension(HasExtensions, :ExtensionFolder) isa Module || error("expected extension to load") + test_ext(HasExtensions, :ExtensionFolder) using ExtDep3 - Base.get_extension(HasExtensions, :ExtensionDep) isa Module || error("expected extension to load") + test_ext(HasExtensions, :ExtensionDep) end """ for compile in (`--compiled-modules=no`, ``) From 7a645dd23af4491d1d03f4f1cba5d46d8268677d Mon Sep 17 00:00:00 2001 From: Jishnu Bhattacharya Date: Tue, 10 Sep 2024 12:09:54 +0530 Subject: [PATCH 21/57] Avoid materializing arrays in bidiag matmul (#55450) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, small `Bidiagonal`/`Tridiagonal` matrices are materialized in matrix multiplications, but this is wasteful and unnecessary. This PR changes this to use a naive matrix multiplication for small matrices, and fall back to the banded multiplication for larger ones. Multiplication by a `Bidiagonal` falls back to a banded matrix multiplication for all sizes in the current implementation, and iterates in a cache-friendly manner for the non-`Bidiagonal` matrix. In certain cases, the matrices were being materialized if the non-structured matrix was small, even if the structured matrix was large. This is changed as well in this PR. Some improvements in performance: ```julia julia> B = Bidiagonal(rand(3), rand(2), :U); A = rand(size(B)...); C = similar(A); julia> @btime mul!($C, $A, $B); 193.152 ns (6 allocations: 352 bytes) # nightly v"1.12.0-DEV.1034" 18.826 ns (0 allocations: 0 bytes) # This PR julia> T = Tridiagonal(rand(99), rand(100), rand(99)); A = rand(2, size(T,2)); C = similar(A); julia> @btime mul!($C, $A, $T); 9.398 μs (8 allocations: 79.94 KiB) # nightly 416.407 ns (0 allocations: 0 bytes) # This PR julia> B = Bidiagonal(rand(300), rand(299), :U); A = rand(20000, size(B,2)); C = similar(A); julia> @btime mul!($C, $A, $B); 33.395 ms (0 allocations: 0 bytes) # nightly 6.695 ms (0 allocations: 0 bytes) # This PR (cache-friendly) ``` Closes https://github.com/JuliaLang/julia/pull/55414 --------- Co-authored-by: Daniel Karrasch --- stdlib/LinearAlgebra/src/LinearAlgebra.jl | 4 +- stdlib/LinearAlgebra/src/bidiag.jl | 330 +++++++++++++++++++--- stdlib/LinearAlgebra/test/bidiag.jl | 85 ++++-- stdlib/LinearAlgebra/test/tridiag.jl | 71 +++++ 4 files changed, 422 insertions(+), 68 deletions(-) diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index 27d4255fb656b..17216845b350c 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -673,7 +673,9 @@ matprod_dest(A::Diagonal, B::Diagonal, TS) = _matprod_dest_diag(B, TS) _matprod_dest_diag(A, TS) = similar(A, TS) function _matprod_dest_diag(A::SymTridiagonal, TS) n = size(A, 1) - Tridiagonal(similar(A, TS, n-1), similar(A, TS, n), similar(A, TS, n-1)) + ev = similar(A, TS, max(0, n-1)) + dv = similar(A, TS, n) + Tridiagonal(ev, dv, similar(ev)) end # Special handling for adj/trans vec diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl index d86bad7e41435..8bc5b1c47f366 100644 --- a/stdlib/LinearAlgebra/src/bidiag.jl +++ b/stdlib/LinearAlgebra/src/bidiag.jl @@ -557,7 +557,8 @@ end # function to get the internally stored vectors for Bidiagonal and [Sym]Tridiagonal # to avoid allocations in _mul! below (#24324, #24578) _diag(A::Tridiagonal, k) = k == -1 ? A.dl : k == 0 ? A.d : A.du -_diag(A::SymTridiagonal, k) = k == 0 ? A.dv : A.ev +_diag(A::SymTridiagonal{<:Number}, k) = k == 0 ? A.dv : A.ev +_diag(A::SymTridiagonal, k) = k == 0 ? view(A, diagind(A, IndexStyle(A))) : view(A, diagind(A, 1, IndexStyle(A))) function _diag(A::Bidiagonal, k) if k == 0 return A.dv @@ -577,12 +578,45 @@ function _bibimul!(C, A, B, _add) check_A_mul_B!_sizes(size(C), size(A), size(B)) n = size(A,1) iszero(n) && return C - n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) + if n <= 3 + # naive multiplication + for I in CartesianIndices(C) + _modify!(_add, sum(A[I[1], k] * B[k, I[2]] for k in axes(A,2)), C, I) + end + return C + end # We use `_rmul_or_fill!` instead of `_modify!` here since using # `_modify!` in the following loop will not update the # off-diagonal elements for non-zero beta. _rmul_or_fill!(C, _add.beta) iszero(_add.alpha) && return C + @inbounds begin + # first column of C + C[1,1] += _add(A[1,1]*B[1,1] + A[1, 2]*B[2,1]) + C[2,1] += _add(A[2,1]*B[1,1] + A[2,2]*B[2,1]) + C[3,1] += _add(A[3,2]*B[2,1]) + # second column of C + C[1,2] += _add(A[1,1]*B[1,2] + A[1,2]*B[2,2]) + C[2,2] += _add(A[2,1]*B[1,2] + A[2,2]*B[2,2] + A[2,3]*B[3,2]) + C[3,2] += _add(A[3,2]*B[2,2] + A[3,3]*B[3,2]) + C[4,2] += _add(A[4,3]*B[3,2]) + end # inbounds + # middle columns + __bibimul!(C, A, B, _add) + @inbounds begin + C[n-3,n-1] += _add(A[n-3,n-2]*B[n-2,n-1]) + C[n-2,n-1] += _add(A[n-2,n-2]*B[n-2,n-1] + A[n-2,n-1]*B[n-1,n-1]) + C[n-1,n-1] += _add(A[n-1,n-2]*B[n-2,n-1] + A[n-1,n-1]*B[n-1,n-1] + A[n-1,n]*B[n,n-1]) + C[n, n-1] += _add(A[n,n-1]*B[n-1,n-1] + A[n,n]*B[n,n-1]) + # last column of C + C[n-2, n] += _add(A[n-2,n-1]*B[n-1,n]) + C[n-1, n] += _add(A[n-1,n-1]*B[n-1,n ] + A[n-1,n]*B[n,n ]) + C[n, n] += _add(A[n,n-1]*B[n-1,n ] + A[n,n]*B[n,n ]) + end # inbounds + C +end +function __bibimul!(C, A, B, _add) + n = size(A,1) Al = _diag(A, -1) Ad = _diag(A, 0) Au = _diag(A, 1) @@ -590,44 +624,198 @@ function _bibimul!(C, A, B, _add) Bd = _diag(B, 0) Bu = _diag(B, 1) @inbounds begin - # first row of C - C[1,1] += _add(A[1,1]*B[1,1] + A[1, 2]*B[2, 1]) - C[1,2] += _add(A[1,1]*B[1,2] + A[1,2]*B[2,2]) - C[1,3] += _add(A[1,2]*B[2,3]) - # second row of C - C[2,1] += _add(A[2,1]*B[1,1] + A[2,2]*B[2,1]) - C[2,2] += _add(A[2,1]*B[1,2] + A[2,2]*B[2,2] + A[2,3]*B[3,2]) - C[2,3] += _add(A[2,2]*B[2,3] + A[2,3]*B[3,3]) - C[2,4] += _add(A[2,3]*B[3,4]) for j in 3:n-2 - Ajj₋1 = Al[j-1] - Ajj = Ad[j] + Aj₋2j₋1 = Au[j-2] + Aj₋1j = Au[j-1] Ajj₊1 = Au[j] - Bj₋1j₋2 = Bl[j-2] - Bj₋1j₋1 = Bd[j-1] + Aj₋1j₋1 = Ad[j-1] + Ajj = Ad[j] + Aj₊1j₊1 = Ad[j+1] + Ajj₋1 = Al[j-1] + Aj₊1j = Al[j] + Aj₊2j₊1 = Al[j+1] Bj₋1j = Bu[j-1] - Bjj₋1 = Bl[j-1] Bjj = Bd[j] - Bjj₊1 = Bu[j] Bj₊1j = Bl[j] - Bj₊1j₊1 = Bd[j+1] - Bj₊1j₊2 = Bu[j+1] - C[j,j-2] += _add( Ajj₋1*Bj₋1j₋2) - C[j, j-1] += _add(Ajj₋1*Bj₋1j₋1 + Ajj*Bjj₋1) - C[j, j ] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj + Ajj₊1*Bj₊1j) - C[j, j+1] += _add(Ajj *Bjj₊1 + Ajj₊1*Bj₊1j₊1) - C[j, j+2] += _add(Ajj₊1*Bj₊1j₊2) + + C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j) + C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj) + C[j, j] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj + Ajj₊1*Bj₊1j) + C[j+1, j] += _add(Aj₊1j*Bjj + Aj₊1j₊1*Bj₊1j) + C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j) end - # row before last of C - C[n-1,n-3] += _add(A[n-1,n-2]*B[n-2,n-3]) - C[n-1,n-2] += _add(A[n-1,n-1]*B[n-1,n-2] + A[n-1,n-2]*B[n-2,n-2]) - C[n-1,n-1] += _add(A[n-1,n-2]*B[n-2,n-1] + A[n-1,n-1]*B[n-1,n-1] + A[n-1,n]*B[n,n-1]) - C[n-1,n ] += _add(A[n-1,n-1]*B[n-1,n ] + A[n-1, n]*B[n ,n ]) - # last row of C - C[n,n-2] += _add(A[n,n-1]*B[n-1,n-2]) - C[n,n-1] += _add(A[n,n-1]*B[n-1,n-1] + A[n,n]*B[n,n-1]) - C[n,n ] += _add(A[n,n-1]*B[n-1,n ] + A[n,n]*B[n,n ]) - end # inbounds + end + C +end +function __bibimul!(C, A, B::Bidiagonal, _add) + n = size(A,1) + Al = _diag(A, -1) + Ad = _diag(A, 0) + Au = _diag(A, 1) + Bd = _diag(B, 0) + if B.uplo == 'U' + Bu = _diag(B, 1) + @inbounds begin + for j in 3:n-2 + Aj₋2j₋1 = Au[j-2] + Aj₋1j = Au[j-1] + Aj₋1j₋1 = Ad[j-1] + Ajj = Ad[j] + Ajj₋1 = Al[j-1] + Aj₊1j = Al[j] + Bj₋1j = Bu[j-1] + Bjj = Bd[j] + + C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j) + C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj) + C[j, j] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj) + C[j+1, j] += _add(Aj₊1j*Bjj) + end + end + else # B.uplo == 'L' + Bl = _diag(B, -1) + @inbounds begin + for j in 3:n-2 + Aj₋1j = Au[j-1] + Ajj₊1 = Au[j] + Ajj = Ad[j] + Aj₊1j₊1 = Ad[j+1] + Aj₊1j = Al[j] + Aj₊2j₊1 = Al[j+1] + Bjj = Bd[j] + Bj₊1j = Bl[j] + + C[j-1, j] += _add(Aj₋1j*Bjj) + C[j, j] += _add(Ajj*Bjj + Ajj₊1*Bj₊1j) + C[j+1, j] += _add(Aj₊1j*Bjj + Aj₊1j₊1*Bj₊1j) + C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j) + end + end + end + C +end +function __bibimul!(C, A::Bidiagonal, B, _add) + n = size(A,1) + Bl = _diag(B, -1) + Bd = _diag(B, 0) + Bu = _diag(B, 1) + Ad = _diag(A, 0) + if A.uplo == 'U' + Au = _diag(A, 1) + @inbounds begin + for j in 3:n-2 + Aj₋2j₋1 = Au[j-2] + Aj₋1j = Au[j-1] + Ajj₊1 = Au[j] + Aj₋1j₋1 = Ad[j-1] + Ajj = Ad[j] + Aj₊1j₊1 = Ad[j+1] + Bj₋1j = Bu[j-1] + Bjj = Bd[j] + Bj₊1j = Bl[j] + + C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j) + C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj) + C[j, j] += _add(Ajj*Bjj + Ajj₊1*Bj₊1j) + C[j+1, j] += _add(Aj₊1j₊1*Bj₊1j) + end + end + else # A.uplo == 'L' + Al = _diag(A, -1) + @inbounds begin + for j in 3:n-2 + Aj₋1j₋1 = Ad[j-1] + Ajj = Ad[j] + Aj₊1j₊1 = Ad[j+1] + Ajj₋1 = Al[j-1] + Aj₊1j = Al[j] + Aj₊2j₊1 = Al[j+1] + Bj₋1j = Bu[j-1] + Bjj = Bd[j] + Bj₊1j = Bl[j] + + C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j) + C[j, j] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj) + C[j+1, j] += _add(Aj₊1j*Bjj + Aj₊1j₊1*Bj₊1j) + C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j) + end + end + end + C +end +function __bibimul!(C, A::Bidiagonal, B::Bidiagonal, _add) + n = size(A,1) + Ad = _diag(A, 0) + Bd = _diag(B, 0) + if A.uplo == 'U' && B.uplo == 'U' + Au = _diag(A, 1) + Bu = _diag(B, 1) + @inbounds begin + for j in 3:n-2 + Aj₋2j₋1 = Au[j-2] + Aj₋1j = Au[j-1] + Aj₋1j₋1 = Ad[j-1] + Ajj = Ad[j] + Bj₋1j = Bu[j-1] + Bjj = Bd[j] + + C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j) + C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj) + C[j, j] += _add(Ajj*Bjj) + end + end + elseif A.uplo == 'U' && B.uplo == 'L' + Au = _diag(A, 1) + Bl = _diag(B, -1) + @inbounds begin + for j in 3:n-2 + Aj₋1j = Au[j-1] + Ajj₊1 = Au[j] + Ajj = Ad[j] + Aj₊1j₊1 = Ad[j+1] + Bjj = Bd[j] + Bj₊1j = Bl[j] + + C[j-1, j] += _add(Aj₋1j*Bjj) + C[j, j] += _add(Ajj*Bjj + Ajj₊1*Bj₊1j) + C[j+1, j] += _add(Aj₊1j₊1*Bj₊1j) + end + end + elseif A.uplo == 'L' && B.uplo == 'U' + Al = _diag(A, -1) + Bu = _diag(B, 1) + @inbounds begin + for j in 3:n-2 + Aj₋1j₋1 = Ad[j-1] + Ajj = Ad[j] + Ajj₋1 = Al[j-1] + Aj₊1j = Al[j] + Bj₋1j = Bu[j-1] + Bjj = Bd[j] + + C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j) + C[j, j] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj) + C[j+1, j] += _add(Aj₊1j*Bjj) + end + end + else # A.uplo == 'L' && B.uplo == 'L' + Al = _diag(A, -1) + Bl = _diag(B, -1) + @inbounds begin + for j in 3:n-2 + Ajj = Ad[j] + Aj₊1j₊1 = Ad[j+1] + Aj₊1j = Al[j] + Aj₊2j₊1 = Al[j+1] + Bjj = Bd[j] + Bj₊1j = Bl[j] + + C[j, j] += _add(Ajj*Bjj) + C[j+1, j] += _add(Aj₊1j*Bjj + Aj₊1j₊1*Bj₊1j) + C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j) + end + end + end C end @@ -744,7 +932,52 @@ function _mul!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, _add::MulA nB = size(B,2) (iszero(nA) || iszero(nB)) && return C iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - nA <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) + if nA <= 3 + # naive multiplication + for I in CartesianIndices(C) + col = Base.tail(Tuple(I)) + _modify!(_add, sum(A[I[1], k] * B[k, col...] for k in axes(A,2)), C, I) + end + return C + end + _mul_bitrisym!(C, A, B, _add) +end +function _mul_bitrisym!(C::AbstractVecOrMat, A::Bidiagonal, B::AbstractVecOrMat, _add::MulAddMul) + nA = size(A,1) + nB = size(B,2) + d = A.dv + if A.uplo == 'U' + u = A.ev + @inbounds begin + for j = 1:nB + b₀, b₊ = B[1, j], B[2, j] + _modify!(_add, d[1]*b₀ + u[1]*b₊, C, (1, j)) + for i = 2:nA - 1 + b₀, b₊ = b₊, B[i + 1, j] + _modify!(_add, d[i]*b₀ + u[i]*b₊, C, (i, j)) + end + _modify!(_add, d[nA]*b₊, C, (nA, j)) + end + end + else + l = A.ev + @inbounds begin + for j = 1:nB + b₀, b₊ = B[1, j], B[2, j] + _modify!(_add, d[1]*b₀, C, (1, j)) + for i = 2:nA - 1 + b₋, b₀, b₊ = b₀, b₊, B[i + 1, j] + _modify!(_add, l[i - 1]*b₋ + d[i]*b₀, C, (i, j)) + end + _modify!(_add, l[nA - 1]*b₀ + d[nA]*b₊, C, (nA, j)) + end + end + end + C +end +function _mul_bitrisym!(C::AbstractVecOrMat, A::TriSym, B::AbstractVecOrMat, _add::MulAddMul) + nA = size(A,1) + nB = size(B,2) l = _diag(A, -1) d = _diag(A, 0) u = _diag(A, 1) @@ -769,8 +1002,9 @@ function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::TriSym, _add::MulAddMul) m = size(B,2) (iszero(m) || iszero(n)) && return C iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - if n <= 3 || m <= 1 - return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) + if m == 1 + B11 = B[1,1] + return mul!(C, A, B11, _add.alpha, _add.beta) end Bl = _diag(B, -1) Bd = _diag(B, 0) @@ -804,21 +1038,18 @@ function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal, _add::MulAdd m, n = size(A) (iszero(m) || iszero(n)) && return C iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - if size(A, 1) <= 3 || size(B, 2) <= 1 - return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) - end @inbounds if B.uplo == 'U' + for j in n:-1:2, i in 1:m + _modify!(_add, A[i,j] * B.dv[j] + A[i,j-1] * B.ev[j-1], C, (i, j)) + end for i in 1:m - for j in n:-1:2 - _modify!(_add, A[i,j] * B.dv[j] + A[i,j-1] * B.ev[j-1], C, (i, j)) - end _modify!(_add, A[i,1] * B.dv[1], C, (i, 1)) end else # uplo == 'L' + for j in 1:n-1, i in 1:m + _modify!(_add, A[i,j] * B.dv[j] + A[i,j+1] * B.ev[j], C, (i, j)) + end for i in 1:m - for j in 1:n-1 - _modify!(_add, A[i,j] * B.dv[j] + A[i,j+1] * B.ev[j], C, (i, j)) - end _modify!(_add, A[i,n] * B.dv[n], C, (i, n)) end end @@ -834,7 +1065,12 @@ function _dibimul!(C, A, B, _add) check_A_mul_B!_sizes(size(C), size(A), size(B)) n = size(A,1) iszero(n) && return C - n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) + if n <= 3 + for I in CartesianIndices(C) + _modify!(_add, A.diag[I[1]] * B[I[1], I[2]], C, I) + end + return C + end _rmul_or_fill!(C, _add.beta) # see the same use above iszero(_add.alpha) && return C Ad = A.diag diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl index ef50658a642fb..58c228e39e226 100644 --- a/stdlib/LinearAlgebra/test/bidiag.jl +++ b/stdlib/LinearAlgebra/test/bidiag.jl @@ -1026,26 +1026,71 @@ end @test_throws "cannot set entry" B[1,2] = 4 end -@testset "mul with empty arrays" begin - A = zeros(5,0) - B = Bidiagonal(zeros(0), zeros(0), :U) - BL = Bidiagonal(zeros(5), zeros(4), :U) - @test size(A * B) == size(A) - @test size(BL * A) == size(A) - @test size(B * B) == size(B) - C = similar(A) - @test mul!(C, A, B) == A * B - @test mul!(C, BL, A) == BL * A - @test mul!(similar(B), B, B) == B * B - @test mul!(similar(B, size(B)), B, B) == B * B - - v = zeros(size(B,2)) - @test size(B * v) == size(v) - @test mul!(similar(v), B, v) == B * v - - D = Diagonal(zeros(size(B,2))) - @test size(B * D) == size(D * B) == size(D) - @test mul!(similar(D), B, D) == mul!(similar(D), D, B) == B * D +@testset "mul for small matrices" begin + @testset for n in 0:6 + D = Diagonal(rand(n)) + v = rand(n) + @testset for uplo in (:L, :U) + B = Bidiagonal(rand(n), rand(max(n-1,0)), uplo) + M = Matrix(B) + + @test B * v ≈ M * v + @test mul!(similar(v), B, v) ≈ M * v + @test mul!(ones(size(v)), B, v, 2, 3) ≈ M * v * 2 .+ 3 + + @test B * B ≈ M * M + @test mul!(similar(B, size(B)), B, B) ≈ M * M + @test mul!(ones(size(B)), B, B, 2, 4) ≈ M * M * 2 .+ 4 + + for m in 0:6 + AL = rand(m,n) + AR = rand(n,m) + @test AL * B ≈ AL * M + @test B * AR ≈ M * AR + @test mul!(similar(AL), AL, B) ≈ AL * M + @test mul!(similar(AR), B, AR) ≈ M * AR + @test mul!(ones(size(AL)), AL, B, 2, 4) ≈ AL * M * 2 .+ 4 + @test mul!(ones(size(AR)), B, AR, 2, 4) ≈ M * AR * 2 .+ 4 + end + + @test B * D ≈ M * D + @test D * B ≈ D * M + @test mul!(similar(B), B, D) ≈ M * D + @test mul!(similar(B), B, D) ≈ M * D + @test mul!(similar(B, size(B)), D, B) ≈ D * M + @test mul!(similar(B, size(B)), B, D) ≈ M * D + @test mul!(ones(size(B)), D, B, 2, 4) ≈ D * M * 2 .+ 4 + @test mul!(ones(size(B)), B, D, 2, 4) ≈ M * D * 2 .+ 4 + end + BL = Bidiagonal(rand(n), rand(max(0, n-1)), :L) + ML = Matrix(BL) + BU = Bidiagonal(rand(n), rand(max(0, n-1)), :U) + MU = Matrix(BU) + T = Tridiagonal(zeros(max(0, n-1)), zeros(n), zeros(max(0, n-1))) + @test mul!(T, BL, BU) ≈ ML * MU + @test mul!(T, BU, BL) ≈ MU * ML + T = Tridiagonal(ones(max(0, n-1)), ones(n), ones(max(0, n-1))) + @test mul!(copy(T), BL, BU, 2, 3) ≈ ML * MU * 2 + T * 3 + @test mul!(copy(T), BU, BL, 2, 3) ≈ MU * ML * 2 + T * 3 + end + + n = 4 + arr = SizedArrays.SizedArray{(2,2)}(reshape([1:4;],2,2)) + for B in ( + Bidiagonal(fill(arr,n), fill(arr,n-1), :L), + Bidiagonal(fill(arr,n), fill(arr,n-1), :U), + ) + @test B * B ≈ Matrix(B) * Matrix(B) + BL = Bidiagonal(fill(arr,n), fill(arr,n-1), :L) + BU = Bidiagonal(fill(arr,n), fill(arr,n-1), :U) + @test BL * B ≈ Matrix(BL) * Matrix(B) + @test BU * B ≈ Matrix(BU) * Matrix(B) + @test B * BL ≈ Matrix(B) * Matrix(BL) + @test B * BU ≈ Matrix(B) * Matrix(BU) + D = Diagonal(fill(arr,n)) + @test D * B ≈ Matrix(D) * Matrix(B) + @test B * D ≈ Matrix(B) * Matrix(D) + end end end # module TestBidiagonal diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl index 3330fa682fe5e..15ac7f9f2147f 100644 --- a/stdlib/LinearAlgebra/test/tridiag.jl +++ b/stdlib/LinearAlgebra/test/tridiag.jl @@ -970,4 +970,75 @@ end @test sprint(show, S) == "SymTridiagonal($(repr(diag(S))), $(repr(diag(S,1))))" end +@testset "mul for small matrices" begin + @testset for n in 0:6 + for T in ( + Tridiagonal(rand(max(n-1,0)), rand(n), rand(max(n-1,0))), + SymTridiagonal(rand(n), rand(max(n-1,0))), + ) + M = Matrix(T) + @test T * T ≈ M * M + @test mul!(similar(T, size(T)), T, T) ≈ M * M + @test mul!(ones(size(T)), T, T, 2, 4) ≈ M * M * 2 .+ 4 + + for m in 0:6 + AR = rand(n,m) + AL = rand(m,n) + @test AL * T ≈ AL * M + @test T * AR ≈ M * AR + @test mul!(similar(AL), AL, T) ≈ AL * M + @test mul!(similar(AR), T, AR) ≈ M * AR + @test mul!(ones(size(AL)), AL, T, 2, 4) ≈ AL * M * 2 .+ 4 + @test mul!(ones(size(AR)), T, AR, 2, 4) ≈ M * AR * 2 .+ 4 + end + + v = rand(n) + @test T * v ≈ M * v + @test mul!(similar(v), T, v) ≈ M * v + + D = Diagonal(rand(n)) + @test T * D ≈ M * D + @test D * T ≈ D * M + @test mul!(Tridiagonal(similar(T)), D, T) ≈ D * M + @test mul!(Tridiagonal(similar(T)), T, D) ≈ M * D + @test mul!(similar(T, size(T)), D, T) ≈ D * M + @test mul!(similar(T, size(T)), T, D) ≈ M * D + @test mul!(ones(size(T)), D, T, 2, 4) ≈ D * M * 2 .+ 4 + @test mul!(ones(size(T)), T, D, 2, 4) ≈ M * D * 2 .+ 4 + + for uplo in (:U, :L) + B = Bidiagonal(rand(n), rand(max(0, n-1)), uplo) + @test T * B ≈ M * B + @test B * T ≈ B * M + if n <= 2 + @test mul!(Tridiagonal(similar(T)), B, T) ≈ B * M + @test mul!(Tridiagonal(similar(T)), T, B) ≈ M * B + end + @test mul!(similar(T, size(T)), B, T) ≈ B * M + @test mul!(similar(T, size(T)), T, B) ≈ M * B + @test mul!(ones(size(T)), B, T, 2, 4) ≈ B * M * 2 .+ 4 + @test mul!(ones(size(T)), T, B, 2, 4) ≈ M * B * 2 .+ 4 + end + end + end + + n = 4 + arr = SizedArrays.SizedArray{(2,2)}(reshape([1:4;],2,2)) + for T in ( + SymTridiagonal(fill(arr,n), fill(arr,n-1)), + Tridiagonal(fill(arr,n-1), fill(arr,n), fill(arr,n-1)), + ) + @test T * T ≈ Matrix(T) * Matrix(T) + BL = Bidiagonal(fill(arr,n), fill(arr,n-1), :L) + BU = Bidiagonal(fill(arr,n), fill(arr,n-1), :U) + @test BL * T ≈ Matrix(BL) * Matrix(T) + @test BU * T ≈ Matrix(BU) * Matrix(T) + @test T * BL ≈ Matrix(T) * Matrix(BL) + @test T * BU ≈ Matrix(T) * Matrix(BU) + D = Diagonal(fill(arr,n)) + @test D * T ≈ Matrix(D) * Matrix(T) + @test T * D ≈ Matrix(T) * Matrix(D) + end +end + end # module TestTridiagonal From d2807927656efe2bfbfe4402f83e2ea417306db2 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 10 Sep 2024 07:33:56 -0400 Subject: [PATCH 22/57] Fix `@time_imports` extension recognition (#55718) --- base/loading.jl | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index fe86a8c198461..4e70d2bc257ea 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1203,7 +1203,7 @@ const TIMING_IMPORTS = Threads.Atomic{Int}(0) # these return either the array of modules loaded from the path / content given # or an Exception that describes why it couldn't be loaded # and it reconnects the Base.Docs.META -function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}, ignore_native::Union{Nothing,Bool}=nothing) +function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}, ignore_native::Union{Nothing,Bool}=nothing; register::Bool=true) if isnothing(ignore_native) if JLOptions().code_coverage == 0 && JLOptions().malloc_log == 0 ignore_native = false @@ -1252,13 +1252,14 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No for M in restored M = M::Module if parentmodule(M) === M && PkgId(M) == pkg + register && register_root_module(M) if timing_imports elapsed = round((time_ns() - t_before) / 1e6, digits = 1) comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before print(lpad(elapsed, 9), " ms ") - parentid = get(EXT_PRIMED, pkg, nothing) - if parentid !== nothing - print(parentid.name, " → ") + ext_parent = extension_parent_name(M) + if ext_parent !== nothing + print(ext_parent::String, " → ") end print(pkg.name) if comp_time > 0 @@ -1280,6 +1281,27 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No end end +# if M is an extension, return the string name of the parent. Otherwise return nothing +function extension_parent_name(M::Module) + rootmodule = moduleroot(M) + src_path = pathof(rootmodule) + src_path === nothing && return nothing + pkgdir_parts = splitpath(src_path) + ext_pos = findlast(==("ext"), pkgdir_parts) + if ext_pos !== nothing && ext_pos >= length(pkgdir_parts) - 2 + parent_package_root = joinpath(pkgdir_parts[1:ext_pos-1]...) + parent_package_project_file = locate_project_file(parent_package_root) + if parent_package_project_file isa String + d = parsed_toml(parent_package_project_file) + name = get(d, "name", nothing) + if name !== nothing + return name + end + end + end + return nothing +end + function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String) # This function is also used by PkgCacheInspector.jl restored = sv[1]::Vector{Any} @@ -1461,7 +1483,7 @@ function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any} triggers = triggers::Union{String, Vector{String}} triggers isa String && (triggers = [triggers]) id = PkgId(uuid5(parent.uuid::UUID, ext), ext) - if id in keys(EXT_PRIMED) || haskey(Base.loaded_modules, id) + if haskey(EXT_PRIMED, id) || haskey(Base.loaded_modules, id) continue # extension is already primed or loaded, don't add it again end EXT_PRIMED[id] = parent @@ -1890,8 +1912,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union depmods[i] = dep end # then load the file - loaded = _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native) - loaded isa Module && register_root_module(loaded) + loaded = _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native; register = true) return loaded end @@ -1958,8 +1979,7 @@ end if dep === nothing try set_pkgorigin_version_path(modkey, modpath) - dep = _include_from_serialized(modkey, modcachepath, modocachepath, modstaledeps) - dep isa Module && stalecheck && register_root_module(dep) + dep = _include_from_serialized(modkey, modcachepath, modocachepath, modstaledeps; register = stalecheck) finally end_loading(modkey, dep) end @@ -1975,9 +1995,8 @@ end end restored = get(loaded_precompiles, pkg => newbuild_id, nothing) if !isa(restored, Module) - restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps) + restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps; register = stalecheck) end - isa(restored, Module) && stalecheck && register_root_module(restored) isa(restored, Module) && return restored @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored @label check_next_path From 3653b3898647d4c2528afde1f54bd3e65e3aa8ee Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 10 Sep 2024 12:51:55 -0400 Subject: [PATCH 23/57] drop typed GEP calls (#55708) Now that we use LLVM 18, and almost have LLVM 19 support, do cleanup to remove LLVM 15/16 type pointer support. LLVM now slightly prefers that we rewrite our complex GEP to use a simple emit_ptrgep call instead, which is also much simpler for julia to emit also. --- src/ccall.cpp | 11 +- src/cgutils.cpp | 126 ++++++-------------- src/codegen.cpp | 123 ++++++++----------- src/intrinsics.cpp | 4 +- src/llvm-alloc-opt.cpp | 21 +--- src/llvm-codegen-shared.h | 45 +++---- src/llvm-late-gc-lowering.cpp | 72 +---------- src/llvm-ptls.cpp | 4 +- test/llvmpasses/alloc-opt-gcframe.ll | 2 +- test/llvmpasses/late-lower-gc-addrspaces.ll | 56 ++++----- test/llvmpasses/late-lower-gc.ll | 70 +++++------ test/llvmpasses/names.jl | 3 +- 12 files changed, 187 insertions(+), 350 deletions(-) diff --git a/src/ccall.cpp b/src/ccall.cpp index 7ab8cfa974d6f..eac130ea43189 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -1854,8 +1854,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) ctx.builder.SetInsertPoint(checkBB); auto signal_page_load = ctx.builder.CreateLoad( ctx.types().T_size, - ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size, - get_current_signal_page_from_ptls(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1), + emit_ptrgep(ctx, get_current_signal_page_from_ptls(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const), + -sizeof(size_t)), true); setName(ctx.emission_context, signal_page_load, "signal_page_load"); ctx.builder.CreateBr(contBB); @@ -1870,8 +1870,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue // The inbounds gep makes it more clear to LLVM that the resulting value is not // a null pointer. - auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1); - setName(ctx.emission_context, strp, "string_ptr"); + auto strp = emit_ptrgep(ctx, obj, ctx.types().sizeof_ptr, "string_ptr"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt); } @@ -1882,9 +1881,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue // The inbounds gep makes it more clear to LLVM that the resulting value is not // a null pointer. - auto strp = ctx.builder.CreateConstInBoundsGEP1_32( - ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*)); - setName(ctx.emission_context, strp, "symbol_name"); + auto strp = emit_ptrgep(ctx, obj, sizeof(jl_sym_t), "symbol_name"); JL_GC_POP(); return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt); } diff --git a/src/cgutils.cpp b/src/cgutils.cpp index bec84d9901279..2a234f399f5c1 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -130,14 +130,8 @@ static Value *stringConstPtr( } // Doesn't need to be aligned, we shouldn't operate on these like julia objects GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, Align(1), "_j_str_" + StringRef(ctxt.data(), ctxt.size()), *M); - Value *zero = ConstantInt::get(Type::getInt32Ty(irbuilder.getContext()), 0); - Value *Args[] = { zero, zero }; - auto gep = irbuilder.CreateInBoundsGEP(gv->getValueType(), - // AddrSpaceCast in case globals are in non-0 AS - irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0)), - Args); - setName(emission_context, gep, "string_const_ptr"); - return gep; + // AddrSpaceCast in case globals are in non-0 AS + return irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0)); } @@ -621,12 +615,6 @@ static unsigned convert_struct_offset(jl_codectx_t &ctx, Type *lty, unsigned byt return convert_struct_offset(ctx.builder.GetInsertBlock()->getModule()->getDataLayout(), lty, byte_offset); } -static Value *emit_struct_gep(jl_codectx_t &ctx, Type *lty, Value *base, unsigned byte_offset) -{ - unsigned idx = convert_struct_offset(ctx, lty, byte_offset); - return ctx.builder.CreateConstInBoundsGEP2_32(lty, base, 0, idx); -} - static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall=false); static Type *_julia_type_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed) @@ -1200,10 +1188,10 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt) { Value *Ptr = decay_derived(ctx, dt); - Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, types) / sizeof(void*)); + unsigned Idx = offsetof(jl_datatype_t, types); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto types = ai.decorateInst(ctx.builder.CreateAlignedLoad( - ctx.types().T_pjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, Ptr, Idx), Align(sizeof(void*)))); + ctx.types().T_pjlvalue, emit_ptrgep(ctx, Ptr, Idx), Align(sizeof(void*)))); setName(ctx.emission_context, types, "datatype_types"); return types; } @@ -1222,16 +1210,13 @@ static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt, bool add_isunion= { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *Ptr = decay_derived(ctx, dt); - Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, layout) / sizeof(int32_t*)); - Ptr = ctx.builder.CreateInBoundsGEP(getPointerTy(ctx.builder.getContext()), Ptr, Idx); + Ptr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_t, layout)); Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getPointerTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*)))); - Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, size) / sizeof(int32_t)); - Value *SizePtr = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx); + Value *SizePtr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_layout_t, size)); Value *Size = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), SizePtr, Align(sizeof(int32_t)))); setName(ctx.emission_context, Size, "datatype_size"); if (add_isunion) { - Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, flags) / sizeof(int8_t)); - Value *FlagPtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), Ptr, Idx); + Value *FlagPtr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_layout_t, flags)); Value *Flag = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), FlagPtr, Align(sizeof(int16_t)))); Flag = ctx.builder.CreateLShr(Flag, 4); Flag = ctx.builder.CreateAnd(Flag, ConstantInt::get(Flag->getType(), 1)); @@ -1308,7 +1293,7 @@ static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt) static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ) { Value *isprimitive; - isprimitive = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash)); + isprimitive = emit_ptrgep(ctx, decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); isprimitive = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isprimitive, Align(1))); isprimitive = ctx.builder.CreateLShr(isprimitive, 7); @@ -1320,10 +1305,7 @@ static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ) static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt) { unsigned n = offsetof(jl_datatype_t, name) / sizeof(char*); - Value *vptr = ctx.builder.CreateInBoundsGEP( - ctx.types().T_pjlvalue, - maybe_decay_tracked(ctx, dt), - ConstantInt::get(ctx.types().T_size, n)); + Value *vptr = emit_ptrgep(ctx, maybe_decay_tracked(ctx, dt), n * sizeof(jl_value_t*)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto name = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*)))); setName(ctx.emission_context, name, "datatype_name"); @@ -1522,7 +1504,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just // we lied a bit: this wasn't really an object (though it was valid for GC rooting) // and we need to use it as an index to get the real object now Module *M = jl_Module; - Value *smallp = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), prepare_global_in(M, jl_small_typeof_var), tag); + Value *smallp = emit_ptrgep(ctx, prepare_global_in(M, jl_small_typeof_var), tag); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto small = ctx.builder.CreateAlignedLoad(typetag->getType(), smallp, M->getDataLayout().getPointerABIAlignment(0)); small->setMetadata(LLVMContext::MD_nonnull, MDNode::get(M->getContext(), None)); @@ -1802,7 +1784,7 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ) { Value *isconcrete; - isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash)); + isconcrete = emit_ptrgep(ctx, decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); isconcrete = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isconcrete, Align(1))); isconcrete = ctx.builder.CreateLShr(isconcrete, 1); @@ -2848,36 +2830,14 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st if (strct.ispointer()) { auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset); Value *staddr = data_pointer(ctx, strct); - bool isboxed; - Type *lt = julia_type_to_llvm(ctx, (jl_value_t*)jt, &isboxed); Value *addr; - if (isboxed) { - // byte_offset == 0 is an important special case here, e.g. - // for single field wrapper types. Introducing the bitcast - // can pessimize mem2reg - if (byte_offset > 0) { - addr = ctx.builder.CreateInBoundsGEP( - getInt8Ty(ctx.builder.getContext()), - staddr, - ConstantInt::get(ctx.types().T_size, byte_offset)); - } - else { - addr = staddr; - } - } - else { - if (jl_is_vecelement_type((jl_value_t*)jt)) - addr = staddr; // VecElement types are unwrapped in LLVM. - else if (isa(lt)) - addr = emit_struct_gep(ctx, lt, staddr, byte_offset); - else - addr = ctx.builder.CreateConstInBoundsGEP2_32(lt, staddr, 0, idx); - if (addr != staddr) { - setNameWithField(ctx.emission_context, addr, get_objname, jt, idx, Twine("_ptr")); - } - } - if (jl_field_isptr(jt, idx)) { + if (jl_is_vecelement_type((jl_value_t*)jt) || byte_offset == 0) + addr = staddr; // VecElement types are unwrapped in LLVM. + else + addr = emit_ptrgep(ctx, staddr, byte_offset); + if (addr != staddr) setNameWithField(ctx.emission_context, addr, get_objname, jt, idx, Twine("_ptr")); + if (jl_field_isptr(jt, idx)) { LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(void*))); setNameWithField(ctx.emission_context, Load, get_objname, jt, idx, Twine()); Load->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order)); @@ -2894,14 +2854,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st bool isptr = (union_max == 0); assert(!isptr && fsz < jl_field_size(jt, idx)); (void)isptr; size_t fsz1 = jl_field_size(jt, idx) - 1; - Value *ptindex; - if (isboxed) { - ptindex = ctx.builder.CreateConstInBoundsGEP1_32( - getInt8Ty(ctx.builder.getContext()), staddr, byte_offset + fsz1); - } - else { - ptindex = emit_struct_gep(ctx, cast(lt), staddr, byte_offset + fsz1); - } + Value *ptindex = emit_ptrgep(ctx, staddr, byte_offset + fsz1); auto val = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, strct.tbaa); if (val.V && val.V != addr) { setNameWithField(ctx.emission_context, val.V, get_objname, jt, idx, Twine()); @@ -2957,15 +2910,15 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st for (; i < fsz / align; i++) { unsigned fld = st_idx + i; Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef(fld)); - Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); + Value *fldp = emit_ptrgep(ctx, lv, i * align); ctx.builder.CreateAlignedStore(fldv, fldp, Align(align)); } // emit remaining bytes up to tindex if (i < ptindex - st_idx) { - Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); + Value *staddr = emit_ptrgep(ctx, lv, i * align); for (; i < ptindex - st_idx; i++) { Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef(st_idx + i)); - Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i); + Value *fldp = emit_ptrgep(ctx, staddr, i); ctx.builder.CreateAlignedStore(fldv, fldp, Align(1)); } } @@ -3105,7 +3058,7 @@ static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t) LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None)); jl_aliasinfo_t aliasinfo_mem = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryown); aliasinfo_mem.decorateInst(LI); - addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, m, JL_SMALL_BYTE_ALIGNMENT / sizeof(void*)); + addr = emit_ptrgep(ctx, m, JL_SMALL_BYTE_ALIGNMENT); Value *foreign = ctx.builder.CreateICmpNE(addr, decay_derived(ctx, LI)); return emit_guarded_test(ctx, foreign, t, [&] { addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_jlgenericmemory, m, 1); @@ -3867,19 +3820,14 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx, auto tbaa = best_field_tbaa(ctx, strct, sty, idx0, byte_offset); Value *addr = data_pointer(ctx, strct); if (byte_offset > 0) { - addr = ctx.builder.CreateInBoundsGEP( - getInt8Ty(ctx.builder.getContext()), - addr, - ConstantInt::get(ctx.types().T_size, byte_offset)); + addr = emit_ptrgep(ctx, addr, byte_offset); setNameWithField(ctx.emission_context, addr, get_objname, sty, idx0, Twine("_ptr")); } jl_value_t *jfty = jl_field_type(sty, idx0); bool isboxed = jl_field_isptr(sty, idx0); if (!isboxed && jl_is_uniontype(jfty)) { size_t fsz1 = jl_field_size(sty, idx0) - 1; - Value *ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), - addr, - ConstantInt::get(ctx.types().T_size, fsz1)); + Value *ptindex = emit_ptrgep(ctx, addr, fsz1); setNameWithField(ctx.emission_context, ptindex, get_objname, sty, idx0, Twine(".tindex_ptr")); return union_store(ctx, addr, ptindex, rhs, cmp, jfty, tbaa, ctx.tbaa().tbaa_unionselbyte, Order, FailOrder, @@ -3971,8 +3919,8 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (!init_as_value) { // avoid unboxing the argument explicitly // and use memcpy instead - Instruction *inst; - dest = inst = cast(ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx)); + Instruction *inst = cast(emit_ptrgep(ctx, strct, offs)); + dest = inst; // Our promotion point needs to come before // A) All of our arguments' promotion points // B) Any instructions we insert at any of our arguments' promotion points @@ -4025,16 +3973,16 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg // emit all of the align-sized words unsigned i = 0; for (; i < fsz1 / al; i++) { - Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); + Value *fldp = emit_ptrgep(ctx, lv, i * al); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(ET, fldp, Align(al))); strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef(llvm_idx + i)); } // emit remaining bytes up to tindex if (i < ptindex - llvm_idx) { - Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i); + Value *staddr = emit_ptrgep(ctx, lv, i * al); for (; i < ptindex - llvm_idx; i++) { - Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i); + Value *fldp = emit_ptrgep(ctx, staddr, i); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack); Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), fldp, Align(1))); strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef(llvm_idx + i)); @@ -4047,7 +3995,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg fval = ctx.builder.CreateInsertValue(strct, fval, ArrayRef(llvm_idx)); } else { - Value *ptindex = emit_struct_gep(ctx, lt, strct, offs + fsz1); + Value *ptindex = emit_ptrgep(ctx, strct, offs + fsz1); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1))); if (!rhs_union.isghost) @@ -4083,14 +4031,15 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) { unsigned offs = jl_field_offset(sty, i); int fsz = jl_field_size(sty, i) - 1; - unsigned llvm_idx = convert_struct_offset(ctx, cast(lt), offs + fsz); - if (init_as_value) + if (init_as_value) { + unsigned llvm_idx = convert_struct_offset(ctx, cast(lt), offs + fsz); strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ArrayRef(llvm_idx)); + } else { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); ai.decorateInst(ctx.builder.CreateAlignedStore( ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), - ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx), + emit_ptrgep(ctx, strct, offs + fsz), Align(1))); } } @@ -4126,8 +4075,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte); ai.decorateInst(ctx.builder.CreateAlignedStore( ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), - ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), strct, - ConstantInt::get(ctx.types().T_size, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)), + emit_ptrgep(ctx, strct, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1), Align(1))); } } @@ -4169,9 +4117,7 @@ static Value *emit_defer_signal(jl_codectx_t &ctx) { ++EmittedDeferSignal; Value *ptls = get_current_ptls(ctx); - Constant *offset = ConstantInt::getSigned(getInt32Ty(ctx.builder.getContext()), - offsetof(jl_tls_states_t, defer_signal) / sizeof(sig_atomic_t)); - return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef(offset), "jl_defer_signal"); + return emit_ptrgep(ctx, ptls, offsetof(jl_tls_states_t, defer_signal)); } #ifndef JL_NDEBUG diff --git a/src/codegen.cpp b/src/codegen.cpp index 9f80791f2882d..9184e4895ab6d 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2161,6 +2161,20 @@ static inline GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G) return cast(local); } +static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, size_t byte_offset, const Twine &Name="") +{ + auto *gep = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), base, byte_offset); + setName(ctx.emission_context, gep, Name); + return gep; +} + +static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, Value *byte_offset, const Twine &Name="") +{ + auto *gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), base, byte_offset, Name); + setName(ctx.emission_context, gep, Name); + return gep; +} + // --- convenience functions for tagging llvm values with julia types --- @@ -2211,7 +2225,7 @@ static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *st size_t i, np = sty->layout->npointers; auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx.builder.getContext()); for (i = 0; i < np; i++) { - Value *fld = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, jl_ptr_offset(sty, i)); + Value *fld = emit_ptrgep(ctx, ptr, jl_ptr_offset(sty, i) * sizeof(jl_value_t*)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(T_prjlvalue), fld)); } @@ -3542,8 +3556,6 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a return ctx.builder.CreateICmpEQ(answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)); } else if (sz > 512 && jl_struct_try_layout(sty) && sty->layout->flags.isbitsegal) { - Type *TInt8 = getInt8Ty(ctx.builder.getContext()); - Type *TInt1 = getInt1Ty(ctx.builder.getContext()); Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) : value_to_pointer(ctx, arg1).V; Value *varg2 = arg2.ispointer() ? data_pointer(ctx, arg2) : @@ -3562,8 +3574,8 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a Value *ptr1 = varg1; Value *ptr2 = varg2; if (desc.offset != 0) { - ptr1 = ctx.builder.CreateConstInBoundsGEP1_32(TInt8, ptr1, desc.offset); - ptr2 = ctx.builder.CreateConstInBoundsGEP1_32(TInt8, ptr2, desc.offset); + ptr1 = emit_ptrgep(ctx, ptr1, desc.offset); + ptr2 = emit_ptrgep(ctx, ptr2, desc.offset); } Value *new_ptr1 = ptr1; @@ -3573,7 +3585,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a PHINode *answerphi = nullptr; if (desc.nrepeats != 1) { // Set up loop - endptr1 = ctx.builder.CreateConstInBoundsGEP1_32(TInt8, ptr1, desc.nrepeats * (desc.data_bytes + desc.padding_bytes));; + endptr1 = emit_ptrgep(ctx, ptr1, desc.nrepeats * (desc.data_bytes + desc.padding_bytes));; BasicBlock *currBB = ctx.builder.GetInsertBlock(); loopBB = BasicBlock::Create(ctx.builder.getContext(), "egal_loop", ctx.f); @@ -3581,6 +3593,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a ctx.builder.CreateBr(loopBB); ctx.builder.SetInsertPoint(loopBB); + Type *TInt1 = getInt1Ty(ctx.builder.getContext()); answerphi = ctx.builder.CreatePHI(TInt1, 2); answerphi->addIncoming(answer ? answer : ConstantInt::get(TInt1, 1), currBB); answer = answerphi; @@ -3588,11 +3601,11 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a PHINode *itr1 = ctx.builder.CreatePHI(ptr1->getType(), 2); PHINode *itr2 = ctx.builder.CreatePHI(ptr2->getType(), 2); - new_ptr1 = ctx.builder.CreateConstInBoundsGEP1_32(TInt8, itr1, desc.data_bytes + desc.padding_bytes); + new_ptr1 = emit_ptrgep(ctx, itr1, desc.data_bytes + desc.padding_bytes); itr1->addIncoming(ptr1, currBB); itr1->addIncoming(new_ptr1, loopBB); - Value *new_ptr2 = ctx.builder.CreateConstInBoundsGEP1_32(TInt8, itr2, desc.data_bytes + desc.padding_bytes); + Value *new_ptr2 = emit_ptrgep(ctx, itr2, desc.data_bytes + desc.padding_bytes); itr2->addIncoming(ptr2, currBB); itr2->addIncoming(new_ptr2, loopBB); @@ -4074,7 +4087,7 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen); data = ctx.builder.CreateInBoundsGEP(AT, data, idx0); } - ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx0); + ptindex = emit_ptrgep(ctx, ptindex, idx0); *ret = union_store(ctx, data, ptindex, val, cmp, ety, ctx.tbaa().tbaa_arraybuf, ctx.tbaa().tbaa_arrayselbyte, Order, FailOrder, @@ -4089,7 +4102,7 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, assert(ptr); lock = ptr; // ptr += sizeof(lock); - ptr = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT)); + ptr = emit_ptrgep(ctx, ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT)); } Value *data_owner = NULL; // owner object against which the write barrier must check if (isboxed || layout->first_ptr >= 0) { // if elements are just bits, don't need a write barrier @@ -4204,7 +4217,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, #ifdef _P64 nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext())); #endif - Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)); + Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)); Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva }); *ret = mark_julia_type(ctx, r, true, jl_any_type); return true; @@ -4354,7 +4367,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen); data = ctx.builder.CreateInBoundsGEP(AT, data, idx0); } - ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx0); + ptindex = emit_ptrgep(ctx, ptindex, idx0); size_t elsz_c = 0, al_c = 0; int union_max = jl_islayout_inline(ety, &elsz_c, &al_c); assert(union_max && LLT_ALIGN(elsz_c, al_c) == elsz && al_c == al); @@ -4367,7 +4380,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, assert(ptr); lock = ptr; // ptr += sizeof(lock); - ptr = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT)); + ptr = emit_ptrgep(ctx, ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT)); emit_lockstate_value(ctx, lock, true); } *ret = typed_load(ctx, ptr, nullptr, ety, @@ -4458,10 +4471,10 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (needlock) { // n.b. no actual lock acquire needed, as the check itself only needs to load a single pointer and check for null // elem += sizeof(lock); - elem = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT)); + elem = emit_ptrgep(ctx, elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT)); } if (!isboxed) - elem = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, elem, layout->first_ptr); + elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*)); // emit this using the same type as jl_builtin_memoryrefget // so that LLVM may be able to load-load forward them and fold the result auto tbaa = isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf; @@ -4549,7 +4562,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) { Value *valen = emit_n_varargs(ctx); jl_cgval_t va_ary( // fake instantiation of a cgval, in order to call emit_bounds_check (it only checks the `.V` field) - ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)), + emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)), NULL, NULL); Value *idx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type); idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck); @@ -4899,7 +4912,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, if (!jl_field_isptr(stt, fieldidx)) offs += ((jl_datatype_t*)jl_field_type(stt, fieldidx))->layout->first_ptr; Value *ptr = data_pointer(ctx, obj); - Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, ptr, offs); + Value *addr = emit_ptrgep(ctx, ptr, offs * sizeof(jl_value_t*)); // emit this using the same type as emit_getfield_knownidx // so that LLVM may be able to load-load forward them and fold the result jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); @@ -5583,10 +5596,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i) } } assert(ctx.spvals_ptr != NULL); - Value *bp = ctx.builder.CreateConstInBoundsGEP1_32( - ctx.types().T_prjlvalue, - ctx.spvals_ptr, - i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); + Value *bp = emit_ptrgep(ctx, ctx.spvals_ptr, i * sizeof(jl_value_t*) + sizeof(jl_svec_t)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); setName(ctx.emission_context, sp, "sparam"); @@ -5639,10 +5649,7 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym, int allow_i } } assert(ctx.spvals_ptr != NULL); - Value *bp = ctx.builder.CreateConstInBoundsGEP1_32( - ctx.types().T_prjlvalue, - ctx.spvals_ptr, - i + sizeof(jl_svec_t) / sizeof(jl_value_t*)); + Value *bp = emit_ptrgep(ctx, ctx.spvals_ptr, i * sizeof(jl_value_t*) + sizeof(jl_svec_t)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)))); isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type)); @@ -6753,34 +6760,26 @@ static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=fal static Value *get_current_task(jl_codectx_t &ctx) { - return get_current_task_from_pgcstack(ctx.builder, ctx.types().T_size, ctx.pgcstack); + return get_current_task_from_pgcstack(ctx.builder, ctx.pgcstack); } // Get PTLS through current task. static Value *get_current_ptls(jl_codectx_t &ctx) { - return get_current_ptls_from_task(ctx.builder, ctx.types().T_size, get_current_task(ctx), ctx.tbaa().tbaa_gcframe); + return get_current_ptls_from_task(ctx.builder, get_current_task(ctx), ctx.tbaa().tbaa_gcframe); } // Get the address of the world age of the current task static Value *get_tls_world_age_field(jl_codectx_t &ctx) { Value *ct = get_current_task(ctx); - return ctx.builder.CreateInBoundsGEP( - ctx.types().T_size, - ct, - ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, world_age) / ctx.types().sizeof_ptr), - "world_age"); + return emit_ptrgep(ctx, ct, offsetof(jl_task_t, world_age), "world_age"); } static Value *get_scope_field(jl_codectx_t &ctx) { Value *ct = get_current_task(ctx); - return ctx.builder.CreateInBoundsGEP( - ctx.types().T_prjlvalue, - ct, - ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, scope) / ctx.types().sizeof_ptr), - "current_scope"); + return emit_ptrgep(ctx, ct, offsetof(jl_task_t, scope), "current_scope"); } static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t ¶ms) @@ -6912,10 +6911,7 @@ static void emit_cfunc_invalidate( case jl_returninfo_t::SRet: { if (return_roots) { Value *root1 = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]* - #if JL_LLVM_VERSION < 170000 - assert(cast(root1->getType())->isOpaqueOrPointeeTypeMatches(get_returnroots_type(ctx, return_roots))); - #endif - root1 = ctx.builder.CreateConstInBoundsGEP2_32(get_returnroots_type(ctx, return_roots), root1, 0, 0); + // store the whole object in the first slot ctx.builder.CreateStore(gf_ret, root1); } Align alignment(julia_alignment(rettype)); @@ -7094,10 +7090,7 @@ static Function* gen_cfun_wrapper( if (calltype) { LoadInst *lam_max = ctx.builder.CreateAlignedLoad( ctx.types().T_size, - ctx.builder.CreateConstInBoundsGEP1_32( - ctx.types().T_size, - literal_pointer_val(ctx, (jl_value_t*)codeinst), - offsetof(jl_code_instance_t, max_world) / ctx.types().sizeof_ptr), + emit_ptrgep(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), offsetof(jl_code_instance_t, max_world)), ctx.types().alignof_ptr); age_ok = ctx.builder.CreateICmpUGE(lam_max, world_v); } @@ -7178,7 +7171,7 @@ static Function* gen_cfun_wrapper( *closure_types = jl_alloc_vec_any(0); jl_array_ptr_1d_push(*closure_types, jargty); Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, - ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_nrows(*closure_types)), + emit_ptrgep(ctx, nestPtr, jl_array_nrows(*closure_types) * ctx.types().sizeof_ptr), Align(sizeof(void*))); BasicBlock *boxedBB = BasicBlock::Create(ctx.builder.getContext(), "isboxed", cw); BasicBlock *loadBB = BasicBlock::Create(ctx.builder.getContext(), "need-load", cw); @@ -7244,7 +7237,7 @@ static Function* gen_cfun_wrapper( *closure_types = jl_alloc_vec_any(0); jl_array_ptr_1d_push(*closure_types, jargty); Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, - ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_nrows(*closure_types)), + emit_ptrgep(ctx, nestPtr, jl_array_nrows(*closure_types) * ctx.types().sizeof_ptr), Align(sizeof(void*))); Value *strct = box_ccall_result(ctx, val, runtime_dt, jargty); inputarg = mark_julia_type(ctx, strct, true, jargty_proper); @@ -7823,7 +7816,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret theArg = funcArg; } else { - Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1); + Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); theArg = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), @@ -7850,7 +7843,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret theArg = funcArg; else theArg = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, - ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, retarg - 1), + emit_ptrgep(ctx, argArray, (retarg - 1) * ctx.types().sizeof_ptr), Align(sizeof(void*))); retval = mark_julia_type(ctx, theArg, true, jl_any_type); } @@ -7969,7 +7962,7 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value param.addAttribute(Attribute::NoCapture); param.addAttribute(Attribute::NoUndef); attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param)); - fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0)); + fsig.push_back(ctx.types().T_ptr); argnames.push_back("return_roots"); } @@ -8069,9 +8062,9 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value return props; } -static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, Type *ShadowT, unsigned count) +static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, unsigned count) { - unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ShadowT, ctx.builder); //This comes from Late-GC-Lowering?? + unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ctx.builder); //This comes from Late-GC-Lowering?? assert(emitted == count); (void)emitted; (void)count; } @@ -8773,9 +8766,7 @@ static jl_llvm_functions_t // Load closure world Value *oc_this = decay_derived(ctx, &*AI++); Value *argaddr = oc_this; - Value *worldaddr = ctx.builder.CreateInBoundsGEP( - getInt8Ty(ctx.builder.getContext()), argaddr, - ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, world))); + Value *worldaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, world)); jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type, nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value()); @@ -8783,9 +8774,7 @@ static jl_llvm_functions_t emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr); // Load closure env - Value *envaddr = ctx.builder.CreateInBoundsGEP( - getInt8Ty(ctx.builder.getContext()), argaddr, - ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, captures))); + Value *envaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, captures)); jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type, nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*)); @@ -8800,7 +8789,7 @@ static jl_llvm_functions_t theArg = mark_julia_type(ctx, fArg, true, vi.value.typ); } else { - Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1); + Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); Value *load = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), @@ -8876,10 +8865,8 @@ static jl_llvm_functions_t restTuple = ctx.builder.CreateCall(F, { Constant::getNullValue(ctx.types().T_prjlvalue), - ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, argArray, - ConstantInt::get(ctx.types().T_size, nreq - 1)), - ctx.builder.CreateSub(argCount, - ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nreq - 1)) }); + emit_ptrgep(ctx, argArray, (nreq - 1) * sizeof(jl_value_t*)), + ctx.builder.CreateSub(argCount, ctx.builder.getInt32(nreq - 1)) }); restTuple->setAttributes(F->getAttributes()); ctx.builder.CreateStore(restTuple, vi.boxroot); } @@ -9319,7 +9306,7 @@ static jl_llvm_functions_t if (retvalinfo.ispointer()) { if (returninfo.return_roots) { Type *store_ty = julia_type_to_llvm(ctx, retvalinfo.typ); - emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots); + emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, returninfo.return_roots); } if (returninfo.cc == jl_returninfo_t::SRet) { assert(jl_is_concrete_type(jlrettype)); @@ -9336,7 +9323,7 @@ static jl_llvm_functions_t Value *Val = retvalinfo.V; if (returninfo.return_roots) { assert(julia_type_to_llvm(ctx, retvalinfo.typ) == store_ty); - emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots); + emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, returninfo.return_roots); } ctx.builder.CreateAlignedStore(Val, sret, Align(julia_alignment(retvalinfo.typ))); assert(retvalinfo.TIndex == NULL && "unreachable"); // unimplemented representation @@ -9447,11 +9434,7 @@ static jl_llvm_functions_t ctx.builder.CreateBr(handlr); } ctx.builder.SetInsertPoint(tryblk); - auto ehptr = ctx.builder.CreateInBoundsGEP( - ctx.types().T_ptr, - ct, - ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, eh) / ctx.types().sizeof_ptr), - "eh"); + auto ehptr = emit_ptrgep(ctx, ct, offsetof(jl_task_t, eh)); ctx.builder.CreateAlignedStore(ehbuf, ehptr, ctx.types().alignof_ptr); } } diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 4bfe3f184d24b..194b45886bb0d 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -767,7 +767,7 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, ArrayRef argv) LLT_ALIGN(size, jl_datatype_align(ety)))); setName(ctx.emission_context, im1, "pointerref_offset"); Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ); - thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1); + thePtr = emit_ptrgep(ctx, thePtr, im1); setName(ctx.emission_context, thePtr, "pointerref_src"); MDNode *tbaa = best_tbaa(ctx.tbaa(), ety); emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, Align(sizeof(jl_value_t*)), Align(align_nb)); @@ -848,7 +848,7 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef argv) im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size, LLT_ALIGN(size, jl_datatype_align(ety)))); setName(ctx.emission_context, im1, "pointerset_offset"); - auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1); + auto gep = emit_ptrgep(ctx, thePtr, im1); setName(ctx.emission_context, gep, "pointerset_ptr"); emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, Align(align_nb), Align(julia_alignment(ety))); } diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 5984ad55d221c..188955fd50972 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -770,26 +770,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF user->replaceUsesOfWith(orig_i, replace); } else if (isa(user) || isa(user)) { - #if JL_LLVM_VERSION >= 170000 - #ifndef JL_NDEBUG - auto cast_t = PointerType::get(user->getType(), new_i->getType()->getPointerAddressSpace()); - Type *new_t = new_i->getType(); - assert(cast_t == new_t); - #endif - auto replace_i = new_i; - #else - auto cast_t = PointerType::getWithSamePointeeType(cast(user->getType()), new_i->getType()->getPointerAddressSpace()); - auto replace_i = new_i; - Type *new_t = new_i->getType(); - if (cast_t != new_t) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(cast_t->getContext().supportsTypedPointers()); - replace_i = new BitCastInst(replace_i, cast_t, "", user); - replace_i->setDebugLoc(user->getDebugLoc()); - replace_i->takeName(user); - } - #endif - push_frame(user, replace_i); + push_frame(user, new_i); } else if (auto gep = dyn_cast(user)) { SmallVector IdxOperands(gep->idx_begin(), gep->idx_end()); diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h index 242dab021f101..956c04dbc7ded 100644 --- a/src/llvm-codegen-shared.h +++ b/src/llvm-codegen-shared.h @@ -125,7 +125,7 @@ struct CountTrackedPointers { CountTrackedPointers(llvm::Type *T, bool ignore_loaded=false); }; -unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::Type *DTy, llvm::IRBuilder<> &irbuilder); +unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> &irbuilder); llvm::SmallVector ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef perm_offsets={}); static inline void llvm_dump(llvm::Value *v) @@ -187,45 +187,39 @@ static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instructi } // Get PTLS through current task. -static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *pgcstack) +static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Value *pgcstack) { using namespace llvm; - auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext()); + auto i8 = builder.getInt8Ty(); const int pgcstack_offset = offsetof(jl_task_t, gcstack); - return builder.CreateInBoundsGEP( - T_pjlvalue, pgcstack, - ConstantInt::get(T_size, -(pgcstack_offset / sizeof(void *))), - "current_task"); + return builder.CreateConstInBoundsGEP1_32(i8, pgcstack, -pgcstack_offset, "current_task"); } // Get PTLS through current task. -static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *current_task, llvm::MDNode *tbaa) +static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task, llvm::MDNode *tbaa) { using namespace llvm; - auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext()); + auto i8 = builder.getInt8Ty(); + auto T_ptr = builder.getPtrTy(); const int ptls_offset = offsetof(jl_task_t, ptls); - llvm::Value *pptls = builder.CreateInBoundsGEP( - T_pjlvalue, current_task, - ConstantInt::get(T_size, ptls_offset / sizeof(void *)), - "ptls_field"); - LoadInst *ptls_load = builder.CreateAlignedLoad(T_pjlvalue, - pptls, Align(sizeof(void *)), "ptls_load"); + llvm::Value *pptls = builder.CreateConstInBoundsGEP1_32(i8, current_task, ptls_offset, "ptls_field"); + LoadInst *ptls_load = builder.CreateAlignedLoad(T_ptr, pptls, Align(sizeof(void *)), "ptls_load"); // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c. tbaa_decorate(tbaa, ptls_load); return ptls_load; } // Get signal page through current task. -static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa) +static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa) { using namespace llvm; // return builder.CreateCall(prepare_call(reuse_signal_page_func)); - auto T_psize = T_size->getPointerTo(); - int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *); - llvm::Value *psafepoint = builder.CreateInBoundsGEP( - T_psize, ptls, ConstantInt::get(T_size, nthfield)); + auto T_ptr = builder.getPtrTy(); + auto i8 = builder.getInt8Ty(); + int nthfield = offsetof(jl_tls_states_t, safepoint); + llvm::Value *psafepoint = builder.CreateConstInBoundsGEP1_32(i8, ptls, nthfield); LoadInst *ptls_load = builder.CreateAlignedLoad( - T_psize, psafepoint, Align(sizeof(void *)), "safepoint"); + T_ptr, psafepoint, Align(sizeof(void *)), "safepoint"); tbaa_decorate(tbaa, ptls_load); return ptls_load; } @@ -239,7 +233,7 @@ static inline void emit_signal_fence(llvm::IRBuilder<> &builder) static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false) { using namespace llvm; - llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, T_size, ptls, tbaa); + llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, ptls, tbaa); emit_signal_fence(builder); Module *M = builder.GetInsertBlock()->getModule(); LLVMContext &C = builder.getContext(); @@ -250,8 +244,7 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s else { Function *F = M->getFunction("julia.safepoint"); if (!F) { - auto T_psize = T_size->getPointerTo(); - FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false); + FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_size->getPointerTo()}, false); F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M); #if JL_LLVM_VERSION >= 160000 F->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); @@ -268,8 +261,8 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T { using namespace llvm; Type *T_int8 = state->getType(); - Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state)); - Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls, ArrayRef(offset), "gc_state"); + unsigned offset = offsetof(jl_tls_states_t, gc_state); + Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state"); if (old_state == nullptr) { old_state = builder.CreateLoad(T_int8, gc_state); cast(old_state)->setOrdering(AtomicOrdering::Monotonic); diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index e08f08860dfaf..8d1d5ff73b261 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -350,15 +350,7 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) { ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i), "", SI); } - #if JL_LLVM_VERSION >= 170000 assert(FalseElem->getType() == TrueElem->getType()); - #else - if (FalseElem->getType() != TrueElem->getType()) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(FalseElem->getContext().supportsTypedPointers()); - FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI); - } - #endif SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI); int Number = ++S.MaxPtrNumber; S.AllPtrNumbering[SelectBase] = Number; @@ -427,33 +419,7 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) { BaseElem = Base; else BaseElem = IncomingBases[i]; - #if JL_LLVM_VERSION >= 170000 assert(BaseElem->getType() == T_prjlvalue); - #else - if (BaseElem->getType() != T_prjlvalue) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(BaseElem->getContext().supportsTypedPointers()); - auto &remap = CastedRoots[i][BaseElem]; - if (!remap) { - if (auto constant = dyn_cast(BaseElem)) { - remap = ConstantExpr::getBitCast(constant, T_prjlvalue, ""); - } else { - Instruction *InsertBefore; - if (auto arg = dyn_cast(BaseElem)) { - InsertBefore = &*arg->getParent()->getEntryBlock().getFirstInsertionPt(); - } else { - assert(isa(BaseElem) && "Unknown value type detected!"); - InsertBefore = cast(BaseElem)->getNextNonDebugInstruction(); - } - while (isa(InsertBefore)) { - InsertBefore = InsertBefore->getNextNonDebugInstruction(); - } - remap = new BitCastInst(BaseElem, T_prjlvalue, "", InsertBefore); - } - } - BaseElem = remap; - } - #endif lift->addIncoming(BaseElem, IncomingBB); } } @@ -1528,14 +1494,11 @@ SmallVector ExtractTrackedValues(Value *Src, Type *STy, bool isptr, I return Ptrs; } -unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, Type *DTy, IRBuilder<> &irbuilder) { +unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> &irbuilder) { auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder); for (unsigned i = 0; i < Ptrs.size(); ++i) { - Value *Elem = Ptrs[i];// Dst has type `[n x {}*]*` - Value *Slot = irbuilder.CreateConstInBoundsGEP2_32(DTy, Dst, 0, i); - #if JL_LLVM_VERSION < 170000 - assert(cast(Dst->getType())->isOpaqueOrPointeeTypeMatches(DTy)); - #endif + Value *Elem = Ptrs[i]; + Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(irbuilder.getInt8Ty(), Dst, i * sizeof(void*)); StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*))); shadowStore->setOrdering(AtomicOrdering::NotAtomic); // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe); @@ -2133,7 +2096,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { // the type tag. (Note that if the size is not a constant, it will call // gc_alloc_obj, and will redundantly set the tag.) auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes); - auto ptls = get_current_ptls_from_task(builder, T_size, CI->getArgOperand(0), tbaa_gcframe); + auto ptls = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe); auto newI = builder.CreateCall( allocBytesIntrinsic, { @@ -2319,15 +2282,7 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor // Pointee types don't have semantics, so the optimizer is // free to rewrite them if convenient. We need to change // it back here for the store. - #if JL_LLVM_VERSION >= 170000 assert(Val->getType() == T_prjlvalue); - #else - if (Val->getType() != T_prjlvalue) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(Val->getContext().supportsTypedPointers()); - Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore); - } - #endif new StoreInst(Val, slotAddress, InsertBefore); } @@ -2407,18 +2362,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl &Colors, St for (CallInst *II : ToDelete) { II->eraseFromParent(); } - #if JL_LLVM_VERSION >= 170000 assert(slotAddress->getType() == AI->getType()); - #else - if (slotAddress->getType() != AI->getType()) { - // If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(slotAddress->getContext().supportsTypedPointers()); - auto BCI = new BitCastInst(slotAddress, AI->getType()); - BCI->insertAfter(slotAddress); - slotAddress = BCI; - } - #endif AI->replaceAllUsesWith(slotAddress); AI->eraseFromParent(); AI = NULL; @@ -2443,15 +2387,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl &Colors, St slotAddress->insertAfter(gcframe); auto ValExpr = std::make_pair(Base, isa(Base->getType()) ? -1 : i); auto Elem = MaybeExtractScalar(S, ValExpr, SI); - #if JL_LLVM_VERSION >= 170000 assert(Elem->getType() == T_prjlvalue); - #else - if (Elem->getType() != T_prjlvalue) { - // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine - assert(Elem->getContext().supportsTypedPointers()); - Elem = new BitCastInst(Elem, T_prjlvalue, "", SI); - } - #endif //auto Idxs = ArrayRef(Tracked[i]); //Value *Elem = ExtractScalar(Base, true, Idxs, SI); Value *shadowStore = new StoreInst(Elem, slotAddress, SI); diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index 736c1acd9525a..488dd46cade21 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -191,7 +191,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, builder.SetInsertPoint(fastTerm->getParent()); fastTerm->removeFromParent(); MDNode *tbaa = tbaa_gcframe; - Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true); + Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true); builder.Insert(fastTerm); phi->addIncoming(pgcstack, fastTerm->getParent()); // emit pre-return cleanup @@ -203,7 +203,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, for (auto &BB : *pgcstack->getParent()->getParent()) { if (isa(BB.getTerminator())) { builder.SetInsertPoint(BB.getTerminator()); - emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true); + emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true); } } } diff --git a/test/llvmpasses/alloc-opt-gcframe.ll b/test/llvmpasses/alloc-opt-gcframe.ll index e8644899f0914..f53a4d5c01df7 100644 --- a/test/llvmpasses/alloc-opt-gcframe.ll +++ b/test/llvmpasses/alloc-opt-gcframe.ll @@ -10,7 +10,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; CHECK-NOT: @julia.gc_alloc_obj ; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %gcstack, i64 -12 -; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 +; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task, ; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 ; OPAQUE-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_small_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}}) ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4 diff --git a/test/llvmpasses/late-lower-gc-addrspaces.ll b/test/llvmpasses/late-lower-gc-addrspaces.ll index 702e44b2b0e28..9c041664a9682 100644 --- a/test/llvmpasses/late-lower-gc-addrspaces.ll +++ b/test/llvmpasses/late-lower-gc-addrspaces.ll @@ -1,6 +1,6 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s target triple = "amdgcn-amd-amdhsa" target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13" @@ -19,28 +19,28 @@ define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) -; OPAQUE: %pgcstack = call ptr @julia.get_pgcstack() +; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; CHECK: %pgcstack = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) -; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64 +; CHECK-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) +; CHECK-NEXT: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] +; CHECK: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; CHECK-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) -; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] +; CHECK-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) +; CHECK: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; CHECK-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) +; CHECK-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) ret void } @@ -51,14 +51,14 @@ top: %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 -; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 -; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 -; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) -; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 -; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task, +; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; OPAQUE-NEXT: ret ptr addrspace(10) %v +; CHECK-NEXT: ret ptr addrspace(10) %v ret {} addrspace(10)* %v } @@ -74,20 +74,20 @@ top: %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 -; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 -; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 -; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) -; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 -; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task, +; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) +; CHECK-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 +; CHECK-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1 -; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 +; CHECK-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2 -; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 +; CHECK-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4 ; CHECK-NEXT: ret void ret void diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll index 093cab1358141..d294847db8f9d 100644 --- a/test/llvmpasses/late-lower-gc.ll +++ b/test/llvmpasses/late-lower-gc.ll @@ -1,6 +1,6 @@ ; This file is a part of Julia. License is MIT: https://julialang.org/license -; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE +; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s @tag = external addrspace(10) global {}, align 16 @@ -16,28 +16,28 @@ define void @gc_frame_lowering(i64 %a, i64 %b) { top: ; CHECK-LABEL: @gc_frame_lowering -; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) -; OPAQUE: %pgcstack = call ptr @julia.get_pgcstack() +; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; CHECK: %pgcstack = call ptr @julia.get_pgcstack() %pgcstack = call {}*** @julia.get_pgcstack() -; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) -; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64 +; CHECK-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2) +; CHECK-NEXT: call ptr addrspace(10) @jl_box_int64 %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a) -; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) -; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] +; CHECK: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]]) +; CHECK-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]] %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b) ; CHECK-NEXT: %bboxed = ; Make sure the same gc slot isn't re-used -; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) -; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) -; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] +; CHECK-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]]) +; CHECK: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]]) +; CHECK-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]] ; CHECK-NEXT: call void @boxed_simple call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed) -; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) +; CHECK-NEXT: call void @julia.pop_gc_frame(ptr %gcframe) ret void } @@ -48,14 +48,14 @@ top: %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 -; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 -; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 -; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) -; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 -; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task, +; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; OPAQUE-NEXT: ret ptr addrspace(10) %v +; CHECK-NEXT: ret ptr addrspace(10) %v ret {} addrspace(10)* %v } @@ -71,20 +71,20 @@ top: %0 = bitcast {}*** %pgcstack to {}** %current_task = getelementptr inbounds {}*, {}** %0, i64 -12 -; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 -; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16 -; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 -; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) -; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 -; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 +; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12 +; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task, +; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0 +; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}}) +; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1 +; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4 %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag) -; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) +; CHECK-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10) %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)* -; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 +; CHECK-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7 %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1 -; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 +; CHECK-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8 store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2 -; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 +; CHECK-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7 %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4 ; CHECK-NEXT: ret void ret void @@ -162,13 +162,13 @@ define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) { ; CHECK-LABEL: @decayar -; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2) -; OPAQUE: [[gc_slot_addr_:%.*]]1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1) -; OPAQUE: store ptr addrspace(10) %l0, ptr [[gc_slot_addr_:%.*]], align 8 -; OPAQUE: [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) -; OPAQUE: store ptr addrspace(10) %l1, ptr [[gc_slot_addr_:%.*]], align 8 -; OPAQUE: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1) -; OPAQUE: call void @julia.pop_gc_frame(ptr %gcframe) +; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2) +; CHECK: [[gc_slot_addr_:%.*]]1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1) +; CHECK: store ptr addrspace(10) %l0, ptr [[gc_slot_addr_:%.*]], align 8 +; CHECK: [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0) +; CHECK: store ptr addrspace(10) %l1, ptr [[gc_slot_addr_:%.*]], align 8 +; CHECK: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1) +; CHECK: call void @julia.pop_gc_frame(ptr %gcframe) !0 = !{i64 0, i64 23} !1 = !{!1} diff --git a/test/llvmpasses/names.jl b/test/llvmpasses/names.jl index fe692d0fab787..1ab2204044804 100644 --- a/test/llvmpasses/names.jl +++ b/test/llvmpasses/names.jl @@ -135,7 +135,8 @@ emit(f2, Float64, Float64, Float64, Float64, Float64, Float64, Float64) # CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f5 # CHECK-SAME: %"a::A" -# CHECK: %"a::A.b_ptr.c_ptr.d +# CHECK: %"a::A.d +# COM: this text check relies on our LLVM code emission being relatively poor, which is not always the case emit(f5, A) # CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f6 From bcf41ba0cdd2aa8c1f21c8aa08b52b017ffbd014 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Wed, 11 Sep 2024 02:04:35 +0900 Subject: [PATCH 24/57] minor fixup for JuliaLang/julia#55705 (#55726) --- base/compiler/abstractinterpretation.jl | 3 ++- test/compiler/inference.jl | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 83a39ce10d891..bb5f2dd1ad180 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -2232,7 +2232,8 @@ function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vecto elseif !isvarargtype(argtypes[2]) MethodError else - tmerge(𝕃ᵢ, MethodError, ArgumentError) + ⊔ = join(typeinf_lattice(interp)) + MethodError ⊔ ArgumentError end return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()) end diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index f15df49d75745..9454c53a09fb7 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -6137,3 +6137,7 @@ end == TypeError @test Base.infer_exception_type((Char,)) do x invoke(f_invoke_exct, Tuple{Number}, x) end == TypeError + +@test Base.infer_exception_type((Vector{Any},)) do args + Core.throw_methoderror(args...) +end == Union{MethodError,ArgumentError} From a7c9235afe27fd34e31fcce387ade7f508d45003 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 10 Sep 2024 14:43:33 -0400 Subject: [PATCH 25/57] [REPL] prevent silent hang if precompile script async blocks fail (#55685) --- stdlib/REPL/src/Terminals.jl | 2 ++ stdlib/REPL/src/precompile.jl | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/stdlib/REPL/src/Terminals.jl b/stdlib/REPL/src/Terminals.jl index 4f3e99f1d206c..0cf6888d248e8 100644 --- a/stdlib/REPL/src/Terminals.jl +++ b/stdlib/REPL/src/Terminals.jl @@ -97,6 +97,7 @@ abstract type UnixTerminal <: TextTerminal end pipe_reader(t::UnixTerminal) = t.in_stream::IO pipe_writer(t::UnixTerminal) = t.out_stream::IO +@nospecialize mutable struct TerminalBuffer <: UnixTerminal out_stream::IO end @@ -107,6 +108,7 @@ mutable struct TTYTerminal <: UnixTerminal out_stream::IO err_stream::IO end +@specialize const CSI = "\x1b[" diff --git a/stdlib/REPL/src/precompile.jl b/stdlib/REPL/src/precompile.jl index a6effb9f013fc..82a1a0bb78ee8 100644 --- a/stdlib/REPL/src/precompile.jl +++ b/stdlib/REPL/src/precompile.jl @@ -96,7 +96,7 @@ let repltask = @task try Base.run_std_repl(REPL, false, :yes, true) finally - redirect_stderr(isopen(orig_stderr) ? orig_stderr : devnull) + redirect_stdin(isopen(orig_stdin) ? orig_stdin : devnull) redirect_stdout(isopen(orig_stdout) ? orig_stdout : devnull) close(pts) end @@ -106,14 +106,14 @@ let redirect_stdin(pts) redirect_stdout(pts) redirect_stderr(pts) - REPL.print_qualified_access_warning(Base.Iterators, Base, :minimum) # trigger the warning while stderr is suppressed try - schedule(repltask) - # wait for the definitive prompt before start writing to the TTY - readuntil(output_copy, JULIA_PROMPT) + REPL.print_qualified_access_warning(Base.Iterators, Base, :minimum) # trigger the warning while stderr is suppressed finally redirect_stderr(isopen(orig_stderr) ? orig_stderr : devnull) end + schedule(repltask) + # wait for the definitive prompt before start writing to the TTY + readuntil(output_copy, JULIA_PROMPT) write(debug_output, "\n#### REPL STARTED ####\n") sleep(0.1) readavailable(output_copy) @@ -148,9 +148,9 @@ let write(ptm, "$CTRL_D") wait(repltask) finally - close(pts) redirect_stdin(isopen(orig_stdin) ? orig_stdin : devnull) redirect_stdout(isopen(orig_stdout) ? orig_stdout : devnull) + close(pts) end wait(tee) end From 56451d8eb18f6a1f9339eeb656033b417ec615b6 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Tue, 10 Sep 2024 20:45:38 +0200 Subject: [PATCH 26/57] Various fixes to byte / bytearray search (#54579) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was originally intended as a targeted fix to #54578, but I ran into a bunch of smaller issues with this code that also needed to be solved and it turned out to be difficult to fix them with small, trivial PRs. I would also like to refactor this whole file, but I want these correctness fixes to be merged first, because a larger refactoring has higher risk of getting stuck without getting reviewed and merged. ## Larger things that needs decisions * The internal union `Base.ByteArray` has been deleted. Instead, the unions `DenseInt8` and `DenseUInt8` have been added. These more comprehensively cover the types that was meant, e.g. `Memory{UInt8}` was incorrectly not covered by the former. As stated in the TODO, the concept of a "memory backed dense byte array" is needed throughout Julia, so this ideally needs to be implemented as a single type and used throughout Base. The fix here is a decent temporary solution. See #53178 #54581 * The `findall` docstring between two arrays was incorrectly not attached to the method - now it is. **Note that this change _changes_ the documentation** since it includes a docstring that was previously missed. Hence, it's an API addition. * Added a new minimal `testhelpers/OffsetDenseArrays.jl` which provide a `DenseVector` with offset axes for testing purposes. ## Trivial fixes * `findfirst(==(Int8(-1)), [0xff])` and similar findlast, findnext and findprev is no longer buggy, see #54578 * `findfirst([0x0ff], Int8[-1])` is similarly no longer buggy, see #54578 * `findnext(==('\xa6'), "æ", 1)` and `findprev(==('\xa6'), "æa", 2)` no longer incorrectly throws an error * The byte-oriented find* functions now work correctly with offset arrays * Fixed incorrect use of `GC.@preserve`, where the pointer was taken before the preserve block. * More of the optimised string methods now also apply to `SubString{String}` Closes #54578 Co-authored-by: Martin Holters --- base/char.jl | 1 + base/strings/search.jl | 136 +++++++++++++++++--------- test/strings/search.jl | 49 ++++++++++ test/testhelpers/OffsetDenseArrays.jl | 31 ++++++ 4 files changed, 170 insertions(+), 47 deletions(-) create mode 100644 test/testhelpers/OffsetDenseArrays.jl diff --git a/base/char.jl b/base/char.jl index bc68a672ce0ca..2e8410f6903e2 100644 --- a/base/char.jl +++ b/base/char.jl @@ -223,6 +223,7 @@ hash(x::Char, h::UInt) = hash_uint64(((bitcast(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h)) first_utf8_byte(c::Char) = (bitcast(UInt32, c) >> 24) % UInt8 +first_utf8_byte(c::AbstractChar) = first_utf8_byte(Char(c)::Char) # fallbacks: isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y)) diff --git a/base/strings/search.jl b/base/strings/search.jl index b9c14f06e0898..9bd69ae2f8a03 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -10,7 +10,29 @@ match strings with [`match`](@ref). """ abstract type AbstractPattern end -nothing_sentinel(i) = i == 0 ? nothing : i +# TODO: These unions represent bytes in memory that can be accessed via a pointer. +# this property is used throughout Julia, e.g. also in IO code. +# This deserves a better solution - see #53178. +# If such a better solution comes in place, these unions should be replaced. +const DenseInt8 = Union{ + DenseArray{Int8}, + FastContiguousSubArray{Int8,N,<:DenseArray} where N +} + +# Note: This union is different from that above in that it includes CodeUnits. +# Currently, this is redundant as CodeUnits <: DenseVector, but this subtyping +# is buggy and may be removed in the future, see #54002 +const DenseUInt8 = Union{ + DenseArray{UInt8}, + FastContiguousSubArray{UInt8,N,<:DenseArray} where N, + CodeUnits{UInt8, <:Union{String, SubString{String}}}, + FastContiguousSubArray{UInt8,N,<:CodeUnits{UInt8, <:Union{String, SubString{String}}}} where N, +} + +const DenseUInt8OrInt8 = Union{DenseUInt8, DenseInt8} + +last_byteindex(x::Union{String, SubString{String}}) = ncodeunits(x) +last_byteindex(x::DenseUInt8OrInt8) = lastindex(x) function last_utf8_byte(c::Char) u = reinterpret(UInt32, c) @@ -30,11 +52,11 @@ function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar} end @inbounds isvalid(s, i) || string_index_err(s, i) c = pred.x - c ≤ '\x7f' && return nothing_sentinel(_search(s, c % UInt8, i)) + c ≤ '\x7f' && return _search(s, first_utf8_byte(c), i) while true i = _search(s, first_utf8_byte(c), i) - i == 0 && return nothing - pred(s[i]) && return i + i === nothing && return nothing + isvalid(s, i) && pred(s[i]) && return i i = nextind(s, i) end end @@ -47,31 +69,41 @@ const DenseBytes = Union{ CodeUnits{UInt8, <:Union{String, SubString{String}}}, } -const ByteArray = Union{DenseBytes, DenseArrayType{Int8}} +function findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{UInt8, Int8}}, a::Union{DenseInt8, DenseUInt8}) + findnext(pred, a, firstindex(a)) +end -findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray) = - nothing_sentinel(_search(a, pred.x)) +function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer) + _search(a, pred.x, i) +end -findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) = - nothing_sentinel(_search(a, pred.x, i)) +function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer) + _search(a, pred.x, i) +end -findfirst(::typeof(iszero), a::ByteArray) = nothing_sentinel(_search(a, zero(UInt8))) -findnext(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_search(a, zero(UInt8), i)) +# iszero is special, in that the bitpattern for zero for Int8 and UInt8 is the same, +# so we can use memchr even if we search for an Int8 in an UInt8 array or vice versa +findfirst(::typeof(iszero), a::DenseUInt8OrInt8) = _search(a, zero(UInt8)) +findnext(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer) = _search(a, zero(UInt8), i) -function _search(a::Union{String,SubString{String},<:ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1) - if i < 1 +function _search(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = firstindex(a)) + fst = firstindex(a) + lst = last_byteindex(a) + if i < fst throw(BoundsError(a, i)) end - n = sizeof(a) - if i > n - return i == n+1 ? 0 : throw(BoundsError(a, i)) + n_bytes = lst - i + 1 + if i > lst + return i == lst+1 ? nothing : throw(BoundsError(a, i)) end - p = pointer(a) - q = GC.@preserve a ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-1, b, n-i+1) - return q == C_NULL ? 0 : Int(q-p+1) + GC.@preserve a begin + p = pointer(a) + q = ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-fst, b, n_bytes) + end + return q == C_NULL ? nothing : (q-p+fst) % Int end -function _search(a::ByteArray, b::AbstractChar, i::Integer = 1) +function _search(a::DenseUInt8, b::AbstractChar, i::Integer = firstindex(a)) if isascii(b) _search(a,UInt8(b),i) else @@ -80,41 +112,51 @@ function _search(a::ByteArray, b::AbstractChar, i::Integer = 1) end function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar}, - s::String, i::Integer) + s::Union{String, SubString{String}}, i::Integer) c = pred.x - c ≤ '\x7f' && return nothing_sentinel(_rsearch(s, c % UInt8, i)) + c ≤ '\x7f' && return _rsearch(s, first_utf8_byte(c), i) b = first_utf8_byte(c) while true i = _rsearch(s, b, i) - i == 0 && return nothing - pred(s[i]) && return i + i == nothing && return nothing + isvalid(s, i) && pred(s[i]) && return i i = prevind(s, i) end end -findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray) = - nothing_sentinel(_rsearch(a, pred.x)) +function findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::DenseUInt8OrInt8) + findprev(pred, a, lastindex(a)) +end -findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) = - nothing_sentinel(_rsearch(a, pred.x, i)) +function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer) + _rsearch(a, pred.x, i) +end -findlast(::typeof(iszero), a::ByteArray) = nothing_sentinel(_rsearch(a, zero(UInt8))) -findprev(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_rsearch(a, zero(UInt8), i)) +function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer) + _rsearch(a, pred.x, i) +end -function _rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = sizeof(a)) - if i < 1 - return i == 0 ? 0 : throw(BoundsError(a, i)) +# See comments above for findfirst(::typeof(iszero)) methods +findlast(::typeof(iszero), a::DenseUInt8OrInt8) = _rsearch(a, zero(UInt8)) +findprev(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer) = _rsearch(a, zero(UInt8), i) + +function _rsearch(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = last_byteindex(a)) + fst = firstindex(a) + lst = last_byteindex(a) + if i < fst + return i == fst - 1 ? nothing : throw(BoundsError(a, i)) + end + if i > lst + return i == lst+1 ? nothing : throw(BoundsError(a, i)) end - n = sizeof(a) - if i > n - return i == n+1 ? 0 : throw(BoundsError(a, i)) + GC.@preserve a begin + p = pointer(a) + q = ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i-fst+1) end - p = pointer(a) - q = GC.@preserve a ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i) - return q == C_NULL ? 0 : Int(q-p+1) + return q == C_NULL ? nothing : (q-p+fst) % Int end -function _rsearch(a::ByteArray, b::AbstractChar, i::Integer = length(a)) +function _rsearch(a::DenseUInt8, b::AbstractChar, i::Integer = length(a)) if isascii(b) _rsearch(a,UInt8(b),i) else @@ -224,18 +266,19 @@ end in(c::AbstractChar, s::AbstractString) = (findfirst(isequal(c),s)!==nothing) -function _searchindex(s::Union{AbstractString,ByteArray}, +function _searchindex(s::Union{AbstractString,DenseUInt8OrInt8}, t::Union{AbstractString,AbstractChar,Int8,UInt8}, i::Integer) + sentinel = firstindex(s) - 1 x = Iterators.peel(t) if isnothing(x) - return 1 <= i <= nextind(s,lastindex(s))::Int ? i : + return firstindex(s) <= i <= nextind(s,lastindex(s))::Int ? i : throw(BoundsError(s, i)) end t1, trest = x while true i = findnext(isequal(t1),s,i) - if i === nothing return 0 end + if i === nothing return sentinel end ii = nextind(s, i)::Int a = Iterators.Stateful(trest) matched = all(splat(==), zip(SubString(s, ii), a)) @@ -509,9 +552,8 @@ julia> findall(UInt8[1,2], UInt8[1,2,3,1,2]) !!! compat "Julia 1.3" This method requires at least Julia 1.3. """ - -function findall(t::Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}}, - s::Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}}, +function findall(t::Union{AbstractString, AbstractPattern, AbstractVector{UInt8}}, + s::Union{AbstractString, AbstractPattern, AbstractVector{UInt8}}, ; overlap::Bool=false) found = UnitRange{Int}[] i, e = firstindex(s), lastindex(s) @@ -564,7 +606,7 @@ function _rsearchindex(s::AbstractString, end end -function _rsearchindex(s::String, t::String, i::Integer) +function _rsearchindex(s::Union{String, SubString{String}}, t::Union{String, SubString{String}}, i::Integer) # Check for fast case of a single byte if lastindex(t) == 1 return something(findprev(isequal(t[1]), s, i), 0) diff --git a/test/strings/search.jl b/test/strings/search.jl index 692286359868d..d8883bad24b48 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -155,6 +155,16 @@ for str in [u8str] @test findprev(isequal('ε'), str, 4) === nothing end +# See the comments in #54579 +@testset "Search for invalid chars" begin + @test findfirst(==('\xff'), "abc\xffde") == 4 + @test findprev(isequal('\xa6'), "abc\xa69", 5) == 4 + @test isnothing(findfirst(==('\xff'), "abcdeæd")) + + @test isnothing(findnext(==('\xa6'), "æ", 1)) + @test isnothing(findprev(==('\xa6'), "æa", 2)) +end + # string forward search with a single-char string @test findfirst("x", astr) === nothing @test findfirst("H", astr) == 1:1 @@ -445,6 +455,45 @@ end @test_throws BoundsError findprev(pattern, A, -3) end end + + @test findall([0x01, 0x02], [0x03, 0x01, 0x02, 0x01, 0x02, 0x06]) == [2:3, 4:5] + @test isempty(findall([0x04, 0x05], [0x03, 0x04, 0x06])) +end + +# Issue 54578 +@testset "No conflation of Int8 and UInt8" begin + # Work for mixed types if the values are the same + @test findfirst(==(Int8(1)), [0x01]) == 1 + @test findnext(iszero, Int8[0, -2, 0, -3], 2) == 3 + @test findfirst(Int8[1,4], UInt8[0, 2, 4, 1, 8, 1, 4, 2]) == 6:7 + @test findprev(UInt8[5, 6], Int8[1, 9, 2, 5, 6, 3], 6) == 4:5 + + # Returns nothing for the same methods if the values are different, + # even if the bitpatterns are the same + @test isnothing(findfirst(==(Int8(-1)), [0xff])) + @test isnothing(findnext(isequal(0xff), Int8[-1, -2, -1], 2)) + @test isnothing(findfirst(UInt8[0xff, 0xfe], Int8[0, -1, -2, 1, 8, 1, 4, 2])) + @test isnothing(findprev(UInt8[0xff, 0xfe], Int8[1, 9, 2, -1, -2, 3], 6)) +end + +@testset "DenseArray with offsets" begin + isdefined(Main, :OffsetDenseArrays) || @eval Main include("../testhelpers/OffsetDenseArrays.jl") + OffsetDenseArrays = Main.OffsetDenseArrays + + A = OffsetDenseArrays.OffsetDenseArray(collect(0x61:0x69), 100) + @test findfirst(==(0x61), A) == 101 + @test findlast(==(0x61), A) == 101 + @test findfirst(==(0x00), A) === nothing + + @test findfirst([0x62, 0x63, 0x64], A) == 102:104 + @test findlast([0x63, 0x64], A) == 103:104 + @test findall([0x62, 0x63], A) == [102:103] + + @test findfirst(iszero, A) === nothing + A = OffsetDenseArrays.OffsetDenseArray([0x01, 0x02, 0x00, 0x03], -100) + @test findfirst(iszero, A) == -97 + @test findnext(==(0x02), A, -99) == -98 + @test findnext(==(0x02), A, -97) === nothing end # issue 32568 diff --git a/test/testhelpers/OffsetDenseArrays.jl b/test/testhelpers/OffsetDenseArrays.jl new file mode 100644 index 0000000000000..44a1b8d627800 --- /dev/null +++ b/test/testhelpers/OffsetDenseArrays.jl @@ -0,0 +1,31 @@ +""" + module OffsetDenseArrays + +A minimal implementation of an offset array which is also <: DenseArray. +""" +module OffsetDenseArrays + +struct OffsetDenseArray{A <: DenseVector, T} <: DenseVector{T} + x::A + offset::Int +end +OffsetDenseArray(x::AbstractVector{T}, i::Integer) where {T} = OffsetDenseArray{typeof(x), T}(x, Int(i)) + +Base.size(x::OffsetDenseArray) = size(x.x) +Base.pointer(x::OffsetDenseArray) = pointer(x.x) + +function Base.getindex(x::OffsetDenseArray, i::Integer) + @boundscheck checkbounds(x.x, i - x.offset) + x.x[i - x.offset] +end + +function Base.setindex(x::OffsetDenseArray, v, i::Integer) + @boundscheck checkbounds(x.x, i - x.offset) + x.x[i - x.offset] = v +end + +IndexStyle(::Type{<:OffsetDenseArray}) = Base.IndexLinear() +Base.axes(x::OffsetDenseArray) = (x.offset + 1 : x.offset + length(x.x),) +Base.keys(x::OffsetDenseArray) = only(axes(x)) + +end # module From c6c449ccdb75fbd72700704ae669e1f98e512206 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 10 Sep 2024 16:13:52 -0400 Subject: [PATCH 27/57] codegen: deduplicate code for calling a specsig (#55728) I am tired of having 3 gratuitously different versions of this code to maintain. --- src/codegen.cpp | 296 +++++++++--------------------------------------- 1 file changed, 56 insertions(+), 240 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 9184e4895ab6d..a82056eb36e21 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2342,7 +2342,8 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox // replace T::Type{T} with T return ghostValue(ctx, typ); } - } else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) { + } + else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) { // no need to explicitly load/store a constant/ghost value return ghostValue(ctx, typ); } @@ -5007,17 +5008,13 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, trampoline); } -static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal, - ArrayRef argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty) +static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, jl_returninfo_t &returninfo, jl_code_instance_t *fromexternal, + ArrayRef argv, size_t nargs) { ++EmittedSpecfunCalls; // emit specialized call site bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); - jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg); FunctionType *cft = returninfo.decl.getFunctionType(); - *cc = returninfo.cc; - *return_roots = returninfo.return_roots; - size_t nfargs = cft->getNumParams(); SmallVector argvals(nfargs); unsigned idx = 0; @@ -5059,16 +5056,17 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos // n.b.: specTypes is required to be a datatype by construction for specsig jl_cgval_t arg = argv[i]; if (is_opaque_closure && i == 0) { - // Special optimization for opaque closures: We know that specsig opaque - // closures don't look at their type tag (they are fairly quickly discarded - // for their environments). Therefore, we can just pass these as a pointer, - // rather than a boxed value. + // Special implementation for opaque closures: their jt and thus + // julia_type_to_llvm values are likely wrong, so override the + // behavior here to directly pass the expected pointer based instead + // just on passing arg as a pointer arg = value_to_pointer(ctx, arg); argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg)); } else if (is_uniquerep_Type(jt)) { continue; - } else { + } + else { bool isboxed = deserves_argbox(jt); Type *et = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); if (type_is_ghost(et)) @@ -5079,7 +5077,6 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos } else if (et->isAggregateType()) { arg = value_to_pointer(ctx, arg); - // can lazy load on demand, no copy needed argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg)); } else { @@ -5135,7 +5132,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos ctx.builder.CreateICmpEQ( ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), - decay_derived(ctx, argvals[0]), + decay_derived(ctx, result), decay_derived(ctx, box) ); retval = mark_julia_slot(derived, @@ -5149,6 +5146,19 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa().tbaa_stack); break; } + return retval; +} + +static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal, + ArrayRef argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *nreturn_roots, jl_value_t *inferred_retty) +{ + ++EmittedSpecfunCalls; + // emit specialized call site + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); + jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg); + *cc = returninfo.cc; + *nreturn_roots = returninfo.return_roots; + jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, specTypes, jlretty, returninfo, fromexternal, argv, nargs); // see if inference has a different / better type for the call than the lambda return update_julia_type(ctx, retval, inferred_retty); } @@ -6248,7 +6258,8 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met if (closure_method->source) { mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec); ci = (jl_code_instance_t*)jl_rettype_inferred_addr(mi, ctx.min_world, ctx.max_world); - } else { + } + else { mi = (jl_method_instance_t*)jl_atomic_load_relaxed(&closure_method->specializations); assert(jl_is_method_instance(mi)); ci = jl_atomic_load_relaxed(&mi->cache); @@ -6291,7 +6302,8 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met closure_decls.specFunctionObject; if (GlobalValue *V = jl_Module->getNamedValue(fname)) { F = cast(V); - } else { + } + else { F = Function::Create(get_func_sig(ctx.builder.getContext()), Function::ExternalLinkage, fname, jl_Module); @@ -6302,7 +6314,8 @@ static std::pair get_oc_function(jl_codectx_t &ctx, jl_met Function *specF = NULL; if (!isspecsig) { specF = F; - } else { + } + else { //emission context holds context lock so can get module specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject); if (specF) { @@ -6817,14 +6830,6 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr return f; } -static Type *get_returnroots_type(jl_codectx_t &ctx, unsigned rootcount) { - return ArrayType::get(ctx.types().T_prjlvalue, rootcount); -} - -static Type *get_unionbytes_type(LLVMContext &C, unsigned unionbytes) { - return ArrayType::get(getInt8Ty(C), unionbytes); -} - static void emit_cfunc_invalidate( Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots, jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure, @@ -6876,15 +6881,13 @@ static void emit_cfunc_invalidate( else { Value *arg_v = &*AI; ++AI; - Type *at = arg_v->getType(); if ((i == 0 && is_for_opaque_closure) || (!isboxed && et->isAggregateType())) { myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const); } else { - assert(at == et); + assert(arg_v->getType() == et); myargs[i] = mark_julia_type(ctx, arg_v, isboxed, jt); } - (void)at; } } assert(AI == gf_thunk->arg_end()); @@ -7306,77 +7309,9 @@ static Function* gen_cfun_wrapper( bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; assert(calltype == 3); // emit a specsig call - bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, invoke, codeinst); + bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg); - FunctionType *cft = returninfo.decl.getFunctionType(); - jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet); - - // TODO: Can use use emit_call_specfun_other here? - SmallVector args; - Value *result = nullptr; - if (jlfunc_sret || returninfo.cc == jl_returninfo_t::Union) { - // fuse the two sret together, or emit an alloca to hold it - if (sig.sret && jlfunc_sret) { - result = emit_bitcast(ctx, sretPtr, cft->getParamType(0)); - } - else { - if (jlfunc_sret) { - result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType()); - setName(ctx.emission_context, result, "sret"); - #if JL_LLVM_VERSION < 170000 - assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); - #endif - } else { - result = emit_static_alloca(ctx, get_unionbytes_type(ctx.builder.getContext(), returninfo.union_bytes)); - setName(ctx.emission_context, result, "result_union"); - #if JL_LLVM_VERSION < 170000 - assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); - #endif - } - } - args.push_back(result); - } - if (returninfo.return_roots) { - AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots)); - setName(ctx.emission_context, return_roots, "return_roots"); - args.push_back(return_roots); - } - if (gcstack_arg) - args.push_back(ctx.pgcstack); - for (size_t i = 0; i < nargs + 1; i++) { - // figure out how to repack the arguments - jl_cgval_t &inputarg = inputargs[i]; - Value *arg; - jl_value_t *spect = (i == 0 && is_opaque_closure) ? (jl_value_t*)jl_any_type : - jl_nth_slot_type(lam->specTypes, i); - // n.b. specTypes is required to be a datatype by construction for specsig - bool isboxed = deserves_argbox(spect); - Type *T = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, spect); - if (is_uniquerep_Type(spect)) { - continue; - } - else if (isboxed) { - arg = boxed(ctx, inputarg); - } - else if (type_is_ghost(T)) { - continue; // ghost types are skipped by the specsig method signature - } - else if (T->isAggregateType()) { - // aggregate types are passed by pointer - inputarg = value_to_pointer(ctx, inputarg); - arg = decay_derived(ctx, data_pointer(ctx, inputarg)); - } - else { - arg = emit_unbox(ctx, T, inputarg, spect); - assert(!isa(arg)); - } - - // add to argument list - args.push_back(arg); - } - Value *theFptr = returninfo.decl.getCallee(); - assert(theFptr); if (age_ok) { funcName += "_gfthunk"; Function *gf_thunk = Function::Create(returninfo.decl.getFunctionType(), @@ -7388,49 +7323,17 @@ static Function* gen_cfun_wrapper( // but which has the signature of a specsig emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context, min_world, max_world); - theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk); + returninfo.decl = FunctionCallee(returninfo.decl.getFunctionType(), ctx.builder.CreateSelect(age_ok, returninfo.decl.getCallee(), gf_thunk)); } - - #if JL_LLVM_VERSION < 170000 - assert(cast(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType())); - #endif - CallInst *call = ctx.builder.CreateCall( - returninfo.decl.getFunctionType(), - theFptr, ArrayRef(args)); - call->setAttributes(returninfo.attrs); - if (gcstack_arg) - call->setCallingConv(CallingConv::Swift); - - switch (returninfo.cc) { - case jl_returninfo_t::Boxed: - retval = mark_julia_type(ctx, call, true, astrt); - break; - case jl_returninfo_t::Register: - retval = mark_julia_type(ctx, call, false, astrt); - break; - case jl_returninfo_t::SRet: - retval = mark_julia_slot(result, astrt, NULL, ctx.tbaa().tbaa_stack); - break; - case jl_returninfo_t::Union: { - Value *box = ctx.builder.CreateExtractValue(call, 0); - Value *tindex = ctx.builder.CreateExtractValue(call, 1); - Value *derived = ctx.builder.CreateSelect( - ctx.builder.CreateICmpEQ( - ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)), - ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)), - decay_derived(ctx, result), - decay_derived(ctx, box)); - retval = mark_julia_slot(derived, - astrt, - tindex, - ctx.tbaa().tbaa_stack); - assert(box->getType() == ctx.types().T_prjlvalue); - retval.Vboxed = box; - break; - } - case jl_returninfo_t::Ghosts: - retval = mark_julia_slot(NULL, astrt, call, ctx.tbaa().tbaa_stack); - break; + retval = emit_call_specfun_other(ctx, is_opaque_closure, lam->specTypes, codeinst->rettype, returninfo, nullptr, inputargs, nargs + 1); + jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet); + if (jlfunc_sret && sig.sret) { + // fuse the two sret together + assert(retval.ispointer()); + AllocaInst *result = cast(retval.V); + retval.V = sretPtr; + result->replaceAllUsesWith(sretPtr); + result->eraseFromParent(); } } @@ -7729,7 +7632,7 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi } // generate a julia-callable function that calls f (AKA lam) -static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, const jl_returninfo_t &f, int retarg, StringRef funcName, +static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, jl_returninfo_t &f, unsigned nargs, int retarg, StringRef funcName, Module *M, jl_codegen_params_t ¶ms) { ++GeneratedInvokeWrappers; @@ -7757,86 +7660,26 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret ctx.builder.SetCurrentDebugLocation(noDbg); allocate_gc_frame(ctx, b0); - // TODO: replace this with emit_call_specfun_other? - FunctionType *ftype = const_cast(f.decl).getFunctionType(); - size_t nfargs = ftype->getNumParams(); - SmallVector args(nfargs); - unsigned idx = 0; - AllocaInst *result = NULL; - switch (f.cc) { - case jl_returninfo_t::Boxed: - case jl_returninfo_t::Register: - case jl_returninfo_t::Ghosts: - break; - case jl_returninfo_t::SRet: - #if JL_LLVM_VERSION < 170000 - assert(cast(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType())); - #endif - result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType()); - setName(ctx.emission_context, result, "sret"); - args[idx] = result; - idx++; - break; - case jl_returninfo_t::Union: - result = ctx.builder.CreateAlloca(ArrayType::get(getInt8Ty(ctx.builder.getContext()), f.union_bytes)); - if (f.union_align > 1) - result->setAlignment(Align(f.union_align)); - args[idx] = result; - idx++; - setName(ctx.emission_context, result, "result_union"); - break; - } - if (f.return_roots) { - AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, f.return_roots)); - setName(ctx.emission_context, return_roots, "return_roots"); - args[idx] = return_roots; - idx++; - } - bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg); - if (gcstack_arg) { - args[idx] = ctx.pgcstack; - idx++; - } + SmallVector argv(nargs); bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure; - for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) { + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + for (size_t i = 0; i < nargs; ++i) { jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type : jl_nth_slot_type(lam->specTypes, i); - // n.b. specTypes is required to be a datatype by construction for specsig - bool isboxed = deserves_argbox(ty); - Type *lty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, ty); - if (type_is_ghost(lty) || is_uniquerep_Type(ty)) - continue; Value *theArg; if (i == 0) { - // This function adapts from generic jlcall to OC specsig. Generic jlcall pointers - // come in as ::Tracked, but specsig expected ::Derived. - if (is_opaque_closure) - theArg = decay_derived(ctx, funcArg); - else - theArg = funcArg; + theArg = funcArg; } else { Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); theArg = ai.decorateInst(maybe_mark_load_dereferenceable( ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))), false, ty)); } - if (!isboxed) { - theArg = decay_derived(ctx, theArg); - if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers - theArg = ctx.builder.CreateAlignedLoad(lty, theArg, Align(julia_alignment(ty))); - } - assert(!isa(theArg)); - args[idx] = theArg; - idx++; + argv[i] = mark_julia_type(ctx, theArg, true, ty); } - CallInst *call = ctx.builder.CreateCall(f.decl, args); - call->setAttributes(f.attrs); - if (gcstack_arg) - call->setCallingConv(CallingConv::Swift); - jl_cgval_t retval; + jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, lam->specTypes, jlretty, f, nullptr, argv, nargs); if (retarg != -1) { Value *theArg; if (retarg == 0) @@ -7847,34 +7690,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret Align(sizeof(void*))); retval = mark_julia_type(ctx, theArg, true, jl_any_type); } - else { - switch (f.cc) { - case jl_returninfo_t::Boxed: - retval = mark_julia_type(ctx, call, true, jlretty); - break; - case jl_returninfo_t::Register: - retval = mark_julia_type(ctx, call, false, jlretty); - break; - case jl_returninfo_t::SRet: - retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack); - break; - case jl_returninfo_t::Union: - // result is technically not right here, but `boxed` will only look at it - // for the unboxed values, so it's ok. - retval = mark_julia_slot(result, - jlretty, - ctx.builder.CreateExtractValue(call, 1), - ctx.tbaa().tbaa_stack); - retval.Vboxed = ctx.builder.CreateExtractValue(call, 0); - assert(retval.Vboxed->getType() == ctx.types().T_prjlvalue); - break; - case jl_returninfo_t::Ghosts: - retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa().tbaa_stack); - break; - } - } ctx.builder.CreateRet(boxed(ctx, retval)); - return w; } static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, BitVector *used_arguments, size_t *arg_offset) @@ -7986,22 +7802,21 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value bool isboxed = false; Type *ty = NULL; if (i == 0 && is_opaque_closure) { - ty = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived); - isboxed = true; // true-ish anyway - we might not have the type tag + ty = nullptr; // special token to avoid computing this unnecessarily } else { if (is_uniquerep_Type(jt)) continue; isboxed = deserves_argbox(jt); ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt); + if (type_is_ghost(ty)) + continue; } - if (type_is_ghost(ty)) - continue; AttrBuilder param(ctx.builder.getContext()); - if (ty->isAggregateType()) { // aggregate types are passed by pointer + if (ty == nullptr || ty->isAggregateType()) { // aggregate types are passed by pointer param.addAttribute(Attribute::NoCapture); param.addAttribute(Attribute::ReadOnly); - ty = PointerType::get(ty, AddressSpace::Derived); + ty = ctx.builder.getPtrTy(AddressSpace::Derived); } else if (isboxed && jl_is_immutable_datatype(jt)) { param.addAttribute(Attribute::ReadOnly); @@ -8351,7 +8166,8 @@ static jl_llvm_functions_t std::string wrapName; raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1); declarations.functionObject = wrapName; - (void)gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, declarations.functionObject, M, ctx.emission_context); + size_t nparams = jl_nparams(lam->specTypes); + gen_invoke_wrapper(lam, jlrettype, returninfo, nparams, retarg, declarations.functionObject, M, ctx.emission_context); // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType) // TODO: add attributes: dereferenceable // TODO: (if needsparams) add attributes: dereferenceable, readonly, nocapture From d7e417d6b8112cf90aa689ca6a00197729aeedb3 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> Date: Tue, 10 Sep 2024 23:35:40 -0400 Subject: [PATCH 28/57] Fix "Various fixes to byte / bytearray search" (#55734) Fixes the conflict between #54593 and #54579 `_search` returns `nothing` instead of zero as a sentinal in #54579 --- base/strings/search.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/search.jl b/base/strings/search.jl index 9bd69ae2f8a03..a481b3af775e0 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -178,7 +178,7 @@ function findall( i = firstindex(s) while true i = _search(s, byte, i) - iszero(i) && return result + isnothing(i) && return result i += 1 index = i - ncu # If the char is invalid, it's possible that its first byte is From bee75f73bbc4b295ff0899ac9aced852c6719364 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Wed, 11 Sep 2024 11:52:14 +0100 Subject: [PATCH 29/57] Fix `make binary-dist` when using `USE_BINARYBUILDER_LLVM=0` (#55731) `make binary-dist` expects lld to be in usr/tools but it ends up in usr/bin so I copied it into usr/tools. Should fix the scheduled source tests which currently fail at linking. I think this is also broken with `USE_BINARYBUILDER_LLVM=0` and `BUILD_LLD=0`, maybe https://github.com/JuliaLang/julia/commit/ceaeb7b71bc76afaca2f3b80998164a47e30ce33 is the fix? --------- Co-authored-by: Zentrik --- deps/llvm.mk | 3 +++ 1 file changed, 3 insertions(+) diff --git a/deps/llvm.mk b/deps/llvm.mk index 08aff443dcff8..73697069a4fac 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -292,6 +292,9 @@ ifeq ($(OS),Darwin) # https://github.com/JuliaLang/julia/issues/29981 LLVM_INSTALL += && ln -s libLLVM.dylib $2$$(build_shlibdir)/libLLVM-$$(LLVM_VER_SHORT).dylib endif +ifeq ($(BUILD_LLD), 1) +LLVM_INSTALL += && cp $2$$(build_bindir)/lld$$(EXE) $2$$(build_depsbindir) +endif $(eval $(call staged-install, \ llvm,$$(LLVM_SRC_DIR)/build_$$(LLVM_BUILDTYPE), \ From 255162c7197e973d0427cc11d1e0117cdd76a1bf Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Wed, 11 Sep 2024 11:50:05 -0400 Subject: [PATCH 30/57] Precompile the `@time_imports` printing so it doesn't confuse reports (#55729) Makes functions for the report printing that can be precompiled into the sysimage. --- base/loading.jl | 94 +++++++++++++++++++++------------- contrib/generate_precompile.jl | 9 ++++ 2 files changed, 66 insertions(+), 37 deletions(-) diff --git a/base/loading.jl b/base/loading.jl index 4e70d2bc257ea..8d180845f942f 100644 --- a/base/loading.jl +++ b/base/loading.jl @@ -1254,22 +1254,9 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No if parentmodule(M) === M && PkgId(M) == pkg register && register_root_module(M) if timing_imports - elapsed = round((time_ns() - t_before) / 1e6, digits = 1) + elapsed_time = time_ns() - t_before comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before - print(lpad(elapsed, 9), " ms ") - ext_parent = extension_parent_name(M) - if ext_parent !== nothing - print(ext_parent::String, " → ") - end - print(pkg.name) - if comp_time > 0 - printstyled(" ", Ryu.writefixed(Float64(100 * comp_time / (elapsed * 1e6)), 2), "% compilation time", color = Base.info_color()) - end - if recomp_time > 0 - perc = Float64(100 * recomp_time / comp_time) - printstyled(" (", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color()) - end - println() + print_time_imports_report(M, elapsed_time, comp_time, recomp_time) end return M end @@ -1281,6 +1268,52 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No end end +# printing functions for @time_imports +# note that the time inputs are UInt64 on all platforms. Give default values here so that we don't have +# confusing UInt64 types in generate_precompile.jl +function print_time_imports_report( + mod::Module, + elapsed_time::UInt64=UInt64(1), + comp_time::UInt64=UInt64(1), + recomp_time::UInt64=UInt64(1) + ) + print(lpad(round(elapsed_time / 1e6, digits=1), 9), " ms ") + ext_parent = extension_parent_name(mod) + if ext_parent !== nothing + print(ext_parent::String, " → ") + end + print(string(mod)) + if comp_time > 0 + perc = Ryu.writefixed(Float64(100 * comp_time / (elapsed_time)), 2) + printstyled(" $perc% compilation time", color = Base.info_color()) + end + if recomp_time > 0 + perc = Float64(100 * recomp_time / comp_time) + perc_show = perc < 1 ? "<1" : Ryu.writefixed(perc, 0) + printstyled(" ($perc_show% recompilation)", color = Base.warn_color()) + end + println() +end +function print_time_imports_report_init( + mod::Module, i::Int=1, + elapsed_time::UInt64=UInt64(1), + comp_time::UInt64=UInt64(1), + recomp_time::UInt64=UInt64(1) + ) + connector = i > 1 ? "├" : "┌" + printstyled(" $connector ", color = :light_black) + print("$(round(elapsed_time / 1e6, digits=1)) ms $mod.__init__() ") + if comp_time > 0 + perc = Ryu.writefixed(Float64(100 * (comp_time) / elapsed_time), 2) + printstyled("$perc% compilation time", color = Base.info_color()) + end + if recomp_time > 0 + perc = Float64(100 * recomp_time / comp_time) + printstyled(" ($(perc < 1 ? "<1" : Ryu.writefixed(perc, 0))% recompilation)", color = Base.warn_color()) + end + println() +end + # if M is an extension, return the string name of the parent. Otherwise return nothing function extension_parent_name(M::Module) rootmodule = moduleroot(M) @@ -1338,31 +1371,18 @@ function run_module_init(mod::Module, i::Int=1) # `i` informs ordering for the `@time_imports` report formatting if TIMING_IMPORTS[] == 0 ccall(:jl_init_restored_module, Cvoid, (Any,), mod) - else - if isdefined(mod, :__init__) - connector = i > 1 ? "├" : "┌" - printstyled(" $connector ", color = :light_black) - - elapsedtime = time_ns() - cumulative_compile_timing(true) - compile_elapsedtimes = cumulative_compile_time_ns() + elseif isdefined(mod, :__init__) + elapsed_time = time_ns() + cumulative_compile_timing(true) + compile_elapsedtimes = cumulative_compile_time_ns() - ccall(:jl_init_restored_module, Cvoid, (Any,), mod) + ccall(:jl_init_restored_module, Cvoid, (Any,), mod) - elapsedtime = (time_ns() - elapsedtime) / 1e6 - cumulative_compile_timing(false); - comp_time, recomp_time = (cumulative_compile_time_ns() .- compile_elapsedtimes) ./ 1e6 + elapsed_time = time_ns() - elapsed_time + cumulative_compile_timing(false); + comp_time, recomp_time = cumulative_compile_time_ns() .- compile_elapsedtimes - print("$(round(elapsedtime, digits=1)) ms $mod.__init__() ") - if comp_time > 0 - printstyled(Ryu.writefixed(Float64(100 * comp_time / elapsedtime), 2), "% compilation time", color = Base.info_color()) - end - if recomp_time > 0 - perc = Float64(100 * recomp_time / comp_time) - printstyled(" ($(perc < 1 ? "<1" : Ryu.writefixed(perc, 0))% recompilation)", color = Base.warn_color()) - end - println() - end + print_time_imports_report_init(mod, i, elapsed_time, comp_time, recomp_time) end end diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index 2a0e4faff7f1c..50ae560e99401 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -39,6 +39,8 @@ precompile(Base.__require_prelocked, (Base.PkgId, Nothing)) precompile(Base._require, (Base.PkgId, Nothing)) precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int)) precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int, Int)) +precompile(Tuple{typeof(Base.Threads.atomic_add!), Base.Threads.Atomic{Int}, Int}) +precompile(Tuple{typeof(Base.Threads.atomic_sub!), Base.Threads.Atomic{Int}, Int}) # Pkg loading precompile(Tuple{typeof(Base.Filesystem.normpath), String, String, Vararg{String}}) @@ -161,6 +163,8 @@ for match = Base._methods(+, (Int, Int), -1, Base.get_world_counter()) push!(Expr[], Expr(:return, false)) vcat(String[], String[]) k, v = (:hello => nothing) + Base.print_time_imports_report(Base) + Base.print_time_imports_report_init(Base) # Preferences uses these get(Dict{String,Any}(), "missing", nothing) @@ -172,6 +176,11 @@ for match = Base._methods(+, (Int, Int), -1, Base.get_world_counter()) # interactive startup uses this write(IOBuffer(), "") + # not critical, but helps hide unrelated compilation from @time when using --trace-compile + foo() = rand(2,2) * rand(2,2) + @time foo() + @time foo() + break # only actually need to do this once end """ From 1eabe90fa054abc74f541798ae4dfcc9445bb564 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 11 Sep 2024 14:04:10 -0400 Subject: [PATCH 31/57] codegen: some cleanup of layout computations (#55730) Change Alloca to take an explicit alignment, rather than relying on LLVM to guess our intended alignment from the DataLayout. Eventually we should try to change this code to just get all layout data from julia queries (jl_field_offset, julia_alignment, etc.) instead of relying on creating an LLVM element type for memory and inspecting it (CountTrackedPointers, DataLayout, and so on). --- src/ccall.cpp | 14 +++++--------- src/cgutils.cpp | 36 ++++++++++++++++-------------------- src/codegen.cpp | 34 +++++++++++++++------------------- src/intrinsics.cpp | 14 ++++++++------ 4 files changed, 44 insertions(+), 54 deletions(-) diff --git a/src/ccall.cpp b/src/ccall.cpp index eac130ea43189..e336de8e3574f 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -446,15 +446,13 @@ static Value *llvm_type_rewrite( const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout(); Align align = std::max(DL.getPrefTypeAlign(target_type), DL.getPrefTypeAlign(from_type)); if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) { - to = emit_static_alloca(ctx, target_type); + to = emit_static_alloca(ctx, target_type, align); setName(ctx.emission_context, to, "type_rewrite_buffer"); - cast(to)->setAlignment(align); from = to; } else { - from = emit_static_alloca(ctx, from_type); + from = emit_static_alloca(ctx, from_type, align); setName(ctx.emission_context, from, "type_rewrite_buffer"); - cast(from)->setAlignment(align); to = from; } ctx.builder.CreateAlignedStore(v, from, align); @@ -555,9 +553,8 @@ static Value *julia_to_native( // pass the address of an alloca'd thing, not a box // since those are immutable. - Value *slot = emit_static_alloca(ctx, to); Align align(julia_alignment(jlto)); - cast(slot)->setAlignment(align); + Value *slot = emit_static_alloca(ctx, to, align); setName(ctx.emission_context, slot, "native_convert_buffer"); if (!jvinfo.ispointer()) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); @@ -2090,7 +2087,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( if (sret) { assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid"); if (jl_is_pointerfree(rt)) { - result = emit_static_alloca(ctx, lrt); + result = emit_static_alloca(ctx, lrt, Align(julia_alignment(rt))); setName(ctx.emission_context, result, "ccall_sret"); sretty = lrt; argvals[0] = result; @@ -2266,9 +2263,8 @@ jl_cgval_t function_sig_t::emit_a_ccall( if (DL.getTypeStoreSize(resultTy) > rtsz) { // ARM and AArch64 can use a LLVM type larger than the julia type. // When this happens, cast through memory. - auto slot = emit_static_alloca(ctx, resultTy); + auto slot = emit_static_alloca(ctx, resultTy, boxalign); setName(ctx.emission_context, slot, "type_pun_slot"); - slot->setAlignment(boxalign); ctx.builder.CreateAlignedStore(result, slot, boxalign); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign, boxalign); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 2a234f399f5c1..bf5c67ae9f849 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1937,18 +1937,22 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j ctx.builder.CreateFence(Order); return ghostValue(ctx, jltype); } + if (isboxed) + alignment = sizeof(void*); + else if (!alignment) + alignment = julia_alignment(jltype); unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype); // note that nb == jl_Module->getDataLayout().getTypeAllocSize(elty) or getTypeStoreSize, depending on whether it is a struct or primitive type AllocaInst *intcast = NULL; if (Order == AtomicOrdering::NotAtomic) { if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) { - intcast = emit_static_alloca(ctx, elty); + intcast = emit_static_alloca(ctx, elty, Align(alignment)); setName(ctx.emission_context, intcast, "aggregate_load_box"); } } else { if (!isboxed && !elty->isIntOrPtrTy()) { - intcast = emit_static_alloca(ctx, elty); + intcast = emit_static_alloca(ctx, elty, Align(alignment)); setName(ctx.emission_context, intcast, "atomic_load_box"); elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); } @@ -1963,10 +1967,6 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j if (idx_0based) data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based); Value *instr = nullptr; - if (isboxed) - alignment = sizeof(void*); - else if (!alignment) - alignment = julia_alignment(jltype); if (intcast && Order == AtomicOrdering::NotAtomic) { emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, Align(alignment), intcast->getAlign()); } @@ -2053,6 +2053,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, ret = update_julia_type(ctx, ret, jltype); return ret; }; + if (isboxed) + alignment = sizeof(void*); + else if (!alignment) + alignment = julia_alignment(jltype); Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype); if (type_is_ghost(elty) || (issetfieldonce && !maybe_null_if_boxed) || @@ -2095,7 +2099,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, intcast_eltyp = elty; elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); if (!issetfield) { - intcast = emit_static_alloca(ctx, elty); + intcast = emit_static_alloca(ctx, elty, Align(alignment)); setName(ctx.emission_context, intcast, "atomic_store_box"); } } @@ -2121,10 +2125,6 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (realelty != elty) r = ctx.builder.CreateZExt(r, elty); } - if (isboxed) - alignment = sizeof(void*); - else if (!alignment) - alignment = julia_alignment(jltype); Value *instr = nullptr; Value *Compare = nullptr; Value *Success = nullptr; @@ -2657,10 +2657,8 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, if (fsz > 0 && mutabl) { // move value to an immutable stack slot (excluding tindex) Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al); - AllocaInst *lv = emit_static_alloca(ctx, AT); + AllocaInst *lv = emit_static_alloca(ctx, AT, Align(al)); setName(ctx.emission_context, lv, "immutable_union"); - if (al > 1) - lv->setAlignment(Align(al)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); emit_memcpy(ctx, lv, ai, addr, ai, fsz, Align(al), Align(al)); addr = lv; @@ -2903,7 +2901,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st unsigned st_idx = convert_struct_offset(ctx, T, byte_offset); IntegerType *ET = cast(T->getStructElementType(st_idx)); unsigned align = (ET->getBitWidth() + 7) / 8; - lv = emit_static_alloca(ctx, ET); + lv = emit_static_alloca(ctx, ET, Align(align)); lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + align - 1) / align)); // emit all of the align-sized words unsigned i = 0; @@ -3324,10 +3322,8 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, // at least some of the values can live on the stack // try to pick an Integer type size such that SROA will emit reasonable code Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align); - AllocaInst *lv = emit_static_alloca(ctx, AT); + AllocaInst *lv = emit_static_alloca(ctx, AT, Align(align)); setName(ctx.emission_context, lv, "unionalloca"); - if (align > 1) - lv->setAlignment(Align(align)); return lv; } return NULL; @@ -3886,7 +3882,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg } } else { - strct = emit_static_alloca(ctx, lt); + strct = emit_static_alloca(ctx, lt, Align(julia_alignment(ty))); setName(ctx.emission_context, strct, arg_typename); if (tracked.count) undef_derived_strct(ctx, strct, sty, ctx.tbaa().tbaa_stack); @@ -3966,7 +3962,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (fsz1 > 0 && !fval_info.isghost) { Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al); assert(lt->getStructElementType(llvm_idx) == ET); - AllocaInst *lv = emit_static_alloca(ctx, ET); + AllocaInst *lv = emit_static_alloca(ctx, ET, Align(al)); setName(ctx.emission_context, lv, "unioninit"); lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz1 + al - 1) / al)); emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr); diff --git a/src/codegen.cpp b/src/codegen.cpp index a82056eb36e21..73a5f844b31da 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2208,10 +2208,10 @@ static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_con return gv; } -static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty) +static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty, Align align) { ++EmittedAllocas; - return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), "", /*InsertBefore=*/ctx.topalloca); + return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), nullptr, align, "", /*InsertBefore=*/ctx.topalloca); } static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *sty, MDNode *tbaa) @@ -2323,7 +2323,7 @@ static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_ loc = get_pointer_to_constant(ctx.emission_context, cast(v), Align(julia_alignment(typ)), "_j_const", *jl_Module); } else { - loc = emit_static_alloca(ctx, v->getType()); + loc = emit_static_alloca(ctx, v->getType(), Align(julia_alignment(typ))); ctx.builder.CreateStore(v, loc); } return mark_julia_slot(loc, typ, tindex, ctx.tbaa().tbaa_stack); @@ -2435,7 +2435,7 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi) { assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things"); if (vi.usedUndef) { - vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext())); + vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext()), Align(1)); setName(ctx.emission_context, vi.defFlag, "isdefined"); store_def_flag(ctx, vi, false); } @@ -5025,25 +5025,20 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos case jl_returninfo_t::Ghosts: break; case jl_returninfo_t::SRet: - result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType()); - #if JL_LLVM_VERSION < 170000 - assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); - #endif + result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType(), Align(julia_alignment(jlretty))); argvals[idx] = result; idx++; break; case jl_returninfo_t::Union: - result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes)); + result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes), Align(returninfo.union_align)); setName(ctx.emission_context, result, "sret_box"); - if (returninfo.union_align > 1) - result->setAlignment(Align(returninfo.union_align)); argvals[idx] = result; idx++; break; } if (returninfo.return_roots) { - AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots)); + AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots), Align(alignof(jl_value_t*))); argvals[idx] = return_roots; idx++; } @@ -5922,11 +5917,10 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) if (vtype->isAggregateType() && CountTrackedPointers(vtype).count == 0) { // the value will be moved into dest in the predecessor critical block. // here it's moved into phi in the successor (from dest) - dest = emit_static_alloca(ctx, vtype); - Value *phi = emit_static_alloca(ctx, vtype); - ctx.builder.CreateMemCpy(phi, Align(julia_alignment(phiType)), - dest, dest->getAlign(), - jl_datatype_size(phiType), false); + Align align(julia_alignment(phiType)); + dest = emit_static_alloca(ctx, vtype, align); + Value *phi = emit_static_alloca(ctx, vtype, align); + ctx.builder.CreateMemCpy(phi, align, dest, align, jl_datatype_size(phiType), false); ctx.builder.CreateLifetimeEnd(dest); slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack); } @@ -7737,6 +7731,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value if (tracked.count && !tracked.all) props.return_roots = tracked.count; props.cc = jl_returninfo_t::SRet; + props.union_bytes = jl_datatype_size(jlrettype); + props.union_align = props.union_minalign = jl_datatype_align(jlrettype); // sret is always passed from alloca assert(M); fsig.push_back(rt->getPointerTo(M->getDataLayout().getAllocaAddrSpace())); @@ -8365,13 +8361,13 @@ static jl_llvm_functions_t if (lv) { lv->setName(jl_symbol_name(s)); varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); - varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext())); + varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()), Align(1)); setName(ctx.emission_context, varinfo.pTIndex, "tindex"); // TODO: attach debug metadata to this variable } else if (allunbox) { // all ghost values just need a selector allocated - AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext())); + AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()), Align(1)); lv->setName(jl_symbol_name(s)); varinfo.pTIndex = lv; varinfo.value.tbaa = NULL; diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 194b45886bb0d..c747edfeffe5f 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -405,10 +405,11 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed) } else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) { assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to)); - AllocaInst *cast = emit_static_alloca(ctx, ty); + Align align = std::max(DL.getPrefTypeAlign(ty), DL.getPrefTypeAlign(to)); + AllocaInst *cast = emit_static_alloca(ctx, ty, align); setName(ctx.emission_context, cast, "coercion"); - ctx.builder.CreateStore(unboxed, cast); - unboxed = ctx.builder.CreateLoad(to, cast); + ctx.builder.CreateAlignedStore(unboxed, cast, align); + unboxed = ctx.builder.CreateAlignedLoad(to, cast, align); } else if (frompointer) { Type *INTT_to = INTT(to, DL); @@ -692,10 +693,11 @@ static jl_cgval_t generic_cast( // understood that everything is implicitly rounded to 23 bits, // but if we start looking at more bits we need to actually do the // rounding first instead of carrying around incorrect low bits. - Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType()); + Align align(julia_alignment((jl_value_t*)jlto)); + Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType(), align); setName(ctx.emission_context, jlfloattemp_var, "rounding_slot"); - ctx.builder.CreateStore(from, jlfloattemp_var); - from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true); + ctx.builder.CreateAlignedStore(from, jlfloattemp_var, align); + from = ctx.builder.CreateAlignedLoad(from->getType(), jlfloattemp_var, align, /*force this to load from the stack*/true); setName(ctx.emission_context, from, "rounded"); } } From bf6962ca282831a29c6b4e8a2617e63fce623150 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Wed, 11 Sep 2024 19:25:01 -0400 Subject: [PATCH 32/57] Add some loading / LazyArtifacts precompiles to the sysimage (#55740) Fixes https://github.com/JuliaLang/julia/issues/55725 These help LazyArtifacts mainly but seem beneficial for the sysimage. --- contrib/generate_precompile.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl index 50ae560e99401..d3e73a1b1865a 100644 --- a/contrib/generate_precompile.jl +++ b/contrib/generate_precompile.jl @@ -42,6 +42,13 @@ precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int, Int precompile(Tuple{typeof(Base.Threads.atomic_add!), Base.Threads.Atomic{Int}, Int}) precompile(Tuple{typeof(Base.Threads.atomic_sub!), Base.Threads.Atomic{Int}, Int}) +# LazyArtifacts (but more generally helpful) +precompile(Tuple{Type{Base.Val{x} where x}, Module}) +precompile(Tuple{Type{NamedTuple{(:honor_overrides,), T} where T<:Tuple}, Tuple{Bool}}) +precompile(Tuple{typeof(Base.unique!), Array{String, 1}}) +precompile(Tuple{typeof(Base.invokelatest), Any}) +precompile(Tuple{typeof(Base.vcat), Array{String, 1}, Array{String, 1}}) + # Pkg loading precompile(Tuple{typeof(Base.Filesystem.normpath), String, String, Vararg{String}}) precompile(Tuple{typeof(Base.append!), Array{String, 1}, Array{String, 1}}) From 22eded8bbfeb6557c012e4b78c3c69c993d0d4e9 Mon Sep 17 00:00:00 2001 From: Gabriele Bozzola Date: Wed, 11 Sep 2024 21:14:37 -0700 Subject: [PATCH 33/57] Update stable version number in readme to v1.10.5 (#55742) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bd9e9b9c0bd02..465adcf049922 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ and then use the command prompt to change into the resulting julia directory. By Julia. However, most users should use the [most recent stable version](https://github.com/JuliaLang/julia/releases) of Julia. You can get this version by running: - git checkout v1.10.4 + git checkout v1.10.5 To build the `julia` executable, run `make` from within the julia directory. From 945517ba4e15f7470b8790a696ba5404ef047f2f Mon Sep 17 00:00:00 2001 From: Cody Tapscott <84105208+topolarity@users.noreply.github.com> Date: Thu, 12 Sep 2024 03:41:49 -0400 Subject: [PATCH 34/57] Add `invokelatest` barrier to `string(...)` in `@assert` (#55739) This change protects `@assert` from invalidations to `Base.string(...)` by adding an `invokelatest` barrier. A common source of invalidations right now is `print(io, join(args...))`. The problem is: 1. Inference concludes that `join(::Any...)` returns `Union{String,AnnotatedString}` 2. The `print` call is union-split to `String` and `AnnotatedString` 3. This code is now invalidated when StyledStrings defines `print(io, ::AnnotatedString)` The invalidation chain for `@assert` is similar: ` @assert 1 == 1` calls into `string(::Expr)` which calls into `print(io, join(args::Any...))`. Unfortunately that leads to the invalidation of almost all `@assert`s without an explicit error message Similar to https://github.com/JuliaLang/julia/pull/55583#issuecomment-2308969806 --- base/error.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/error.jl b/base/error.jl index d169cdc8085ac..ee533cee0b57d 100644 --- a/base/error.jl +++ b/base/error.jl @@ -232,12 +232,12 @@ macro assert(ex, msgs...) msg = msg # pass-through elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol)) # message is an expression needing evaluating - msg = :(Main.Base.string($(esc(msg)))) + msg = :(Main.Base.invokelatest(Main.Base.string, $(esc(msg)))) elseif isdefined(Main, :Base) && isdefined(Main.Base, :string) && applicable(Main.Base.string, msg) msg = Main.Base.string(msg) else # string() might not be defined during bootstrap - msg = :(_assert_tostring($(Expr(:quote,msg)))) + msg = :(Main.Base.invokelatest(_assert_tostring, $(Expr(:quote,msg)))) end return :($(esc(ex)) ? $(nothing) : throw(AssertionError($msg))) end From 8a9f384d594878e58dd46ce30a42ba03e50ce824 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Thu, 12 Sep 2024 12:59:20 -0300 Subject: [PATCH 35/57] Don't show string concatenation error hint with zero arg `+` (#55749) Closes #55745 --- base/errorshow.jl | 2 +- test/errorshow.jl | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/base/errorshow.jl b/base/errorshow.jl index a3bf464439d44..d805cb64fb81e 100644 --- a/base/errorshow.jl +++ b/base/errorshow.jl @@ -1067,7 +1067,7 @@ Experimental.register_error_hint(nonsetable_type_hint_handler, MethodError) # (probably attempting concatenation) function string_concatenation_hint_handler(io, ex, arg_types, kwargs) @nospecialize - if (ex.f === +) && all(i -> i <: AbstractString, arg_types) + if (ex.f === +) && !isempty(arg_types) && all(i -> i <: AbstractString, arg_types) print(io, "\nString concatenation is performed with ") printstyled(io, "*", color=:cyan) print(io, " (See also: https://docs.julialang.org/en/v1/manual/strings/#man-concatenation).") diff --git a/test/errorshow.jl b/test/errorshow.jl index 80352ddeaa9cf..a82ab7743dc5a 100644 --- a/test/errorshow.jl +++ b/test/errorshow.jl @@ -1079,6 +1079,12 @@ let err_str @test occursin("String concatenation is performed with *", err_str) end +# https://github.com/JuliaLang/julia/issues/55745 +let err_str + err_str = @except_str +() MethodError + @test !occursin("String concatenation is performed with *", err_str) +end + struct MissingLength; end struct MissingSize; end Base.IteratorSize(::Type{MissingSize}) = Base.HasShape{2}() From 76428563cdccf34aa030dffe1303bff8e12d742c Mon Sep 17 00:00:00 2001 From: Nick Robinson Date: Thu, 12 Sep 2024 20:17:17 +0100 Subject: [PATCH 36/57] Don't leave trailing whitespace when printing do-block expr (#55738) Before, when printing a `do`-block, we'd print a white-space after `do` even if no arguments follow. Now we don't print that space. --------- Co-authored-by: Lilith Orion Hafner --- base/show.jl | 8 ++++++-- test/show.jl | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/base/show.jl b/base/show.jl index 0a2976e7ebe42..ec6776d81f2d5 100644 --- a/base/show.jl +++ b/base/show.jl @@ -2196,8 +2196,12 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In elseif head === :do && nargs == 2 iob = IOContext(io, beginsym=>false) show_unquoted(iob, args[1], indent, -1, quote_level) - print(io, " do ") - show_list(iob, (((args[2]::Expr).args[1])::Expr).args, ", ", 0, 0, quote_level) + print(io, " do") + do_args = (((args[2]::Expr).args[1])::Expr).args + if !isempty(do_args) + print(io, ' ') + show_list(iob, do_args, ", ", 0, 0, quote_level) + end for stmt in (((args[2]::Expr).args[2])::Expr).args print(io, '\n', " "^(indent + indent_width)) show_unquoted(iob, stmt, indent + indent_width, -1, quote_level) diff --git a/test/show.jl b/test/show.jl index 65c65111606c5..d9c3585b7c1df 100644 --- a/test/show.jl +++ b/test/show.jl @@ -2768,3 +2768,8 @@ let topmi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ()); topmi.def = Main @test contains(repr(topmi), "Toplevel MethodInstance") end + +@testset "show() no trailing whitespace" begin + do_expr1 = :(foo() do; bar(); end) + @test !contains(sprint(show, do_expr1), " \n") +end From 4079648edf487626cc56cc8e6a518e67343614a7 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 12 Sep 2024 16:52:47 -0300 Subject: [PATCH 37/57] Don't pass lSystem to the linker since macos always links it (#55722) This stops it complaing about duplicated libs. For libunwind there isn't much we can do because it's part of lsystem and we also need out own. --- cli/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/cli/Makefile b/cli/Makefile index 7b8d3587f5386..3cc0af1a76afd 100644 --- a/cli/Makefile +++ b/cli/Makefile @@ -25,8 +25,6 @@ else ifeq ($(OS),FreeBSD) LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed else ifeq ($(OS),OpenBSD) LOADER_LDFLAGS += -Wl,--no-as-needed -lpthread -rdynamic -lc -Wl,--as-needed -else ifeq ($(OS),Darwin) -LOADER_LDFLAGS += -lSystem endif # Build list of dependent libraries that must be opened From e52a46c5c192bfe16853ae0b63ac33b280fba063 Mon Sep 17 00:00:00 2001 From: Neven Sajko Date: Thu, 12 Sep 2024 23:02:46 +0200 Subject: [PATCH 38/57] define `numerator` and `denominator` for `Complex` (#55694) Fixes #55693 --- base/rational.jl | 11 +++++++++-- test/rational.jl | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/base/rational.jl b/base/rational.jl index fb1824acb6b31..b4e450fd73abc 100644 --- a/base/rational.jl +++ b/base/rational.jl @@ -293,8 +293,14 @@ julia> numerator(4) 4 ``` """ -numerator(x::Integer) = x +numerator(x::Union{Integer,Complex{<:Integer}}) = x numerator(x::Rational) = x.num +function numerator(z::Complex{<:Rational}) + den = denominator(z) + reim = (real(z), imag(z)) + result = checked_mul.(numerator.(reim), div.(den, denominator.(reim))) + complex(result...) +end """ denominator(x) @@ -310,8 +316,9 @@ julia> denominator(4) 1 ``` """ -denominator(x::Integer) = one(x) +denominator(x::Union{Integer,Complex{<:Integer}}) = one(x) denominator(x::Rational) = x.den +denominator(z::Complex{<:Rational}) = lcm(denominator(real(z)), denominator(imag(z))) sign(x::Rational) = oftype(x, sign(x.num)) signbit(x::Rational) = signbit(x.num) diff --git a/test/rational.jl b/test/rational.jl index c6f81372de0b9..20a0971068876 100644 --- a/test/rational.jl +++ b/test/rational.jl @@ -801,3 +801,20 @@ end @test rationalize(Int64, nextfloat(0.1) * im; tol=0) == precise_next * im @test rationalize(0.1im; tol=eps(0.1)) == rationalize(0.1im) end + +@testset "complex numerator, denominator" begin + z = complex(3*3, 2*3*5) + @test z === numerator(z) === numerator(z // 2) === numerator(z // 5) + @test complex(3, 2*5) === numerator(z // 3) + @test isone(denominator(z)) + @test 2 === denominator(z // 2) + @test 1 === denominator(z // 3) + @test 5 === denominator(z // 5) + for den ∈ 1:10 + q = z // den + @test q === (numerator(q)//denominator(q)) + end + @testset "do not overflow silently" begin + @test_throws OverflowError numerator(Int8(1)//Int8(31) + Int8(8)im//Int8(3)) + end +end From 94f8a3d6723c61df052a431769a1726fd27a8cc7 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 13 Sep 2024 08:17:21 -0400 Subject: [PATCH 39/57] More testsets for SubString and a few missing tests (#55656) Co-authored-by: Simeon David Schaub --- test/strings/types.jl | 527 ++++++++++++++++++++++-------------------- 1 file changed, 271 insertions(+), 256 deletions(-) diff --git a/test/strings/types.jl b/test/strings/types.jl index d1e89e0e85196..c09652c3a608d 100644 --- a/test/strings/types.jl +++ b/test/strings/types.jl @@ -2,196 +2,211 @@ ## SubString and Cstring tests ## -## SubString tests ## -u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε" -u8str2 = u8str^2 -len_u8str = length(u8str) -slen_u8str = length(u8str) -len_u8str2 = length(u8str2) -slen_u8str2 = length(u8str2) - -@test len_u8str2 == 2 * len_u8str -@test slen_u8str2 == 2 * slen_u8str - -u8str2plain = String(u8str2) - -for i1 = 1:length(u8str2) - if !isvalid(u8str2, i1); continue; end - for i2 = i1:length(u8str2) - if !isvalid(u8str2, i2); continue; end - @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2]) - @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2]) - @test u8str2[i1:i2] == u8str2plain[i1:i2] +@testset "SubString" begin + u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε" + u8str2 = u8str^2 + len_u8str = length(u8str) + slen_u8str = length(u8str) + len_u8str2 = length(u8str2) + slen_u8str2 = length(u8str2) + + @test len_u8str2 == 2 * len_u8str + @test slen_u8str2 == 2 * slen_u8str + + u8str2plain = String(u8str2) + @test !isascii(u8str2) + @test cmp(u8str2, u8str^3) == -1 + @test cmp(u8str2, u8str2) == 0 + @test cmp(u8str^3, u8str2) == 1 + @test codeunit(u8str2) == codeunit(u8str2plain) + + @test convert(Union{String, SubString{String}}, u8str2) === u8str2 + @test convert(Union{String, SubString{String}}, u8str2plain) === u8str2plain + + for i1 = 1:ncodeunits(u8str2) + if !isvalid(u8str2, i1); continue; end + for i2 = i1:ncodeunits(u8str2) + if !isvalid(u8str2, i2); continue; end + @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2]) + @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2]) + @test u8str2[i1:i2] == u8str2plain[i1:i2] + end end -end -# tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes -# gives the same result as `getindex` (except that it is a view not a copy) -for idx in 0:1 - @test SubString("∀", 1, idx) == "∀"[1:idx] -end + # tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes + # gives the same result as `getindex` (except that it is a view not a copy) + for idx in 0:1 + @test SubString("∀", 1, idx) == "∀"[1:idx] + end -# Substring provided with invalid end index throws BoundsError -@test_throws StringIndexError SubString("∀", 1, 2) -@test_throws StringIndexError SubString("∀", 1, 3) -@test_throws BoundsError SubString("∀", 1, 4) - -# Substring provided with invalid start index throws BoundsError -@test SubString("∀∀", 1:1) == "∀" -@test SubString("∀∀", 1:4) == "∀∀" -@test SubString("∀∀", 4:4) == "∀" -@test_throws StringIndexError SubString("∀∀", 1:2) -@test_throws StringIndexError SubString("∀∀", 1:5) -@test_throws StringIndexError SubString("∀∀", 2:4) -@test_throws BoundsError SubString("∀∀", 0:1) -@test_throws BoundsError SubString("∀∀", 0:4) -@test_throws BoundsError SubString("∀∀", 1:7) -@test_throws BoundsError SubString("∀∀", 4:7) - -# tests for SubString of more than one multibyte `Char` string -# we are consistent with `getindex` for `String` -for idx in [0, 1, 4] - @test SubString("∀∀", 1, idx) == "∀∀"[1:idx] - @test SubString("∀∀", 4, idx) == "∀∀"[4:idx] -end + @testset "invalid end index" begin + # Substring provided with invalid end index throws BoundsError + @test_throws StringIndexError SubString("∀", 1, 2) + @test_throws StringIndexError SubString("∀", 1, 3) + @test_throws BoundsError SubString("∀", 1, 4) + end -# index beyond lastindex("∀∀") -for idx in [2:3; 5:6] - @test_throws StringIndexError SubString("∀∀", 1, idx) -end -for idx in 7:8 - @test_throws BoundsError SubString("∀∀", 1, idx) -end + @testset "invalid start index" begin + # Substring provided with invalid start index throws BoundsError + @test SubString("∀∀", 1:1) == "∀" + @test SubString("∀∀", 1:4) == "∀∀" + @test SubString("∀∀", 4:4) == "∀" + @test_throws StringIndexError SubString("∀∀", 1:2) + @test_throws StringIndexError SubString("∀∀", 1:5) + @test_throws StringIndexError SubString("∀∀", 2:4) + @test_throws BoundsError SubString("∀∀", 0:1) + @test_throws BoundsError SubString("∀∀", 0:4) + @test_throws BoundsError SubString("∀∀", 1:7) + @test_throws BoundsError SubString("∀∀", 4:7) + end + + # tests for SubString of more than one multibyte `Char` string + # we are consistent with `getindex` for `String` + for idx in [0, 1, 4] + @test SubString("∀∀", 1, idx) == "∀∀"[1:idx] + @test SubString("∀∀", 4, idx) == "∀∀"[4:idx] + end -let str="tempus fugit" #length(str)==12 - ss=SubString(str,1,lastindex(str)) #match source string - @test length(ss)==length(str) + @testset "index beyond lastindex(\"∀∀\")" begin + for idx in [2:3; 5:6] + @test_throws StringIndexError SubString("∀∀", 1, idx) + end + for idx in 7:8 + @test_throws BoundsError SubString("∀∀", 1, idx) + end + end - ss=SubString(str,1:lastindex(str)) - @test length(ss)==length(str) + let str="tempus fugit" #length(str)==12 + ss=SubString(str,1,lastindex(str)) #match source string + @test length(ss)==length(str) - ss=SubString(str,1,0) #empty SubString - @test length(ss)==0 + ss=SubString(str,1:lastindex(str)) + @test length(ss)==length(str) - ss=SubString(str,1:0) - @test length(ss)==0 + ss=SubString(str,1,0) #empty SubString + @test length(ss)==0 - @test_throws BoundsError SubString(str, 14, 20) #start indexing beyond source string length - @test_throws BoundsError SubString(str, 10, 16) #end indexing beyond source string length + ss=SubString(str,1:0) + @test length(ss)==0 - @test_throws BoundsError SubString("", 1, 4) #empty source string - @test_throws BoundsError SubString("", 1, 1) #empty source string, identical start and end index - @test_throws BoundsError SubString("", 10, 12) - @test SubString("", 12, 10) == "" -end + @test_throws BoundsError SubString(str, 14, 20) #start indexing beyond source string length + @test_throws BoundsError SubString(str, 10, 16) #end indexing beyond source string length -@test SubString("foobar", big(1), big(3)) == "foo" - -let str = "aa\u2200\u2222bb" - u = SubString(str, 3, 6) - @test length(u) == 2 - b = IOBuffer() - write(b, u) - @test String(take!(b)) == "\u2200\u2222" - - @test_throws StringIndexError SubString(str, 4, 5) - @test_throws BoundsError iterate(u, 0) - @test_throws BoundsError iterate(u, 8) - @test_throws BoundsError getindex(u, 0) - @test_throws BoundsError getindex(u, 7) - @test_throws BoundsError getindex(u, 0:1) - @test_throws BoundsError getindex(u, 7:7) - @test reverseind(u, 1) == 4 - @test typeof(Base.cconvert(Ptr{Int8}, u)) == SubString{String} - @test Base.cconvert(Ptr{Int8}, u) == u -end + @test_throws BoundsError SubString("", 1, 4) #empty source string + @test_throws BoundsError SubString("", 1, 1) #empty source string, identical start and end index + @test_throws BoundsError SubString("", 10, 12) + @test SubString("", 12, 10) == "" + end -let str = "føøbar" - @test_throws BoundsError SubString(str, 10, 10) - u = SubString(str, 4, 3) - @test length(u) == 0 - b = IOBuffer() - write(b, u) - @test String(take!(b)) == "" -end + @test SubString("foobar", big(1), big(3)) == "foo" + + let str = "aa\u2200\u2222bb" + u = SubString(str, 3, 6) + @test length(u) == 2 + b = IOBuffer() + write(b, u) + @test String(take!(b)) == "\u2200\u2222" + + @test_throws StringIndexError SubString(str, 4, 5) + @test_throws BoundsError iterate(u, 0) + @test_throws BoundsError iterate(u, 8) + @test_throws BoundsError getindex(u, 0) + @test_throws BoundsError getindex(u, 7) + @test_throws BoundsError getindex(u, 0:1) + @test_throws BoundsError getindex(u, 7:7) + @test reverseind(u, 1) == 4 + @test typeof(Base.cconvert(Ptr{Int8}, u)) == SubString{String} + @test Base.cconvert(Ptr{Int8}, u) == u + end -# search and SubString (issue #5679) -let str = "Hello, world!" - u = SubString(str, 1, 5) - @test findlast("World", u) === nothing - @test findlast(isequal('z'), u) === nothing - @test findlast("ll", u) == 3:4 -end + let str = "føøbar" + @test_throws BoundsError SubString(str, 10, 10) + u = SubString(str, 4, 3) + @test length(u) == 0 + b = IOBuffer() + write(b, u) + @test String(take!(b)) == "" + end -# SubString created from SubString -let str = "Hello, world!" - u = SubString(str, 2, 5) - for idx in 1:4 - @test SubString(u, 2, idx) == u[2:idx] - @test SubString(u, 2:idx) == u[2:idx] + @testset "search and SubString (issue #5679)" begin + str = "Hello, world!" + u = SubString(str, 1, 5) + @test findlast("World", u) === nothing + @test findlast(isequal('z'), u) === nothing + @test findlast("ll", u) == 3:4 end - @test_throws BoundsError SubString(u, 1, 10) - @test_throws BoundsError SubString(u, 1:10) - @test_throws BoundsError SubString(u, 20:30) - @test SubString(u, 20:15) == "" - @test_throws BoundsError SubString(u, -1:10) - @test SubString(u, -1, -10) == "" - @test SubString(SubString("123", 1, 2), -10, -20) == "" -end -# sizeof -@test sizeof(SubString("abc\u2222def",4,4)) == 3 - -# issue #3710 -@test prevind(SubString("{var}",2,4),4) == 3 - -# issue #4183 -@test split(SubString("x", 2, 0), "y") == [""] - -# issue #6772 -@test parse(Float64, SubString("10",1,1)) === 1.0 -@test parse(Float64, SubString("1 0",1,1)) === 1.0 -@test parse(Float32, SubString("10",1,1)) === 1.0f0 - -# issue #5870 -@test !occursin(Regex("aa"), SubString("",1,0)) -@test occursin(Regex(""), SubString("",1,0)) - -# isvalid, length, prevind, nextind for SubString{String} -let s = "lorem ipsum", sdict = Dict( - SubString(s, 1, 11) => "lorem ipsum", - SubString(s, 1, 6) => "lorem ", - SubString(s, 1, 0) => "", - SubString(s, 2, 4) => "ore", - SubString(s, 2, 11) => "orem ipsum", - SubString(s, 15, 14) => "", -) - for (ss, s) in sdict - @test ncodeunits(ss) == ncodeunits(s) - for i in -2:13 - @test isvalid(ss, i) == isvalid(s, i) - end - for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss) - @test length(ss, i, j) == length(s, i, j) + @testset "SubString created from SubString" begin + str = "Hello, world!" + u = SubString(str, 2, 5) + for idx in 1:4 + @test SubString(u, 2, idx) == u[2:idx] + @test SubString(u, 2:idx) == u[2:idx] end + @test_throws BoundsError SubString(u, 1, 10) + @test_throws BoundsError SubString(u, 1:10) + @test_throws BoundsError SubString(u, 20:30) + @test SubString(u, 20:15) == "" + @test_throws BoundsError SubString(u, -1:10) + @test SubString(u, -1, -10) == "" + @test SubString(SubString("123", 1, 2), -10, -20) == "" + end + + # sizeof + @test sizeof(SubString("abc\u2222def",4,4)) == 3 + + # issue #3710 + @test prevind(SubString("{var}",2,4),4) == 3 + + # issue #4183 + @test split(SubString("x", 2, 0), "y") == [""] + + @testset "issue #6772" begin + @test parse(Float64, SubString("10",1,1)) === 1.0 + @test parse(Float64, SubString("1 0",1,1)) === 1.0 + @test parse(Float32, SubString("10",1,1)) === 1.0f0 + end + + @testset "issue #5870" begin + @test !occursin(Regex("aa"), SubString("",1,0)) + @test occursin(Regex(""), SubString("",1,0)) end - for (ss, s) in sdict - @test length(ss) == length(s) - for i in 0:ncodeunits(ss), j = 0:length(ss)+1 - @test prevind(ss, i+1, j) == prevind(s, i+1, j) - @test nextind(ss, i, j) == nextind(s, i, j) + @testset" isvalid, length, prevind, nextind for SubString{String}" begin + s = "lorem ipsum" + sdict = Dict( + SubString(s, 1, 11) => "lorem ipsum", + SubString(s, 1, 6) => "lorem ", + SubString(s, 1, 0) => "", + SubString(s, 2, 4) => "ore", + SubString(s, 2, 11) => "orem ipsum", + SubString(s, 15, 14) => "", + ) + for (ss, s) in sdict + @test ncodeunits(ss) == ncodeunits(s) + for i in -2:13 + @test isvalid(ss, i) == isvalid(s, i) + end + for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss) + @test length(ss, i, j) == length(s, i, j) + end + end + for (ss, s) in sdict + @test length(ss) == length(s) + for i in 0:ncodeunits(ss), j = 0:length(ss)+1 + @test prevind(ss, i+1, j) == prevind(s, i+1, j) + @test nextind(ss, i, j) == nextind(s, i, j) + end + @test_throws BoundsError prevind(s, 0) + @test_throws BoundsError prevind(ss, 0) + @test_throws BoundsError nextind(s, ncodeunits(ss)+1) + @test_throws BoundsError nextind(ss, ncodeunits(ss)+1) end - @test_throws BoundsError prevind(s, 0) - @test_throws BoundsError prevind(ss, 0) - @test_throws BoundsError nextind(s, ncodeunits(ss)+1) - @test_throws BoundsError nextind(ss, ncodeunits(ss)+1) end -end -# proper nextind/prevind/thisind for SubString{String} -let rng = MersenneTwister(1), strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"∀∃∀", + rng = MersenneTwister(1) + strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"∀∃∀", String(rand(rng, UInt8, 50))] - for s in strs + @testset "proper nextind/prevind/thisind for SubString{String}: $(repr(s))" for s in strs a = 0 while a <= ncodeunits(s) a = nextind(s, a) @@ -223,111 +238,111 @@ let rng = MersenneTwister(1), strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*" end end end -end -# for isvalid(SubString{String}) -let s = "Σx + βz - 2" - for i in -1:ncodeunits(s)+2 - if checkbounds(Bool, s, i) - if isvalid(s, i) - ss = SubString(s, 1, i) - for j = 1:ncodeunits(ss) - @test isvalid(ss, j) == isvalid(s, j) + # for isvalid(SubString{String}) + let s = "Σx + βz - 2" + for i in -1:ncodeunits(s)+2 + if checkbounds(Bool, s, i) + if isvalid(s, i) + ss = SubString(s, 1, i) + for j = 1:ncodeunits(ss) + @test isvalid(ss, j) == isvalid(s, j) + end + else + @test_throws StringIndexError SubString(s, 1, i) end + elseif i > 0 + @test_throws BoundsError SubString(s, 1, i) else - @test_throws StringIndexError SubString(s, 1, i) + @test SubString(s, 1, i) == "" end - elseif i > 0 - @test_throws BoundsError SubString(s, 1, i) - else - @test SubString(s, 1, i) == "" end end -end -let ss = SubString("hello", 1, 5) - @test length(ss, 1, 0) == 0 - @test_throws BoundsError length(ss, 1, -1) - @test_throws BoundsError length(ss, 1, 6) - @test_throws BoundsError length(ss, 1, 10) - @test_throws BoundsError prevind(ss, 0, 1) - @test prevind(ss, 1, 1) == 0 - @test prevind(ss, 6, 1) == 5 - @test_throws BoundsError prevind(ss, 7, 1) - @test_throws BoundsError nextind(ss, -1, 1) - @test nextind(ss, 0, 1) == 1 - @test nextind(ss, 5, 1) == 6 - @test_throws BoundsError nextind(ss, 6, 1) -end + let ss = SubString("hello", 1, 5) + @test length(ss, 1, 0) == 0 + @test_throws BoundsError length(ss, 1, -1) + @test_throws BoundsError length(ss, 1, 6) + @test_throws BoundsError length(ss, 1, 10) + @test_throws BoundsError prevind(ss, 0, 1) + @test prevind(ss, 1, 1) == 0 + @test prevind(ss, 6, 1) == 5 + @test_throws BoundsError prevind(ss, 7, 1) + @test_throws BoundsError nextind(ss, -1, 1) + @test nextind(ss, 0, 1) == 1 + @test nextind(ss, 5, 1) == 6 + @test_throws BoundsError nextind(ss, 6, 1) + end -# length(SubString{String}) performance specialization -let s = "|η(α)-ϕ(κ)| < ε" - @test length(SubString(s, 1, 0)) == length(s[1:0]) - @test length(SubString(s, 4, 4)) == length(s[4:4]) - @test length(SubString(s, 1, 7)) == length(s[1:7]) - @test length(SubString(s, 4, 11)) == length(s[4:11]) -end + # length(SubString{String}) performance specialization + let s = "|η(α)-ϕ(κ)| < ε" + @test length(SubString(s, 1, 0)) == length(s[1:0]) + @test length(SubString(s, 4, 4)) == length(s[4:4]) + @test length(SubString(s, 1, 7)) == length(s[1:7]) + @test length(SubString(s, 4, 11)) == length(s[4:11]) + end -@testset "reverseind" for T in (String, SubString, GenericString) - for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1") - for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4") - for c in ('X', 'δ', '\U0001d6a5') - s = convert(T, string(prefix, c, suffix)) - r = reverse(s) - ri = findfirst(isequal(c), r) - @test c == s[reverseind(s, ri)] == r[ri] - s = convert(T, string(prefix, prefix, c, suffix, suffix)) - pre = convert(T, prefix) - sb = SubString(s, nextind(pre, lastindex(pre)), - lastindex(convert(T, string(prefix, prefix, c, suffix)))) - r = reverse(sb) - ri = findfirst(isequal(c), r) - @test c == sb[reverseind(sb, ri)] == r[ri] + @testset "reverseind" for T in (String, SubString, GenericString) + for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1") + for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4") + for c in ('X', 'δ', '\U0001d6a5') + s = convert(T, string(prefix, c, suffix)) + r = reverse(s) + ri = findfirst(isequal(c), r) + @test c == s[reverseind(s, ri)] == r[ri] + s = convert(T, string(prefix, prefix, c, suffix, suffix)) + pre = convert(T, prefix) + sb = SubString(s, nextind(pre, lastindex(pre)), + lastindex(convert(T, string(prefix, prefix, c, suffix)))) + r = reverse(sb) + ri = findfirst(isequal(c), r) + @test c == sb[reverseind(sb, ri)] == r[ri] + end end end end -end -@testset "reverseind of empty strings" begin - for s in ("", - SubString("", 1, 0), - SubString("ab", 1, 0), - SubString("ab", 2, 1), - SubString("ab", 3, 2), - GenericString("")) - @test reverseind(s, 0) == 1 - @test reverseind(s, 1) == 0 + @testset "reverseind of empty strings" begin + for s in ("", + SubString("", 1, 0), + SubString("ab", 1, 0), + SubString("ab", 2, 1), + SubString("ab", 3, 2), + GenericString("")) + @test reverseind(s, 0) == 1 + @test reverseind(s, 1) == 0 + end end end -## Cstring tests ## - -@testset "issue #13974: comparison against pointers" begin - str = String("foobar") - ptr = pointer(str) - cstring = Cstring(ptr) - @test ptr == cstring - @test cstring == ptr - - # convenient NULL string creation from Ptr{Cvoid} - nullstr = Cstring(C_NULL) - - # Comparisons against NULL strings - @test ptr != nullstr - @test nullstr != ptr - - # Short-hand comparison against C_NULL - @test nullstr == C_NULL - @test C_NULL == nullstr - @test cstring != C_NULL - @test C_NULL != cstring -end +@testset "Cstring" begin + @testset "issue #13974: comparison against pointers" begin + str = String("foobar") + ptr = pointer(str) + cstring = Cstring(ptr) + @test ptr == cstring + @test cstring == ptr + + # convenient NULL string creation from Ptr{Cvoid} + nullstr = Cstring(C_NULL) + + # Comparisons against NULL strings + @test ptr != nullstr + @test nullstr != ptr + + # Short-hand comparison against C_NULL + @test nullstr == C_NULL + @test C_NULL == nullstr + @test cstring != C_NULL + @test C_NULL != cstring + end -@testset "issue #31381: eltype(Cstring) != Cchar" begin - s = Cstring(C_NULL) - @test eltype(Cstring) == Cchar - @test eltype(s) == Cchar - @test pointer(s) isa Ptr{Cchar} + @testset "issue #31381: eltype(Cstring) != Cchar" begin + s = Cstring(C_NULL) + @test eltype(Cstring) == Cchar + @test eltype(s) == Cchar + @test pointer(s) isa Ptr{Cchar} + end end @testset "Codeunits" begin From 467ab852bb7392843ce74a17de89c221e0f64df0 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 13 Sep 2024 08:18:03 -0400 Subject: [PATCH 40/57] Reorganize search tests into testsets (#55658) Some of these tests are nearly 10 years old! Organized some of them into testsets just in case one breaks in the future, should make it easier to find the problem. --------- Co-authored-by: Simeon David Schaub --- test/strings/search.jl | 506 +++++++++++++++++++++-------------------- 1 file changed, 258 insertions(+), 248 deletions(-) diff --git a/test/strings/search.jl b/test/strings/search.jl index d8883bad24b48..c43327fe2971b 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -4,26 +4,27 @@ astr = "Hello, world.\n" u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε" -# I think these should give error on 4 also, and "" is not treated -# consistently with SubString("",1,1), nor with Char[] -for ind in (0, 5) - @test_throws BoundsError findnext(SubString("",1,1), "foo", ind) - @test_throws BoundsError findprev(SubString("",1,1), "foo", ind) -end +@testset "BoundsError for findnext/findprev" begin + # I think these should give error on 4 also, and "" is not treated + # consistently with SubString("",1,1), nor with Char[] + for ind in (0, 5) + @test_throws BoundsError findnext(SubString("",1,1), "foo", ind) + @test_throws BoundsError findprev(SubString("",1,1), "foo", ind) + end -# Note: the commented out test will be enabled after fixes to make -# sure that findnext/findprev are consistent -# no matter what type of AbstractString the second argument is -@test_throws BoundsError findnext(isequal('a'), "foo", 0) -@test_throws BoundsError findnext(in(Char[]), "foo", 5) -# @test_throws BoundsError findprev(in(Char[]), "foo", 0) -@test_throws BoundsError findprev(in(Char[]), "foo", 5) + # Note: the commented out test will be enabled after fixes to make + # sure that findnext/findprev are consistent + # no matter what type of AbstractString the second argument is + @test_throws BoundsError findnext(isequal('a'), "foo", 0) + @test_throws BoundsError findnext(in(Char[]), "foo", 5) + # @test_throws BoundsError findprev(in(Char[]), "foo", 0) + @test_throws BoundsError findprev(in(Char[]), "foo", 5) -# @test_throws ErrorException in("foobar","bar") -@test_throws BoundsError findnext(isequal(0x1),b"\x1\x2",0) + # @test_throws ErrorException in("foobar","bar") + @test_throws BoundsError findnext(isequal(0x1),b"\x1\x2",0) +end -# ascii forward search -for str in [astr, GenericString(astr)] +@testset "ascii forward search $(typeof(str))" for str in [astr, GenericString(astr)] @test_throws BoundsError findnext(isequal('z'), str, 0) @test_throws BoundsError findnext(isequal('∀'), str, 0) @test findfirst(isequal('x'), str) === nothing @@ -41,9 +42,7 @@ for str in [astr, GenericString(astr)] @test findnext(isequal('\n'), str, 15) === nothing @test_throws BoundsError findnext(isequal('ε'), str, nextind(str,lastindex(str))+1) @test_throws BoundsError findnext(isequal('a'), str, nextind(str,lastindex(str))+1) -end -for str in [astr, GenericString(astr)] @test_throws BoundsError findnext('z', str, 0) @test_throws BoundsError findnext('∀', str, 0) @test findfirst('x', str) === nothing @@ -65,8 +64,8 @@ for str in [astr, GenericString(astr)] @test_throws BoundsError findnext('a', str, nextind(str,lastindex(str))+1) end -# ascii backward search -for str in [astr] +@testset "ascii backward search" begin + str = astr @test findlast(isequal('x'), str) === nothing @test findlast(isequal('\0'), str) === nothing @test findlast(isequal('\u80'), str) === nothing @@ -81,9 +80,7 @@ for str in [astr] @test findlast(isequal(','), str) == 6 @test findprev(isequal(','), str, 5) === nothing @test findlast(isequal('\n'), str) == 14 -end -for str in [astr] @test findlast('x', str) === nothing @test findlast('\0', str) === nothing @test findlast('\u80', str) === nothing @@ -102,8 +99,7 @@ for str in [astr] @test findlast('\n', str) == 14 end -# utf-8 forward search -for str in (u8str, GenericString(u8str)) +@testset "utf-8 forward search $(typeof(str))" for str in (u8str, GenericString(u8str)) @test_throws BoundsError findnext(isequal('z'), str, 0) @test_throws BoundsError findnext(isequal('∀'), str, 0) @test findfirst(isequal('z'), str) === nothing @@ -132,8 +128,8 @@ for str in (u8str, GenericString(u8str)) @test_throws BoundsError findnext(isequal('a'), str, nextind(str,lastindex(str))+1) end -# utf-8 backward search -for str in [u8str] +@testset "utf-8 backward search" begin + str = u8str @test findlast(isequal('z'), str) === nothing @test findlast(isequal('\0'), str) === nothing @test findlast(isequal('\u80'), str) === nothing @@ -155,6 +151,128 @@ for str in [u8str] @test findprev(isequal('ε'), str, 4) === nothing end +@testset "string forward search with a single-char string" begin + @test findfirst("x", astr) === nothing + @test findfirst("H", astr) == 1:1 + @test findnext("H", astr, 2) === nothing + @test findfirst("l", astr) == 3:3 + @test findnext("l", astr, 4) == 4:4 + @test findnext("l", astr, 5) == 11:11 + @test findnext("l", astr, 12) === nothing + @test findfirst("\n", astr) == 14:14 + @test findnext("\n", astr, 15) === nothing + + @test findfirst("z", u8str) === nothing + @test findfirst("∄", u8str) === nothing + @test findfirst("∀", u8str) == 1:1 + @test findnext("∀", u8str, 4) === nothing + @test findfirst("∃", u8str) == 13:13 + @test findnext("∃", u8str, 16) === nothing + @test findfirst("x", u8str) == 26:26 + @test findnext("x", u8str, 27) == 43:43 + @test findnext("x", u8str, 44) === nothing + @test findfirst("ε", u8str) == 5:5 + @test findnext("ε", u8str, 7) == 54:54 + @test findnext("ε", u8str, 56) === nothing +end + +@testset "findprev backward search with a single-char string" begin + @test findlast("x", astr) === nothing + @test findlast("H", astr) == 1:1 + @test findprev("H", astr, 2) == 1:1 + @test findprev("H", astr, 0) === nothing + @test findlast("l", astr) == 11:11 + @test findprev("l", astr, 10) == 4:4 + @test findprev("l", astr, 4) == 4:4 + @test findprev("l", astr, 3) == 3:3 + @test findprev("l", astr, 2) === nothing + @test findlast("\n", astr) == 14:14 + @test findprev("\n", astr, 13) === nothing + + @test findlast("z", u8str) === nothing + @test findlast("∄", u8str) === nothing + @test findlast("∀", u8str) == 1:1 + @test findprev("∀", u8str, 0) === nothing + #TODO: setting the limit in the middle of a wide char + # makes findnext fail but findprev succeed. + # Should findprev fail as well? + #@test findprev("∀", u8str, 2) === nothing # gives 1:3 + @test findlast("∃", u8str) == 13:13 + @test findprev("∃", u8str, 12) === nothing + @test findlast("x", u8str) == 43:43 + @test findprev("x", u8str, 42) == 26:26 + @test findprev("x", u8str, 25) === nothing + @test findlast("ε", u8str) == 54:54 + @test findprev("ε", u8str, 53) == 5:5 + @test findprev("ε", u8str, 4) === nothing +end + +@testset "string forward search with a single-char regex" begin + @test findfirst(r"x", astr) === nothing + @test findfirst(r"H", astr) == 1:1 + @test findnext(r"H", astr, 2) === nothing + @test findfirst(r"l", astr) == 3:3 + @test findnext(r"l", astr, 4) == 4:4 + @test findnext(r"l", astr, 5) == 11:11 + @test findnext(r"l", astr, 12) === nothing + @test findfirst(r"\n", astr) == 14:14 + @test findnext(r"\n", astr, 15) === nothing + @test findfirst(r"z", u8str) === nothing + @test findfirst(r"∄", u8str) === nothing + @test findfirst(r"∀", u8str) == 1:1 + @test findnext(r"∀", u8str, 4) === nothing + @test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str) + @test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4) + @test findfirst(r"∃", u8str) == 13:13 + @test findnext(r"∃", u8str, 16) === nothing + @test findfirst(r"x", u8str) == 26:26 + @test findnext(r"x", u8str, 27) == 43:43 + @test findnext(r"x", u8str, 44) === nothing + @test findfirst(r"ε", u8str) == 5:5 + @test findnext(r"ε", u8str, 7) == 54:54 + @test findnext(r"ε", u8str, 56) === nothing + for i = 1:lastindex(astr) + @test findnext(r"."s, astr, i) == i:i + end + for i = 1:lastindex(u8str) + if isvalid(u8str,i) + @test findnext(r"."s, u8str, i) == i:i + end + end +end + +@testset "string forward search with a zero-char string" begin + for i = 1:lastindex(astr) + @test findnext("", astr, i) == i:i-1 + end + for i = 1:lastindex(u8str) + @test findnext("", u8str, i) == i:i-1 + end + @test findfirst("", "") === 1:0 +end + +@testset "string backward search with a zero-char string" begin + for i = 1:lastindex(astr) + @test findprev("", astr, i) == i:i-1 + end + for i = 1:lastindex(u8str) + @test findprev("", u8str, i) == i:i-1 + end + @test findlast("", "") === 1:0 +end + +@testset "string forward search with a zero-char regex" begin + for i = 1:lastindex(astr) + @test findnext(r"", astr, i) == i:i-1 + end + for i = 1:lastindex(u8str) + # TODO: should regex search fast-forward invalid indices? + if isvalid(u8str,i) + @test findnext(r"", u8str, i) == i:i-1 + end + end +end + # See the comments in #54579 @testset "Search for invalid chars" begin @test findfirst(==('\xff'), "abc\xffde") == 4 @@ -165,238 +283,130 @@ end @test isnothing(findprev(==('\xa6'), "æa", 2)) end -# string forward search with a single-char string -@test findfirst("x", astr) === nothing -@test findfirst("H", astr) == 1:1 -@test findnext("H", astr, 2) === nothing -@test findfirst("l", astr) == 3:3 -@test findnext("l", astr, 4) == 4:4 -@test findnext("l", astr, 5) == 11:11 -@test findnext("l", astr, 12) === nothing -@test findfirst("\n", astr) == 14:14 -@test findnext("\n", astr, 15) === nothing - -@test findfirst("z", u8str) === nothing -@test findfirst("∄", u8str) === nothing -@test findfirst("∀", u8str) == 1:1 -@test findnext("∀", u8str, 4) === nothing -@test findfirst("∃", u8str) == 13:13 -@test findnext("∃", u8str, 16) === nothing -@test findfirst("x", u8str) == 26:26 -@test findnext("x", u8str, 27) == 43:43 -@test findnext("x", u8str, 44) === nothing -@test findfirst("ε", u8str) == 5:5 -@test findnext("ε", u8str, 7) == 54:54 -@test findnext("ε", u8str, 56) === nothing - -# strifindprev backward search with a single-char string -@test findlast("x", astr) === nothing -@test findlast("H", astr) == 1:1 -@test findprev("H", astr, 2) == 1:1 -@test findprev("H", astr, 0) === nothing -@test findlast("l", astr) == 11:11 -@test findprev("l", astr, 10) == 4:4 -@test findprev("l", astr, 4) == 4:4 -@test findprev("l", astr, 3) == 3:3 -@test findprev("l", astr, 2) === nothing -@test findlast("\n", astr) == 14:14 -@test findprev("\n", astr, 13) === nothing - -@test findlast("z", u8str) === nothing -@test findlast("∄", u8str) === nothing -@test findlast("∀", u8str) == 1:1 -@test findprev("∀", u8str, 0) === nothing -#TODO: setting the limit in the middle of a wide char -# makes findnext fail but findprev succeed. -# Should findprev fail as well? -#@test findprev("∀", u8str, 2) === nothing # gives 1:3 -@test findlast("∃", u8str) == 13:13 -@test findprev("∃", u8str, 12) === nothing -@test findlast("x", u8str) == 43:43 -@test findprev("x", u8str, 42) == 26:26 -@test findprev("x", u8str, 25) === nothing -@test findlast("ε", u8str) == 54:54 -@test findprev("ε", u8str, 53) == 5:5 -@test findprev("ε", u8str, 4) === nothing - -# string forward search with a single-char regex -@test findfirst(r"x", astr) === nothing -@test findfirst(r"H", astr) == 1:1 -@test findnext(r"H", astr, 2) === nothing -@test findfirst(r"l", astr) == 3:3 -@test findnext(r"l", astr, 4) == 4:4 -@test findnext(r"l", astr, 5) == 11:11 -@test findnext(r"l", astr, 12) === nothing -@test findfirst(r"\n", astr) == 14:14 -@test findnext(r"\n", astr, 15) === nothing -@test findfirst(r"z", u8str) === nothing -@test findfirst(r"∄", u8str) === nothing -@test findfirst(r"∀", u8str) == 1:1 -@test findnext(r"∀", u8str, 4) === nothing -@test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str) -@test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4) -@test findfirst(r"∃", u8str) == 13:13 -@test findnext(r"∃", u8str, 16) === nothing -@test findfirst(r"x", u8str) == 26:26 -@test findnext(r"x", u8str, 27) == 43:43 -@test findnext(r"x", u8str, 44) === nothing -@test findfirst(r"ε", u8str) == 5:5 -@test findnext(r"ε", u8str, 7) == 54:54 -@test findnext(r"ε", u8str, 56) === nothing -for i = 1:lastindex(astr) - @test findnext(r"."s, astr, i) == i:i -end -for i = 1:lastindex(u8str) - if isvalid(u8str,i) - @test findnext(r"."s, u8str, i) == i:i - end +@testset "string forward search with a two-char string literal" begin + @test findfirst("xx", "foo,bar,baz") === nothing + @test findfirst("fo", "foo,bar,baz") == 1:2 + @test findnext("fo", "foo,bar,baz", 3) === nothing + @test findfirst("oo", "foo,bar,baz") == 2:3 + @test findnext("oo", "foo,bar,baz", 4) === nothing + @test findfirst("o,", "foo,bar,baz") == 3:4 + @test findnext("o,", "foo,bar,baz", 5) === nothing + @test findfirst(",b", "foo,bar,baz") == 4:5 + @test findnext(",b", "foo,bar,baz", 6) == 8:9 + @test findnext(",b", "foo,bar,baz", 10) === nothing + @test findfirst("az", "foo,bar,baz") == 10:11 + @test findnext("az", "foo,bar,baz", 12) === nothing end -# string forward search with a zero-char string -for i = 1:lastindex(astr) - @test findnext("", astr, i) == i:i-1 +@testset "issue #9365" begin + # string forward search with a two-char UTF-8 (2 byte) string literal + @test findfirst("éé", "ééé") == 1:3 + @test findnext("éé", "ééé", 1) == 1:3 + # string forward search with a two-char UTF-8 (3 byte) string literal + @test findfirst("€€", "€€€") == 1:4 + @test findnext("€€", "€€€", 1) == 1:4 + # string forward search with a two-char UTF-8 (4 byte) string literal + @test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5 + @test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5 + + # string forward search with a two-char UTF-8 (2 byte) string literal + @test findfirst("éé", "éé") == 1:3 + @test findnext("éé", "éé", 1) == 1:3 + # string forward search with a two-char UTF-8 (3 byte) string literal + @test findfirst("€€", "€€") == 1:4 + @test findnext("€€", "€€", 1) == 1:4 + # string forward search with a two-char UTF-8 (4 byte) string literal + @test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 + @test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5 + + # string backward search with a two-char UTF-8 (2 byte) string literal + @test findlast("éé", "ééé") == 3:5 + @test findprev("éé", "ééé", lastindex("ééé")) == 3:5 + # string backward search with a two-char UTF-8 (3 byte) string literal + @test findlast("€€", "€€€") == 4:7 + @test findprev("€€", "€€€", lastindex("€€€")) == 4:7 + # string backward search with a two-char UTF-8 (4 byte) string literal + @test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9 + @test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 5:9 + + # string backward search with a two-char UTF-8 (2 byte) string literal + @test findlast("éé", "éé") == 1:3 # should really be 1:4! + @test findprev("éé", "éé", lastindex("ééé")) == 1:3 + # string backward search with a two-char UTF-8 (3 byte) string literal + @test findlast("€€", "€€") == 1:4 # should really be 1:6! + @test findprev("€€", "€€", lastindex("€€€")) == 1:4 + # string backward search with a two-char UTF-8 (4 byte) string literal + @test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 # should really be 1:8! + @test findprev("\U1f596\U1f596", "\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 1:5 end -for i = 1:lastindex(u8str) - @test findnext("", u8str, i) == i:i-1 + +@testset "string backward search with a two-char string literal" begin + @test findlast("xx", "foo,bar,baz") === nothing + @test findlast("fo", "foo,bar,baz") == 1:2 + @test findprev("fo", "foo,bar,baz", 1) === nothing + @test findlast("oo", "foo,bar,baz") == 2:3 + @test findprev("oo", "foo,bar,baz", 2) === nothing + @test findlast("o,", "foo,bar,baz") == 3:4 + @test findprev("o,", "foo,bar,baz", 1) === nothing + @test findlast(",b", "foo,bar,baz") == 8:9 + @test findprev(",b", "foo,bar,baz", 6) == 4:5 + @test findprev(",b", "foo,bar,baz", 3) === nothing + @test findlast("az", "foo,bar,baz") == 10:11 + @test findprev("az", "foo,bar,baz", 10) === nothing end -@test findfirst("", "") === 1:0 -# string backward search with a zero-char string -for i = 1:lastindex(astr) - @test findprev("", astr, i) == i:i-1 +@testset "string search with a two-char regex" begin + @test findfirst(r"xx", "foo,bar,baz") === nothing + @test findfirst(r"fo", "foo,bar,baz") == 1:2 + @test findnext(r"fo", "foo,bar,baz", 3) === nothing + @test findfirst(r"oo", "foo,bar,baz") == 2:3 + @test findnext(r"oo", "foo,bar,baz", 4) === nothing + @test findfirst(r"o,", "foo,bar,baz") == 3:4 + @test findnext(r"o,", "foo,bar,baz", 5) === nothing + @test findfirst(r",b", "foo,bar,baz") == 4:5 + @test findnext(r",b", "foo,bar,baz", 6) == 8:9 + @test findnext(r",b", "foo,bar,baz", 10) === nothing + @test findfirst(r"az", "foo,bar,baz") == 10:11 + @test findnext(r"az", "foo,bar,baz", 12) === nothing end -for i = 1:lastindex(u8str) - @test findprev("", u8str, i) == i:i-1 + +@testset "occursin/contains" begin + # occursin with a String and Char needle + @test occursin("o", "foo") + @test occursin('o', "foo") + # occursin in curried form + @test occursin("foo")("o") + @test occursin("foo")('o') + + # contains + @test contains("foo", "o") + @test contains("foo", 'o') + # contains in curried form + @test contains("o")("foo") + @test contains('o')("foo") + + @test_throws ErrorException "ab" ∈ "abc" end -@test findlast("", "") === 1:0 -# string forward search with a zero-char regex -for i = 1:lastindex(astr) - @test findnext(r"", astr, i) == i:i-1 +@testset "issue #15723" begin + @test findfirst(isequal('('), "⨳(") == 4 + @test findnext(isequal('('), "(⨳(", 2) == 5 + @test findlast(isequal('('), "(⨳(") == 5 + @test findprev(isequal('('), "(⨳(", 2) == 1 + + @test @inferred findall(isequal('a'), "éa") == [3] + @test @inferred findall(isequal('€'), "€€") == [1, 4] + @test @inferred isempty(findall(isequal('é'), "")) end -for i = 1:lastindex(u8str) - # TODO: should regex search fast-forward invalid indices? - if isvalid(u8str,i) - @test findnext(r"", u8str, i) == i:i-1 - end + + +@testset "issue #18109" begin + s_18109 = "fooα🐨βcd3" + @test findlast(isequal('o'), s_18109) == 3 + @test findfirst(isequal('d'), s_18109) == 13 end -# string forward search with a two-char string literal -@test findfirst("xx", "foo,bar,baz") === nothing -@test findfirst("fo", "foo,bar,baz") == 1:2 -@test findnext("fo", "foo,bar,baz", 3) === nothing -@test findfirst("oo", "foo,bar,baz") == 2:3 -@test findnext("oo", "foo,bar,baz", 4) === nothing -@test findfirst("o,", "foo,bar,baz") == 3:4 -@test findnext("o,", "foo,bar,baz", 5) === nothing -@test findfirst(",b", "foo,bar,baz") == 4:5 -@test findnext(",b", "foo,bar,baz", 6) == 8:9 -@test findnext(",b", "foo,bar,baz", 10) === nothing -@test findfirst("az", "foo,bar,baz") == 10:11 -@test findnext("az", "foo,bar,baz", 12) === nothing - -# issue #9365 -# string forward search with a two-char UTF-8 (2 byte) string literal -@test findfirst("éé", "ééé") == 1:3 -@test findnext("éé", "ééé", 1) == 1:3 -# string forward search with a two-char UTF-8 (3 byte) string literal -@test findfirst("€€", "€€€") == 1:4 -@test findnext("€€", "€€€", 1) == 1:4 -# string forward search with a two-char UTF-8 (4 byte) string literal -@test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5 -@test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5 - -# string forward search with a two-char UTF-8 (2 byte) string literal -@test findfirst("éé", "éé") == 1:3 -@test findnext("éé", "éé", 1) == 1:3 -# string forward search with a two-char UTF-8 (3 byte) string literal -@test findfirst("€€", "€€") == 1:4 -@test findnext("€€", "€€", 1) == 1:4 -# string forward search with a two-char UTF-8 (4 byte) string literal -@test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 -@test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5 - -# string backward search with a two-char UTF-8 (2 byte) string literal -@test findlast("éé", "ééé") == 3:5 -@test findprev("éé", "ééé", lastindex("ééé")) == 3:5 -# string backward search with a two-char UTF-8 (3 byte) string literal -@test findlast("€€", "€€€") == 4:7 -@test findprev("€€", "€€€", lastindex("€€€")) == 4:7 -# string backward search with a two-char UTF-8 (4 byte) string literal -@test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9 -@test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 5:9 - -# string backward search with a two-char UTF-8 (2 byte) string literal -@test findlast("éé", "éé") == 1:3 # should really be 1:4! -@test findprev("éé", "éé", lastindex("ééé")) == 1:3 -# string backward search with a two-char UTF-8 (3 byte) string literal -@test findlast("€€", "€€") == 1:4 # should really be 1:6! -@test findprev("€€", "€€", lastindex("€€€")) == 1:4 -# string backward search with a two-char UTF-8 (4 byte) string literal -@test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5 # should really be 1:8! -@test findprev("\U1f596\U1f596", "\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 1:5 - -# string backward search with a two-char string literal -@test findlast("xx", "foo,bar,baz") === nothing -@test findlast("fo", "foo,bar,baz") == 1:2 -@test findprev("fo", "foo,bar,baz", 1) === nothing -@test findlast("oo", "foo,bar,baz") == 2:3 -@test findprev("oo", "foo,bar,baz", 2) === nothing -@test findlast("o,", "foo,bar,baz") == 3:4 -@test findprev("o,", "foo,bar,baz", 1) === nothing -@test findlast(",b", "foo,bar,baz") == 8:9 -@test findprev(",b", "foo,bar,baz", 6) == 4:5 -@test findprev(",b", "foo,bar,baz", 3) === nothing -@test findlast("az", "foo,bar,baz") == 10:11 -@test findprev("az", "foo,bar,baz", 10) === nothing - -# string search with a two-char regex -@test findfirst(r"xx", "foo,bar,baz") === nothing -@test findfirst(r"fo", "foo,bar,baz") == 1:2 -@test findnext(r"fo", "foo,bar,baz", 3) === nothing -@test findfirst(r"oo", "foo,bar,baz") == 2:3 -@test findnext(r"oo", "foo,bar,baz", 4) === nothing -@test findfirst(r"o,", "foo,bar,baz") == 3:4 -@test findnext(r"o,", "foo,bar,baz", 5) === nothing -@test findfirst(r",b", "foo,bar,baz") == 4:5 -@test findnext(r",b", "foo,bar,baz", 6) == 8:9 -@test findnext(r",b", "foo,bar,baz", 10) === nothing -@test findfirst(r"az", "foo,bar,baz") == 10:11 -@test findnext(r"az", "foo,bar,baz", 12) === nothing - -# occursin with a String and Char needle -@test occursin("o", "foo") -@test occursin('o', "foo") -# occursin in curried form -@test occursin("foo")("o") -@test occursin("foo")('o') - -# contains -@test contains("foo", "o") -@test contains("foo", 'o') -# contains in curried form -@test contains("o")("foo") -@test contains('o')("foo") - -@test_throws ErrorException "ab" ∈ "abc" - -# issue #15723 -@test findfirst(isequal('('), "⨳(") == 4 -@test findnext(isequal('('), "(⨳(", 2) == 5 -@test findlast(isequal('('), "(⨳(") == 5 -@test findprev(isequal('('), "(⨳(", 2) == 1 - -@test @inferred findall(isequal('a'), "éa") == [3] -@test @inferred findall(isequal('€'), "€€") == [1, 4] -@test @inferred isempty(findall(isequal('é'), "")) - -# issue #18109 -s_18109 = "fooα🐨βcd3" -@test findlast(isequal('o'), s_18109) == 3 -@test findfirst(isequal('d'), s_18109) == 13 - -# findall (issue #31788) -@testset "findall" begin +@testset "findall (issue #31788)" begin @test findall("fooo", "foo") == UnitRange{Int}[] @test findall("ing", "Spinning laughing dancing") == [6:8, 15:17, 23:25] @test all(findall("", "foo") .=== [1:0, 2:1, 3:2, 4:3]) # use === to compare empty ranges From 2616634a17fdd286e64d16d454bb8077c54d51c9 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Fri, 13 Sep 2024 15:57:19 -0400 Subject: [PATCH 41/57] fix #45494, error in ssa conversion with complex type decl (#55744) We were missing a call to `renumber-assigned-ssavalues` in the case where the declared type is used to assert the type of a value taken from a closure box. --- src/julia-syntax.scm | 5 ++++- test/syntax.jl | 10 ++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index 6815921375184..d6bc03091f37b 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -3977,7 +3977,10 @@ f(x) = yt(x) (val (if (equal? typ '(core Any)) val `(call (core typeassert) ,val - ,(cl-convert typ fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))))) + ,(let ((convt (cl-convert typ fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))) + (if (or (symbol-like? convt) (quoted? convt)) + convt + (renumber-assigned-ssavalues convt))))))) `(block ,@(if (eq? box access) '() `((= ,access ,box))) ,undefcheck diff --git a/test/syntax.jl b/test/syntax.jl index da69bd98dc010..1b630a56f84f8 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -3975,3 +3975,13 @@ module UsingFailedExplicit using .A: x as x @test x === 1 end + +# issue #45494 +begin + local b::Tuple{<:Any} = (0,) + function f45494() + b = b + b + end +end +@test f45494() === (0,) From 2ee655139d2ac9bae9e33e7af318e477aa40c2c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= <765740+giordano@users.noreply.github.com> Date: Fri, 13 Sep 2024 23:47:28 +0100 Subject: [PATCH 42/57] Revert "Avoid materializing arrays in bidiag matmul" (#55737) Reverts JuliaLang/julia#55450. @jishnub suggested reverting this PR to fix #55727. --- stdlib/LinearAlgebra/src/LinearAlgebra.jl | 4 +- stdlib/LinearAlgebra/src/bidiag.jl | 330 +++------------------- stdlib/LinearAlgebra/test/bidiag.jl | 85 ++---- stdlib/LinearAlgebra/test/tridiag.jl | 71 ----- 4 files changed, 68 insertions(+), 422 deletions(-) diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl index 17216845b350c..27d4255fb656b 100644 --- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl +++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl @@ -673,9 +673,7 @@ matprod_dest(A::Diagonal, B::Diagonal, TS) = _matprod_dest_diag(B, TS) _matprod_dest_diag(A, TS) = similar(A, TS) function _matprod_dest_diag(A::SymTridiagonal, TS) n = size(A, 1) - ev = similar(A, TS, max(0, n-1)) - dv = similar(A, TS, n) - Tridiagonal(ev, dv, similar(ev)) + Tridiagonal(similar(A, TS, n-1), similar(A, TS, n), similar(A, TS, n-1)) end # Special handling for adj/trans vec diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl index 8bc5b1c47f366..d86bad7e41435 100644 --- a/stdlib/LinearAlgebra/src/bidiag.jl +++ b/stdlib/LinearAlgebra/src/bidiag.jl @@ -557,8 +557,7 @@ end # function to get the internally stored vectors for Bidiagonal and [Sym]Tridiagonal # to avoid allocations in _mul! below (#24324, #24578) _diag(A::Tridiagonal, k) = k == -1 ? A.dl : k == 0 ? A.d : A.du -_diag(A::SymTridiagonal{<:Number}, k) = k == 0 ? A.dv : A.ev -_diag(A::SymTridiagonal, k) = k == 0 ? view(A, diagind(A, IndexStyle(A))) : view(A, diagind(A, 1, IndexStyle(A))) +_diag(A::SymTridiagonal, k) = k == 0 ? A.dv : A.ev function _diag(A::Bidiagonal, k) if k == 0 return A.dv @@ -578,45 +577,12 @@ function _bibimul!(C, A, B, _add) check_A_mul_B!_sizes(size(C), size(A), size(B)) n = size(A,1) iszero(n) && return C - if n <= 3 - # naive multiplication - for I in CartesianIndices(C) - _modify!(_add, sum(A[I[1], k] * B[k, I[2]] for k in axes(A,2)), C, I) - end - return C - end + n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) # We use `_rmul_or_fill!` instead of `_modify!` here since using # `_modify!` in the following loop will not update the # off-diagonal elements for non-zero beta. _rmul_or_fill!(C, _add.beta) iszero(_add.alpha) && return C - @inbounds begin - # first column of C - C[1,1] += _add(A[1,1]*B[1,1] + A[1, 2]*B[2,1]) - C[2,1] += _add(A[2,1]*B[1,1] + A[2,2]*B[2,1]) - C[3,1] += _add(A[3,2]*B[2,1]) - # second column of C - C[1,2] += _add(A[1,1]*B[1,2] + A[1,2]*B[2,2]) - C[2,2] += _add(A[2,1]*B[1,2] + A[2,2]*B[2,2] + A[2,3]*B[3,2]) - C[3,2] += _add(A[3,2]*B[2,2] + A[3,3]*B[3,2]) - C[4,2] += _add(A[4,3]*B[3,2]) - end # inbounds - # middle columns - __bibimul!(C, A, B, _add) - @inbounds begin - C[n-3,n-1] += _add(A[n-3,n-2]*B[n-2,n-1]) - C[n-2,n-1] += _add(A[n-2,n-2]*B[n-2,n-1] + A[n-2,n-1]*B[n-1,n-1]) - C[n-1,n-1] += _add(A[n-1,n-2]*B[n-2,n-1] + A[n-1,n-1]*B[n-1,n-1] + A[n-1,n]*B[n,n-1]) - C[n, n-1] += _add(A[n,n-1]*B[n-1,n-1] + A[n,n]*B[n,n-1]) - # last column of C - C[n-2, n] += _add(A[n-2,n-1]*B[n-1,n]) - C[n-1, n] += _add(A[n-1,n-1]*B[n-1,n ] + A[n-1,n]*B[n,n ]) - C[n, n] += _add(A[n,n-1]*B[n-1,n ] + A[n,n]*B[n,n ]) - end # inbounds - C -end -function __bibimul!(C, A, B, _add) - n = size(A,1) Al = _diag(A, -1) Ad = _diag(A, 0) Au = _diag(A, 1) @@ -624,198 +590,44 @@ function __bibimul!(C, A, B, _add) Bd = _diag(B, 0) Bu = _diag(B, 1) @inbounds begin + # first row of C + C[1,1] += _add(A[1,1]*B[1,1] + A[1, 2]*B[2, 1]) + C[1,2] += _add(A[1,1]*B[1,2] + A[1,2]*B[2,2]) + C[1,3] += _add(A[1,2]*B[2,3]) + # second row of C + C[2,1] += _add(A[2,1]*B[1,1] + A[2,2]*B[2,1]) + C[2,2] += _add(A[2,1]*B[1,2] + A[2,2]*B[2,2] + A[2,3]*B[3,2]) + C[2,3] += _add(A[2,2]*B[2,3] + A[2,3]*B[3,3]) + C[2,4] += _add(A[2,3]*B[3,4]) for j in 3:n-2 - Aj₋2j₋1 = Au[j-2] - Aj₋1j = Au[j-1] - Ajj₊1 = Au[j] - Aj₋1j₋1 = Ad[j-1] - Ajj = Ad[j] - Aj₊1j₊1 = Ad[j+1] Ajj₋1 = Al[j-1] - Aj₊1j = Al[j] - Aj₊2j₊1 = Al[j+1] + Ajj = Ad[j] + Ajj₊1 = Au[j] + Bj₋1j₋2 = Bl[j-2] + Bj₋1j₋1 = Bd[j-1] Bj₋1j = Bu[j-1] + Bjj₋1 = Bl[j-1] Bjj = Bd[j] + Bjj₊1 = Bu[j] Bj₊1j = Bl[j] - - C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j) - C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj) - C[j, j] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj + Ajj₊1*Bj₊1j) - C[j+1, j] += _add(Aj₊1j*Bjj + Aj₊1j₊1*Bj₊1j) - C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j) - end - end - C -end -function __bibimul!(C, A, B::Bidiagonal, _add) - n = size(A,1) - Al = _diag(A, -1) - Ad = _diag(A, 0) - Au = _diag(A, 1) - Bd = _diag(B, 0) - if B.uplo == 'U' - Bu = _diag(B, 1) - @inbounds begin - for j in 3:n-2 - Aj₋2j₋1 = Au[j-2] - Aj₋1j = Au[j-1] - Aj₋1j₋1 = Ad[j-1] - Ajj = Ad[j] - Ajj₋1 = Al[j-1] - Aj₊1j = Al[j] - Bj₋1j = Bu[j-1] - Bjj = Bd[j] - - C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j) - C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj) - C[j, j] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj) - C[j+1, j] += _add(Aj₊1j*Bjj) - end - end - else # B.uplo == 'L' - Bl = _diag(B, -1) - @inbounds begin - for j in 3:n-2 - Aj₋1j = Au[j-1] - Ajj₊1 = Au[j] - Ajj = Ad[j] - Aj₊1j₊1 = Ad[j+1] - Aj₊1j = Al[j] - Aj₊2j₊1 = Al[j+1] - Bjj = Bd[j] - Bj₊1j = Bl[j] - - C[j-1, j] += _add(Aj₋1j*Bjj) - C[j, j] += _add(Ajj*Bjj + Ajj₊1*Bj₊1j) - C[j+1, j] += _add(Aj₊1j*Bjj + Aj₊1j₊1*Bj₊1j) - C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j) - end - end - end - C -end -function __bibimul!(C, A::Bidiagonal, B, _add) - n = size(A,1) - Bl = _diag(B, -1) - Bd = _diag(B, 0) - Bu = _diag(B, 1) - Ad = _diag(A, 0) - if A.uplo == 'U' - Au = _diag(A, 1) - @inbounds begin - for j in 3:n-2 - Aj₋2j₋1 = Au[j-2] - Aj₋1j = Au[j-1] - Ajj₊1 = Au[j] - Aj₋1j₋1 = Ad[j-1] - Ajj = Ad[j] - Aj₊1j₊1 = Ad[j+1] - Bj₋1j = Bu[j-1] - Bjj = Bd[j] - Bj₊1j = Bl[j] - - C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j) - C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj) - C[j, j] += _add(Ajj*Bjj + Ajj₊1*Bj₊1j) - C[j+1, j] += _add(Aj₊1j₊1*Bj₊1j) - end + Bj₊1j₊1 = Bd[j+1] + Bj₊1j₊2 = Bu[j+1] + C[j,j-2] += _add( Ajj₋1*Bj₋1j₋2) + C[j, j-1] += _add(Ajj₋1*Bj₋1j₋1 + Ajj*Bjj₋1) + C[j, j ] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj + Ajj₊1*Bj₊1j) + C[j, j+1] += _add(Ajj *Bjj₊1 + Ajj₊1*Bj₊1j₊1) + C[j, j+2] += _add(Ajj₊1*Bj₊1j₊2) end - else # A.uplo == 'L' - Al = _diag(A, -1) - @inbounds begin - for j in 3:n-2 - Aj₋1j₋1 = Ad[j-1] - Ajj = Ad[j] - Aj₊1j₊1 = Ad[j+1] - Ajj₋1 = Al[j-1] - Aj₊1j = Al[j] - Aj₊2j₊1 = Al[j+1] - Bj₋1j = Bu[j-1] - Bjj = Bd[j] - Bj₊1j = Bl[j] - - C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j) - C[j, j] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj) - C[j+1, j] += _add(Aj₊1j*Bjj + Aj₊1j₊1*Bj₊1j) - C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j) - end - end - end - C -end -function __bibimul!(C, A::Bidiagonal, B::Bidiagonal, _add) - n = size(A,1) - Ad = _diag(A, 0) - Bd = _diag(B, 0) - if A.uplo == 'U' && B.uplo == 'U' - Au = _diag(A, 1) - Bu = _diag(B, 1) - @inbounds begin - for j in 3:n-2 - Aj₋2j₋1 = Au[j-2] - Aj₋1j = Au[j-1] - Aj₋1j₋1 = Ad[j-1] - Ajj = Ad[j] - Bj₋1j = Bu[j-1] - Bjj = Bd[j] - - C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j) - C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj) - C[j, j] += _add(Ajj*Bjj) - end - end - elseif A.uplo == 'U' && B.uplo == 'L' - Au = _diag(A, 1) - Bl = _diag(B, -1) - @inbounds begin - for j in 3:n-2 - Aj₋1j = Au[j-1] - Ajj₊1 = Au[j] - Ajj = Ad[j] - Aj₊1j₊1 = Ad[j+1] - Bjj = Bd[j] - Bj₊1j = Bl[j] - - C[j-1, j] += _add(Aj₋1j*Bjj) - C[j, j] += _add(Ajj*Bjj + Ajj₊1*Bj₊1j) - C[j+1, j] += _add(Aj₊1j₊1*Bj₊1j) - end - end - elseif A.uplo == 'L' && B.uplo == 'U' - Al = _diag(A, -1) - Bu = _diag(B, 1) - @inbounds begin - for j in 3:n-2 - Aj₋1j₋1 = Ad[j-1] - Ajj = Ad[j] - Ajj₋1 = Al[j-1] - Aj₊1j = Al[j] - Bj₋1j = Bu[j-1] - Bjj = Bd[j] - - C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j) - C[j, j] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj) - C[j+1, j] += _add(Aj₊1j*Bjj) - end - end - else # A.uplo == 'L' && B.uplo == 'L' - Al = _diag(A, -1) - Bl = _diag(B, -1) - @inbounds begin - for j in 3:n-2 - Ajj = Ad[j] - Aj₊1j₊1 = Ad[j+1] - Aj₊1j = Al[j] - Aj₊2j₊1 = Al[j+1] - Bjj = Bd[j] - Bj₊1j = Bl[j] - - C[j, j] += _add(Ajj*Bjj) - C[j+1, j] += _add(Aj₊1j*Bjj + Aj₊1j₊1*Bj₊1j) - C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j) - end - end - end + # row before last of C + C[n-1,n-3] += _add(A[n-1,n-2]*B[n-2,n-3]) + C[n-1,n-2] += _add(A[n-1,n-1]*B[n-1,n-2] + A[n-1,n-2]*B[n-2,n-2]) + C[n-1,n-1] += _add(A[n-1,n-2]*B[n-2,n-1] + A[n-1,n-1]*B[n-1,n-1] + A[n-1,n]*B[n,n-1]) + C[n-1,n ] += _add(A[n-1,n-1]*B[n-1,n ] + A[n-1, n]*B[n ,n ]) + # last row of C + C[n,n-2] += _add(A[n,n-1]*B[n-1,n-2]) + C[n,n-1] += _add(A[n,n-1]*B[n-1,n-1] + A[n,n]*B[n,n-1]) + C[n,n ] += _add(A[n,n-1]*B[n-1,n ] + A[n,n]*B[n,n ]) + end # inbounds C end @@ -932,52 +744,7 @@ function _mul!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, _add::MulA nB = size(B,2) (iszero(nA) || iszero(nB)) && return C iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - if nA <= 3 - # naive multiplication - for I in CartesianIndices(C) - col = Base.tail(Tuple(I)) - _modify!(_add, sum(A[I[1], k] * B[k, col...] for k in axes(A,2)), C, I) - end - return C - end - _mul_bitrisym!(C, A, B, _add) -end -function _mul_bitrisym!(C::AbstractVecOrMat, A::Bidiagonal, B::AbstractVecOrMat, _add::MulAddMul) - nA = size(A,1) - nB = size(B,2) - d = A.dv - if A.uplo == 'U' - u = A.ev - @inbounds begin - for j = 1:nB - b₀, b₊ = B[1, j], B[2, j] - _modify!(_add, d[1]*b₀ + u[1]*b₊, C, (1, j)) - for i = 2:nA - 1 - b₀, b₊ = b₊, B[i + 1, j] - _modify!(_add, d[i]*b₀ + u[i]*b₊, C, (i, j)) - end - _modify!(_add, d[nA]*b₊, C, (nA, j)) - end - end - else - l = A.ev - @inbounds begin - for j = 1:nB - b₀, b₊ = B[1, j], B[2, j] - _modify!(_add, d[1]*b₀, C, (1, j)) - for i = 2:nA - 1 - b₋, b₀, b₊ = b₀, b₊, B[i + 1, j] - _modify!(_add, l[i - 1]*b₋ + d[i]*b₀, C, (i, j)) - end - _modify!(_add, l[nA - 1]*b₀ + d[nA]*b₊, C, (nA, j)) - end - end - end - C -end -function _mul_bitrisym!(C::AbstractVecOrMat, A::TriSym, B::AbstractVecOrMat, _add::MulAddMul) - nA = size(A,1) - nB = size(B,2) + nA <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) l = _diag(A, -1) d = _diag(A, 0) u = _diag(A, 1) @@ -1002,9 +769,8 @@ function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::TriSym, _add::MulAddMul) m = size(B,2) (iszero(m) || iszero(n)) && return C iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) - if m == 1 - B11 = B[1,1] - return mul!(C, A, B11, _add.alpha, _add.beta) + if n <= 3 || m <= 1 + return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) end Bl = _diag(B, -1) Bd = _diag(B, 0) @@ -1038,18 +804,21 @@ function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal, _add::MulAdd m, n = size(A) (iszero(m) || iszero(n)) && return C iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta) + if size(A, 1) <= 3 || size(B, 2) <= 1 + return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) + end @inbounds if B.uplo == 'U' - for j in n:-1:2, i in 1:m - _modify!(_add, A[i,j] * B.dv[j] + A[i,j-1] * B.ev[j-1], C, (i, j)) - end for i in 1:m + for j in n:-1:2 + _modify!(_add, A[i,j] * B.dv[j] + A[i,j-1] * B.ev[j-1], C, (i, j)) + end _modify!(_add, A[i,1] * B.dv[1], C, (i, 1)) end else # uplo == 'L' - for j in 1:n-1, i in 1:m - _modify!(_add, A[i,j] * B.dv[j] + A[i,j+1] * B.ev[j], C, (i, j)) - end for i in 1:m + for j in 1:n-1 + _modify!(_add, A[i,j] * B.dv[j] + A[i,j+1] * B.ev[j], C, (i, j)) + end _modify!(_add, A[i,n] * B.dv[n], C, (i, n)) end end @@ -1065,12 +834,7 @@ function _dibimul!(C, A, B, _add) check_A_mul_B!_sizes(size(C), size(A), size(B)) n = size(A,1) iszero(n) && return C - if n <= 3 - for I in CartesianIndices(C) - _modify!(_add, A.diag[I[1]] * B[I[1], I[2]], C, I) - end - return C - end + n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta) _rmul_or_fill!(C, _add.beta) # see the same use above iszero(_add.alpha) && return C Ad = A.diag diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl index 58c228e39e226..ef50658a642fb 100644 --- a/stdlib/LinearAlgebra/test/bidiag.jl +++ b/stdlib/LinearAlgebra/test/bidiag.jl @@ -1026,71 +1026,26 @@ end @test_throws "cannot set entry" B[1,2] = 4 end -@testset "mul for small matrices" begin - @testset for n in 0:6 - D = Diagonal(rand(n)) - v = rand(n) - @testset for uplo in (:L, :U) - B = Bidiagonal(rand(n), rand(max(n-1,0)), uplo) - M = Matrix(B) - - @test B * v ≈ M * v - @test mul!(similar(v), B, v) ≈ M * v - @test mul!(ones(size(v)), B, v, 2, 3) ≈ M * v * 2 .+ 3 - - @test B * B ≈ M * M - @test mul!(similar(B, size(B)), B, B) ≈ M * M - @test mul!(ones(size(B)), B, B, 2, 4) ≈ M * M * 2 .+ 4 - - for m in 0:6 - AL = rand(m,n) - AR = rand(n,m) - @test AL * B ≈ AL * M - @test B * AR ≈ M * AR - @test mul!(similar(AL), AL, B) ≈ AL * M - @test mul!(similar(AR), B, AR) ≈ M * AR - @test mul!(ones(size(AL)), AL, B, 2, 4) ≈ AL * M * 2 .+ 4 - @test mul!(ones(size(AR)), B, AR, 2, 4) ≈ M * AR * 2 .+ 4 - end - - @test B * D ≈ M * D - @test D * B ≈ D * M - @test mul!(similar(B), B, D) ≈ M * D - @test mul!(similar(B), B, D) ≈ M * D - @test mul!(similar(B, size(B)), D, B) ≈ D * M - @test mul!(similar(B, size(B)), B, D) ≈ M * D - @test mul!(ones(size(B)), D, B, 2, 4) ≈ D * M * 2 .+ 4 - @test mul!(ones(size(B)), B, D, 2, 4) ≈ M * D * 2 .+ 4 - end - BL = Bidiagonal(rand(n), rand(max(0, n-1)), :L) - ML = Matrix(BL) - BU = Bidiagonal(rand(n), rand(max(0, n-1)), :U) - MU = Matrix(BU) - T = Tridiagonal(zeros(max(0, n-1)), zeros(n), zeros(max(0, n-1))) - @test mul!(T, BL, BU) ≈ ML * MU - @test mul!(T, BU, BL) ≈ MU * ML - T = Tridiagonal(ones(max(0, n-1)), ones(n), ones(max(0, n-1))) - @test mul!(copy(T), BL, BU, 2, 3) ≈ ML * MU * 2 + T * 3 - @test mul!(copy(T), BU, BL, 2, 3) ≈ MU * ML * 2 + T * 3 - end - - n = 4 - arr = SizedArrays.SizedArray{(2,2)}(reshape([1:4;],2,2)) - for B in ( - Bidiagonal(fill(arr,n), fill(arr,n-1), :L), - Bidiagonal(fill(arr,n), fill(arr,n-1), :U), - ) - @test B * B ≈ Matrix(B) * Matrix(B) - BL = Bidiagonal(fill(arr,n), fill(arr,n-1), :L) - BU = Bidiagonal(fill(arr,n), fill(arr,n-1), :U) - @test BL * B ≈ Matrix(BL) * Matrix(B) - @test BU * B ≈ Matrix(BU) * Matrix(B) - @test B * BL ≈ Matrix(B) * Matrix(BL) - @test B * BU ≈ Matrix(B) * Matrix(BU) - D = Diagonal(fill(arr,n)) - @test D * B ≈ Matrix(D) * Matrix(B) - @test B * D ≈ Matrix(B) * Matrix(D) - end +@testset "mul with empty arrays" begin + A = zeros(5,0) + B = Bidiagonal(zeros(0), zeros(0), :U) + BL = Bidiagonal(zeros(5), zeros(4), :U) + @test size(A * B) == size(A) + @test size(BL * A) == size(A) + @test size(B * B) == size(B) + C = similar(A) + @test mul!(C, A, B) == A * B + @test mul!(C, BL, A) == BL * A + @test mul!(similar(B), B, B) == B * B + @test mul!(similar(B, size(B)), B, B) == B * B + + v = zeros(size(B,2)) + @test size(B * v) == size(v) + @test mul!(similar(v), B, v) == B * v + + D = Diagonal(zeros(size(B,2))) + @test size(B * D) == size(D * B) == size(D) + @test mul!(similar(D), B, D) == mul!(similar(D), D, B) == B * D end end # module TestBidiagonal diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl index 15ac7f9f2147f..3330fa682fe5e 100644 --- a/stdlib/LinearAlgebra/test/tridiag.jl +++ b/stdlib/LinearAlgebra/test/tridiag.jl @@ -970,75 +970,4 @@ end @test sprint(show, S) == "SymTridiagonal($(repr(diag(S))), $(repr(diag(S,1))))" end -@testset "mul for small matrices" begin - @testset for n in 0:6 - for T in ( - Tridiagonal(rand(max(n-1,0)), rand(n), rand(max(n-1,0))), - SymTridiagonal(rand(n), rand(max(n-1,0))), - ) - M = Matrix(T) - @test T * T ≈ M * M - @test mul!(similar(T, size(T)), T, T) ≈ M * M - @test mul!(ones(size(T)), T, T, 2, 4) ≈ M * M * 2 .+ 4 - - for m in 0:6 - AR = rand(n,m) - AL = rand(m,n) - @test AL * T ≈ AL * M - @test T * AR ≈ M * AR - @test mul!(similar(AL), AL, T) ≈ AL * M - @test mul!(similar(AR), T, AR) ≈ M * AR - @test mul!(ones(size(AL)), AL, T, 2, 4) ≈ AL * M * 2 .+ 4 - @test mul!(ones(size(AR)), T, AR, 2, 4) ≈ M * AR * 2 .+ 4 - end - - v = rand(n) - @test T * v ≈ M * v - @test mul!(similar(v), T, v) ≈ M * v - - D = Diagonal(rand(n)) - @test T * D ≈ M * D - @test D * T ≈ D * M - @test mul!(Tridiagonal(similar(T)), D, T) ≈ D * M - @test mul!(Tridiagonal(similar(T)), T, D) ≈ M * D - @test mul!(similar(T, size(T)), D, T) ≈ D * M - @test mul!(similar(T, size(T)), T, D) ≈ M * D - @test mul!(ones(size(T)), D, T, 2, 4) ≈ D * M * 2 .+ 4 - @test mul!(ones(size(T)), T, D, 2, 4) ≈ M * D * 2 .+ 4 - - for uplo in (:U, :L) - B = Bidiagonal(rand(n), rand(max(0, n-1)), uplo) - @test T * B ≈ M * B - @test B * T ≈ B * M - if n <= 2 - @test mul!(Tridiagonal(similar(T)), B, T) ≈ B * M - @test mul!(Tridiagonal(similar(T)), T, B) ≈ M * B - end - @test mul!(similar(T, size(T)), B, T) ≈ B * M - @test mul!(similar(T, size(T)), T, B) ≈ M * B - @test mul!(ones(size(T)), B, T, 2, 4) ≈ B * M * 2 .+ 4 - @test mul!(ones(size(T)), T, B, 2, 4) ≈ M * B * 2 .+ 4 - end - end - end - - n = 4 - arr = SizedArrays.SizedArray{(2,2)}(reshape([1:4;],2,2)) - for T in ( - SymTridiagonal(fill(arr,n), fill(arr,n-1)), - Tridiagonal(fill(arr,n-1), fill(arr,n), fill(arr,n-1)), - ) - @test T * T ≈ Matrix(T) * Matrix(T) - BL = Bidiagonal(fill(arr,n), fill(arr,n-1), :L) - BU = Bidiagonal(fill(arr,n), fill(arr,n-1), :U) - @test BL * T ≈ Matrix(BL) * Matrix(T) - @test BU * T ≈ Matrix(BU) * Matrix(T) - @test T * BL ≈ Matrix(T) * Matrix(BL) - @test T * BU ≈ Matrix(T) * Matrix(BU) - D = Diagonal(fill(arr,n)) - @test D * T ≈ Matrix(D) * Matrix(T) - @test T * D ≈ Matrix(T) * Matrix(D) - end -end - end # module TestTridiagonal From 243bdede3d686b1eaa634db190f2bab3e5f4de72 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Sat, 14 Sep 2024 12:22:31 -0400 Subject: [PATCH 43/57] Add a docs section about loading/precomp/ttfx time tuning (#55569) --- doc/src/manual/performance-tips.md | 119 +++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md index 38e27476f0af8..436d58f54754a 100644 --- a/doc/src/manual/performance-tips.md +++ b/doc/src/manual/performance-tips.md @@ -1394,6 +1394,125 @@ Prominent examples include [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl These are external packages, so we will not discuss them in detail here. Please refer to their respective documentations (especially because they have different behaviors than OpenBLAS with respect to multithreading). +## Execution latency, package loading and package precompiling time + +### Reducing time to first plot etc. + +The first time a julia method is called it (and any methods it calls, or ones that can be statically determined) will be +compiled. The [`@time`](@ref) macro family illustrates this. + +``` +julia> foo() = rand(2,2) * rand(2,2) +foo (generic function with 1 method) + +julia> @time @eval foo(); + 0.252395 seconds (1.12 M allocations: 56.178 MiB, 2.93% gc time, 98.12% compilation time) + +julia> @time @eval foo(); + 0.000156 seconds (63 allocations: 2.453 KiB) +``` + +Note that `@time @eval` is better for measuring compilation time because without [`@eval`](@ref), some compilation may +already be done before timing starts. + +When developing a package, you may be able to improve the experience of your users with *precompilation* +so that when they use the package, the code they use is already compiled. To precompile package code effectively, it's +recommended to use [`PrecompileTools.jl`](https://julialang.github.io/PrecompileTools.jl/stable/) to run a +"precompile workload" during precompilation time that is representative of typical package usage, which will cache the +native compiled code into the package `pkgimage` cache, greatly reducing "time to first execution" (often referred to as +TTFX) for such usage. + +Note that [`PrecompileTools.jl`](https://julialang.github.io/PrecompileTools.jl/stable/) workloads can be +disabled and sometimes configured via Preferences if you do not want to spend the extra time precompiling, which +may be the case during development of a package. + +### Reducing package loading time + +Keeping the time taken to load the package down is usually helpful. +General good practice for package developers includes: + +1. Reduce your dependencies to those you really need. Consider using [package extensions](@ref) to support interoperability with other packages without bloating your essential dependencies. +3. Avoid use of [`__init__()`](@ref) functions unless there is no alternative, especially those which might trigger a lot + of compilation, or just take a long time to execute. +4. Where possible, fix [invalidations](https://julialang.org/blog/2020/08/invalidations/) among your dependencies and from your package code. + +The tool [`@time_imports`](@ref) can be useful in the REPL to review the above factors. + +```julia-repl +julia> @time @time_imports using Plots + 0.5 ms Printf + 16.4 ms Dates + 0.7 ms Statistics + ┌ 23.8 ms SuiteSparse_jll.__init__() 86.11% compilation time (100% recompilation) + 90.1 ms SuiteSparse_jll 91.57% compilation time (82% recompilation) + 0.9 ms Serialization + ┌ 39.8 ms SparseArrays.CHOLMOD.__init__() 99.47% compilation time (100% recompilation) + 166.9 ms SparseArrays 23.74% compilation time (100% recompilation) + 0.4 ms Statistics → SparseArraysExt + 0.5 ms TOML + 8.0 ms Preferences + 0.3 ms PrecompileTools + 0.2 ms Reexport +... many deps omitted for example ... + 1.4 ms Tar + ┌ 73.8 ms p7zip_jll.__init__() 99.93% compilation time (100% recompilation) + 79.4 ms p7zip_jll 92.91% compilation time (100% recompilation) + ┌ 27.7 ms GR.GRPreferences.__init__() 99.77% compilation time (100% recompilation) + 43.0 ms GR 64.26% compilation time (100% recompilation) + ┌ 2.1 ms Plots.__init__() 91.80% compilation time (100% recompilation) + 300.9 ms Plots 0.65% compilation time (100% recompilation) + 1.795602 seconds (3.33 M allocations: 190.153 MiB, 7.91% gc time, 39.45% compilation time: 97% of which was recompilation) + +``` + +Notice that in this example there are multiple packages loaded, some with `__init__()` functions, some of which cause +compilation of which some is recompilation. Recompilation is caused by earlier packages invalidating methods, then in +these cases when the following packages run their `__init__()` function some hit recompilation before the code can be run. + +Further, note the `Statistics` extension `SparseArraysExt` has been activated because `SparseArrays` is in the dependency +tree. i.e. see `0.4 ms Statistics → SparseArraysExt`. + +This report gives a good opportunity to review whether the cost of dependency load time is worth the functionality it brings. +Also the `Pkg` utility `why` can be used to report why a an indirect dependency exists. + +``` +(CustomPackage) pkg> why FFMPEG_jll + Plots → FFMPEG → FFMPEG_jll + Plots → GR → GR_jll → FFMPEG_jll +``` + +or to see the indirect dependencies that a package brings in, you can `pkg> rm` the package, see the deps that are removed +from the manifest, then revert the change with `pkg> undo`. + +If loading time is dominated by slow `__init__()` methods having compilation, one verbose way to identify what is being +compiled is to use the julia args `--trace-compile=stderr --trace-compile-timing` which will report a [`precompile`](@ref) +statement each time a method is compiled, along with how long compilation took. For instance, the full setup would be: + +``` +$ julia --startup-file=no --trace-compile=stderr --trace-compile-timing +julia> @time @time_imports using CustomPackage +... +``` + +Note the `--startup-file=no` which helps isolate the test from packages you may have in your `startup.jl`. + +More analysis of the reasons for recompilation can be achieved with the +[`SnoopCompile`](https://github.com/timholy/SnoopCompile.jl) package. + +### Reducing precompilation time + +If package precompilation is taking a long time, one option is to set the following internal and then precompile. +``` +julia> Base.PRECOMPILE_TRACE_COMPILE[] = "stderr" + +pkg> precompile +``` + +This has the effect of setting `--trace-compile=stderr --trace-compile-timing` in the precompilation processes themselves, +so will show which methods are precompiled and how long they took to precompile. + +There are also profiling options such as [using the external profiler Tracy to profile the precompilation process](@ref Profiling-package-precompilation-with-Tracy). + ## Miscellaneous From 346f38bceabf3dab1d3912fe822a663735c91d4a Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sat, 14 Sep 2024 14:40:10 -0500 Subject: [PATCH 44/57] Add compat entry for `Base.donotdelete` (#55773) --- base/docs/basedocs.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl index e03d0db78f29f..0fc253bd73d1c 100644 --- a/base/docs/basedocs.jl +++ b/base/docs/basedocs.jl @@ -3712,6 +3712,9 @@ unused and delete the entire benchmark code). which the value of the arguments of this intrinsic were available (in a register, in memory, etc.). +!!! compat "Julia 1.8" + This method was added in Julia 1.8. + # Examples ```julia From f4fb87b0f9c3d8e5bcc9901701acd81534789293 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Sun, 15 Sep 2024 07:12:12 -0400 Subject: [PATCH 45/57] REPL: precompile in its own module because Main is closed. Add check for unexpected errors. (#55759) --- stdlib/REPL/src/REPL.jl | 4 ++-- stdlib/REPL/src/precompile.jl | 27 ++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl index ddf2f55d0b9f7..44fe0446240c6 100644 --- a/stdlib/REPL/src/REPL.jl +++ b/stdlib/REPL/src/REPL.jl @@ -758,11 +758,11 @@ setmodifiers!(c::REPLCompletionProvider, m::LineEdit.Modifiers) = c.modifiers = Set `mod` as the default contextual module in the REPL, both for evaluating expressions and printing them. """ -function activate(mod::Module=Main) +function activate(mod::Module=Main; interactive_utils::Bool=true) mistate = (Base.active_repl::LineEditREPL).mistate mistate === nothing && return nothing mistate.active_module = mod - Base.load_InteractiveUtils(mod) + interactive_utils && Base.load_InteractiveUtils(mod) return nothing end diff --git a/stdlib/REPL/src/precompile.jl b/stdlib/REPL/src/precompile.jl index 82a1a0bb78ee8..c42def9078759 100644 --- a/stdlib/REPL/src/precompile.jl +++ b/stdlib/REPL/src/precompile.jl @@ -14,6 +14,19 @@ finally end let + # these are intentionally triggered + allowed_errors = [ + "BoundsError: attempt to access 0-element Vector{Any} at index [1]", + "MethodError: no method matching f(::$Int, ::$Int)", + "Padding of type", # reinterpret docstring has ERROR examples + ] + function check_errors(out) + str = String(out) + if occursin("ERROR:", str) && !any(occursin(e, str) for e in allowed_errors) + @error "Unexpected error (Review REPL precompilation with debug_output on):\n$str" + exit(1) + end + end ## Debugging options # View the code sent to the repl by setting this to `stdout` debug_output = devnull # or stdout @@ -25,6 +38,8 @@ let DOWN_ARROW = "\e[B" repl_script = """ + import REPL + REPL.activate(REPL.Precompile; interactive_utils=false) # Main is closed so we can't evaluate in it 2+2 print("") printstyled("a", "b") @@ -47,6 +62,7 @@ let [][1] Base.Iterators.minimum cd("complete_path\t\t$CTRL_C + REPL.activate(; interactive_utils=false) println("done") """ @@ -113,10 +129,10 @@ let end schedule(repltask) # wait for the definitive prompt before start writing to the TTY - readuntil(output_copy, JULIA_PROMPT) + check_errors(readuntil(output_copy, JULIA_PROMPT)) write(debug_output, "\n#### REPL STARTED ####\n") sleep(0.1) - readavailable(output_copy) + check_errors(readavailable(output_copy)) # Input our script precompile_lines = split(repl_script::String, '\n'; keepempty=false) curr = 0 @@ -124,16 +140,16 @@ let sleep(0.1) curr += 1 # consume any other output - bytesavailable(output_copy) > 0 && readavailable(output_copy) + bytesavailable(output_copy) > 0 && check_errors(readavailable(output_copy)) # push our input write(debug_output, "\n#### inputting statement: ####\n$(repr(l))\n####\n") # If the line ends with a CTRL_C, don't write an extra newline, which would # cause a second empty prompt. Our code below expects one new prompt per # input line and can race out of sync with the unexpected second line. endswith(l, CTRL_C) ? write(ptm, l) : write(ptm, l, "\n") - readuntil(output_copy, "\n") + check_errors(readuntil(output_copy, "\n")) # wait for the next prompt-like to appear - readuntil(output_copy, "\n") + check_errors(readuntil(output_copy, "\n")) strbuf = "" while !eof(output_copy) strbuf *= String(readavailable(output_copy)) @@ -143,6 +159,7 @@ let occursin(HELP_PROMPT, strbuf) && break sleep(0.1) end + check_errors(strbuf) end write(debug_output, "\n#### COMPLETED - Closing REPL ####\n") write(ptm, "$CTRL_D") From 4633607ce9b9f077f32f89f09a136e04389bbac2 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sun, 15 Sep 2024 07:11:22 -0500 Subject: [PATCH 46/57] Try to put back previously flakey addmul tests (#55775) Partial revert of #50071, inspired by conversation in https://github.com/JuliaLang/julia/issues/49966#issuecomment-2350935477 Ran the tests 100 times to make sure we're not putting back something that's still flaky. Closes #49966 --- stdlib/LinearAlgebra/test/addmul.jl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/stdlib/LinearAlgebra/test/addmul.jl b/stdlib/LinearAlgebra/test/addmul.jl index 3fff8289242f7..72fdf687bf5c3 100644 --- a/stdlib/LinearAlgebra/test/addmul.jl +++ b/stdlib/LinearAlgebra/test/addmul.jl @@ -164,8 +164,7 @@ end Bc = Matrix(B) returned_mat = mul!(C, A, B, α, β) @test returned_mat === C - # This test is skipped because it is flakey, but should be fixed and put back (see #49966) - @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol + @test collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol y = C[:, 1] x = B[:, 1] @@ -190,8 +189,7 @@ end returned_mat = mul!(C, Af, Bf, α, β) @test returned_mat === C - # This test is skipped because it is flakey, but should be fixed and put back (see #49966) - @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol + @test collect(returned_mat) ≈ α * Ac * Bc + β * Cc rtol=rtol end end end @@ -203,8 +201,7 @@ end Bc = Matrix(B) returned_mat = mul!(C, A, B, α, zero(eltype(C))) @test returned_mat === C - # This test is skipped because it is flakey, but should be fixed and put back (see #49966) - @test_skip collect(returned_mat) ≈ α * Ac * Bc rtol=rtol + @test collect(returned_mat) ≈ α * Ac * Bc rtol=rtol end end From a993cd8f81a6bc02a88ee5cf036f4e29c36d5580 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mos=C3=A8=20Giordano?= <765740+giordano@users.noreply.github.com> Date: Sun, 15 Sep 2024 21:24:47 +0100 Subject: [PATCH 47/57] Print results of `runtests` with `printstyled` (#55780) This ensures escape characters are used only if `stdout` can accept them. --- test/runtests.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index c46472ac93fa8..e48e896f4069e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -438,9 +438,9 @@ cd(@__DIR__) do # o_ts.verbose = true # set to true to show all timings when successful Test.print_test_results(o_ts, 1) if !o_ts.anynonpass - println(" \033[32;1mSUCCESS\033[0m") + printstyled(" SUCCESS\n"; bold=true, color=:green) else - println(" \033[31;1mFAILURE\033[0m\n") + printstyled(" FAILURE\n\n"; bold=true, color=:red) skipped > 0 && println("$skipped test", skipped > 1 ? "s were" : " was", " skipped due to failure.") println("The global RNG seed was 0x$(string(seed, base = 16)).\n") From 55c40ce52eb5c249efdff421101190b2a111d541 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Mon, 16 Sep 2024 13:25:20 -0400 Subject: [PATCH 48/57] move null check in `unsafe_convert` of RefValue (#55766) LLVM can optimize out this check but our optimizer can't, so this leads to smaller IR in most cases. --- base/refvalue.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/refvalue.jl b/base/refvalue.jl index 000088ff0ce76..7a0f2f84e2206 100644 --- a/base/refvalue.jl +++ b/base/refvalue.jl @@ -46,9 +46,9 @@ function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefValue{T}) # Instead, explicitly load the pointer from the `RefValue`, # which also ensures this returns same pointer as the one rooted in the `RefValue` object. p = atomic_pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), :monotonic) - end - if p == C_NULL - throw(UndefRefError()) + if p == C_NULL + throw(UndefRefError()) + end end return p end From 753296e89ddc484e54937ce7195a3f152fd5a14a Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Mon, 16 Sep 2024 15:58:40 -0300 Subject: [PATCH 49/57] Fix hang in tmerge_types_slow (#55757) Fixes https://github.com/JuliaLang/julia/issues/55751 Co-authored-by: Jameson Nash --- base/compiler/typelimits.jl | 1 + test/compiler/inference.jl | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl index 91a44d3b117ab..3d0e5f3d0877d 100644 --- a/base/compiler/typelimits.jl +++ b/base/compiler/typelimits.jl @@ -831,6 +831,7 @@ end typenames[i] = Any.name simplify[i] = false types[j] = widen + typenames[j] = ijname break end end diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl index 9454c53a09fb7..d1382d3c84b82 100644 --- a/test/compiler/inference.jl +++ b/test/compiler/inference.jl @@ -6141,3 +6141,14 @@ end == TypeError @test Base.infer_exception_type((Vector{Any},)) do args Core.throw_methoderror(args...) end == Union{MethodError,ArgumentError} + +# Issue https://github.com/JuliaLang/julia/issues/55751 + +abstract type AbstractGrid55751{T, N} <: AbstractArray{T, N} end +struct Grid55751{T, N, AT} <: AbstractGrid55751{T, N} + axes::AT +end + +t155751 = Union{AbstractArray{UInt8, 4}, Array{Float32, 4}, Grid55751{Float32, 3, _A} where _A} +t255751 = Array{Float32, 3} +@test Core.Compiler.tmerge_types_slow(t155751,t255751) == AbstractArray # shouldn't hang From 5aad7617c3ac49155d03748a451f56215b28dec4 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 16 Sep 2024 15:08:57 -0400 Subject: [PATCH 50/57] trace-compile: color recompilation yellow (#55763) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marks recompilation of a method that produced a `precompile` statement as yellow, or if color isn't supported adds a trailing comment: `# recompilation`. The coloring matches the `@time_imports` coloring. i.e. an excerpt of ``` % ./julia --start=no --trace-compile=stderr --trace-compile-timing -e "using InteractiveUtils; @time @time_imports using Plots" ``` ![Screenshot 2024-09-13 at 5 04 24 PM](https://github.com/user-attachments/assets/85bd99e0-586e-4070-994f-2d845be0d9e7) --- NEWS.md | 1 + doc/man/julia.1 | 3 ++- doc/src/manual/command-line-interface.md | 2 +- src/gf.c | 21 ++++++++++++++++----- src/jloptions.c | 4 +++- 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/NEWS.md b/NEWS.md index c12cc3c64300c..9ecdd87f0c2bb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -56,6 +56,7 @@ variables. ([#53742]). * `--project=@temp` starts Julia with a temporary environment. * New `--trace-compile-timing` option to report how long each method reported by `--trace-compile` took to compile, in ms. ([#54662]) +* `--trace-compile` now prints recompiled methods in yellow or with a trailing comment if color is not supported ([#55763]) Multi-threading changes ----------------------- diff --git a/doc/man/julia.1 b/doc/man/julia.1 index ebac4362b39a6..536a23bd37894 100644 --- a/doc/man/julia.1 +++ b/doc/man/julia.1 @@ -283,7 +283,8 @@ Generate an incremental output file (rather than complete) .TP --trace-compile={stderr|name} -Print precompile statements for methods compiled during execution or save to a path +Print precompile statements for methods compiled during execution or save to stderr or a path. +Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported .TP --trace-compile-timing= diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md index d1ed576c42a4f..41c3eacd61d26 100644 --- a/doc/src/manual/command-line-interface.md +++ b/doc/src/manual/command-line-interface.md @@ -214,7 +214,7 @@ The following is a complete list of command-line switches available when launchi |`--output-bc ` |Generate LLVM bitcode (.bc)| |`--output-asm ` |Generate an assembly file (.s)| |`--output-incremental={yes\|no*}` |Generate an incremental output file (rather than complete)| -|`--trace-compile={stderr\|name}` |Print precompile statements for methods compiled during execution or save to a path| +|`--trace-compile={stderr\|name}` |Print precompile statements for methods compiled during execution or save to stderr or a path. Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported| |`--trace-compile-timing` |If --trace-compile is enabled show how long each took to compile in ms| |`--image-codegen` |Force generate code in imaging mode| |`--permalloc-pkgimg={yes\|no*}` |Copy the data section of package images into memory| diff --git a/src/gf.c b/src/gf.c index 970cb62b8a862..e6f5b4ee007f7 100644 --- a/src/gf.c +++ b/src/gf.c @@ -2514,7 +2514,7 @@ jl_code_instance_t *jl_method_inferred_with_abi(jl_method_instance_t *mi JL_PROP jl_mutex_t precomp_statement_out_lock; -static void record_precompile_statement(jl_method_instance_t *mi, double compilation_time) +static void record_precompile_statement(jl_method_instance_t *mi, double compilation_time, int is_recompile) { static ios_t f_precompile; static JL_STREAM* s_precompile = NULL; @@ -2539,11 +2539,22 @@ static void record_precompile_statement(jl_method_instance_t *mi, double compila } } if (!jl_has_free_typevars(mi->specTypes)) { + if (is_recompile && s_precompile == JL_STDERR && jl_options.color != JL_OPTIONS_COLOR_OFF) + jl_printf(s_precompile, "\e[33m"); if (jl_options.trace_compile_timing) jl_printf(s_precompile, "#= %6.1f ms =# ", compilation_time / 1e6); jl_printf(s_precompile, "precompile("); jl_static_show(s_precompile, mi->specTypes); - jl_printf(s_precompile, ")\n"); + jl_printf(s_precompile, ")"); + if (is_recompile) { + if (s_precompile == JL_STDERR && jl_options.color != JL_OPTIONS_COLOR_OFF) { + jl_printf(s_precompile, "\e[0m"); + } + else { + jl_printf(s_precompile, " # recompile"); + } + } + jl_printf(s_precompile, "\n"); if (s_precompile != JL_STDERR) ios_flush(&f_precompile); } @@ -2674,7 +2685,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t // unspec is probably not specsig, but might be using specptr jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag jl_mi_cache_insert(mi, codeinst); - record_precompile_statement(mi, 0); + record_precompile_statement(mi, 0, 0); return codeinst; } } @@ -2691,7 +2702,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t 0, 1, ~(size_t)0, 0, jl_nothing, 0, NULL); jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call); jl_mi_cache_insert(mi, codeinst); - record_precompile_statement(mi, 0); + record_precompile_statement(mi, 0, 0); return codeinst; } if (compile_option == JL_OPTIONS_COMPILE_OFF) { @@ -2740,7 +2751,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t codeinst = NULL; } else if (did_compile && codeinst->owner == jl_nothing) { - record_precompile_statement(mi, compile_time); + record_precompile_statement(mi, compile_time, is_recompile); } JL_GC_POP(); } diff --git a/src/jloptions.c b/src/jloptions.c index 4cdec2c7b367f..f63f4de020e26 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -258,7 +258,9 @@ static const char opts_hidden[] = " --output-incremental={yes|no*} Generate an incremental output file (rather than\n" " complete)\n" " --trace-compile={stderr|name} Print precompile statements for methods compiled\n" - " during execution or save to a path\n" + " during execution or save to stderr or a path. Methods that\n" + " were recompiled are printed in yellow or with a trailing\n" + " comment if color is not supported\n" " --trace-compile-timing If --trace-compile is enabled show how long each took to\n" " compile in ms\n" " --image-codegen Force generate code in imaging mode\n" From 02549d5c54ec8ac8c7e62ea470803350bb3d1899 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 17 Sep 2024 04:05:24 -0400 Subject: [PATCH 51/57] Use PrecompileTools mechanics to compile REPL (#55782) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes https://github.com/JuliaLang/julia/issues/55778 Based on discussion here https://github.com/JuliaLang/julia/issues/55778#issuecomment-2352428043 With this `?reinterpret` feels instant, with only these precompiles at the start. ![Screenshot 2024-09-16 at 9 49 39 AM](https://github.com/user-attachments/assets/20dc016d-c6f7-4870-acd7-0e795dcf541b) --- stdlib/REPL/src/precompile.jl | 39 ++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/stdlib/REPL/src/precompile.jl b/stdlib/REPL/src/precompile.jl index c42def9078759..6bcec6415ba96 100644 --- a/stdlib/REPL/src/precompile.jl +++ b/stdlib/REPL/src/precompile.jl @@ -13,7 +13,7 @@ finally Base._track_dependencies[] = true end -let +function repl_workload() # these are intentionally triggered allowed_errors = [ "BoundsError: attempt to access 0-element Vector{Any} at index [1]", @@ -175,9 +175,38 @@ let nothing end -precompile(Tuple{typeof(Base.setindex!), Base.Dict{Any, Any}, Any, Int}) -precompile(Tuple{typeof(Base.delete!), Base.Set{Any}, String}) -precompile(Tuple{typeof(Base.:(==)), Char, String}) -precompile(Tuple{typeof(Base.reseteof), Base.TTY}) +# Copied from PrecompileTools.jl +let + function check_edges(node) + parentmi = node.mi_info.mi + for child in node.children + childmi = child.mi_info.mi + if !(isdefined(childmi, :backedges) && parentmi ∈ childmi.backedges) + precompile(childmi.specTypes) + end + check_edges(child) + end + end + + if Base.generating_output() && Base.JLOptions().use_pkgimages != 0 + Core.Compiler.Timings.reset_timings() + Core.Compiler.__set_measure_typeinf(true) + try + repl_workload() + finally + Core.Compiler.__set_measure_typeinf(false) + Core.Compiler.Timings.close_current_timer() + end + roots = Core.Compiler.Timings._timings[1].children + for child in roots + precompile(child.mi_info.mi.specTypes) + check_edges(child) + end + precompile(Tuple{typeof(Base.setindex!), Base.Dict{Any, Any}, Any, Int}) + precompile(Tuple{typeof(Base.delete!), Base.Set{Any}, String}) + precompile(Tuple{typeof(Base.:(==)), Char, String}) + precompile(Tuple{typeof(Base.reseteof), Base.TTY}) + end +end end # Precompile From f8086062906d80ef56c0dcd595a13438ef028293 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Wed, 18 Sep 2024 00:41:52 +0900 Subject: [PATCH 52/57] use `inferencebarrier` instead of `invokelatest` for 1-arg `@assert` (#55783) This version would be better as per this comment: I confirmed this still allows us to avoid invalidations reported at JuliaLang/julia#55583. --- base/error.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/base/error.jl b/base/error.jl index ee533cee0b57d..c49ede624607d 100644 --- a/base/error.jl +++ b/base/error.jl @@ -232,12 +232,14 @@ macro assert(ex, msgs...) msg = msg # pass-through elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol)) # message is an expression needing evaluating - msg = :(Main.Base.invokelatest(Main.Base.string, $(esc(msg)))) + # N.B. To reduce the risk of invalidation caused by the complex callstack involved + # with `string`, use `inferencebarrier` here to hide this `string` from the compiler. + msg = :(Main.Base.inferencebarrier(Main.Base.string)($(esc(msg)))) elseif isdefined(Main, :Base) && isdefined(Main.Base, :string) && applicable(Main.Base.string, msg) msg = Main.Base.string(msg) else # string() might not be defined during bootstrap - msg = :(Main.Base.invokelatest(_assert_tostring, $(Expr(:quote,msg)))) + msg = :(Main.Base.inferencebarrier(_assert_tostring)($(Expr(:quote,msg)))) end return :($(esc(ex)) ? $(nothing) : throw(AssertionError($msg))) end From 61c044ca4fe35ed357c77fc50d9a8cf70f6724b0 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Tue, 17 Sep 2024 13:18:44 -0300 Subject: [PATCH 53/57] Inline statically known method errors. (#54972) This replaces the `Expr(:call, ...)` with a call of a new builtin `Core.throw_methoderror` This is useful because it makes very clear if something is a static method error or a plain dynamic dispatch that always errors. Tools such as AllocCheck or juliac can notice that this is not a genuine dynamic dispatch, and prevent it from becoming a false positive compile-time error. Dependent on https://github.com/JuliaLang/julia/pull/55705 --------- Co-authored-by: Cody Tapscott --- base/compiler/abstractinterpretation.jl | 41 ++++++++------- base/compiler/ssair/inlining.jl | 52 +++++++++++-------- base/compiler/stmtinfo.jl | 17 ++++-- base/compiler/tfuncs.jl | 7 ++- base/compiler/types.jl | 6 +++ test/compiler/AbstractInterpreter.jl | 9 ++++ .../compiler/EscapeAnalysis/EscapeAnalysis.jl | 4 +- test/compiler/inline.jl | 39 +++++++++----- test/threads_exec.jl | 1 + 9 files changed, 111 insertions(+), 65 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index bb5f2dd1ad180..f126389c42d2d 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -209,8 +209,7 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), rettype = exctype = Any all_effects = Effects() else - if (matches isa MethodMatches ? (!matches.fullmatch || any_ambig(matches)) : - (!all(matches.fullmatches) || any_ambig(matches))) + if !fully_covering(matches) || any_ambig(matches) # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. all_effects = Effects(all_effects; nothrow=false) exctype = exctype ⊔ₚ MethodError @@ -275,21 +274,23 @@ struct MethodMatches applicable::Vector{Any} info::MethodMatchInfo valid_worlds::WorldRange - mt::MethodTable - fullmatch::Bool end -any_ambig(info::MethodMatchInfo) = info.results.ambig +any_ambig(result::MethodLookupResult) = result.ambig +any_ambig(info::MethodMatchInfo) = any_ambig(info.results) any_ambig(m::MethodMatches) = any_ambig(m.info) +fully_covering(info::MethodMatchInfo) = info.fullmatch +fully_covering(m::MethodMatches) = fully_covering(m.info) struct UnionSplitMethodMatches applicable::Vector{Any} applicable_argtypes::Vector{Vector{Any}} info::UnionSplitInfo valid_worlds::WorldRange - mts::Vector{MethodTable} - fullmatches::Vector{Bool} end -any_ambig(m::UnionSplitMethodMatches) = any(any_ambig, m.info.matches) +any_ambig(info::UnionSplitInfo) = any(any_ambig, info.matches) +any_ambig(m::UnionSplitMethodMatches) = any_ambig(m.info) +fully_covering(info::UnionSplitInfo) = all(info.fullmatches) +fully_covering(m::UnionSplitMethodMatches) = fully_covering(m.info) function find_method_matches(interp::AbstractInterpreter, argtypes::Vector{Any}, @nospecialize(atype); max_union_splitting::Int = InferenceParams(interp).max_union_splitting, @@ -307,7 +308,7 @@ is_union_split_eligible(𝕃::AbstractLattice, argtypes::Vector{Any}, max_union_ function find_union_split_method_matches(interp::AbstractInterpreter, argtypes::Vector{Any}, @nospecialize(atype), max_methods::Int) split_argtypes = switchtupleunion(typeinf_lattice(interp), argtypes) - infos = MethodMatchInfo[] + infos = MethodLookupResult[] applicable = Any[] applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match valid_worlds = WorldRange() @@ -323,29 +324,29 @@ function find_union_split_method_matches(interp::AbstractInterpreter, argtypes:: if matches === nothing return FailedMethodMatch("For one of the union split cases, too many methods matched") end - push!(infos, MethodMatchInfo(matches)) + push!(infos, matches) for m in matches push!(applicable, m) push!(applicable_argtypes, arg_n) end valid_worlds = intersect(valid_worlds, matches.valid_worlds) thisfullmatch = any(match::MethodMatch->match.fully_covers, matches) - found = false + mt_found = false for (i, mt′) in enumerate(mts) if mt′ === mt fullmatches[i] &= thisfullmatch - found = true + mt_found = true break end end - if !found + if !mt_found push!(mts, mt) push!(fullmatches, thisfullmatch) end end - info = UnionSplitInfo(infos) + info = UnionSplitInfo(infos, mts, fullmatches) return UnionSplitMethodMatches( - applicable, applicable_argtypes, info, valid_worlds, mts, fullmatches) + applicable, applicable_argtypes, info, valid_worlds) end function find_simple_method_matches(interp::AbstractInterpreter, @nospecialize(atype), max_methods::Int) @@ -360,10 +361,9 @@ function find_simple_method_matches(interp::AbstractInterpreter, @nospecialize(a # (assume this will always be true, so we don't compute / update valid age in this case) return FailedMethodMatch("Too many methods matched") end - info = MethodMatchInfo(matches) fullmatch = any(match::MethodMatch->match.fully_covers, matches) - return MethodMatches( - matches.matches, info, matches.valid_worlds, mt, fullmatch) + info = MethodMatchInfo(matches, mt, fullmatch) + return MethodMatches(matches.matches, info, matches.valid_worlds) end """ @@ -584,9 +584,10 @@ function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype) # also need an edge to the method table in case something gets # added that did not intersect with any existing method if isa(matches, MethodMatches) - matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype) + fully_covering(matches) || add_mt_backedge!(sv, matches.info.mt, atype) else - for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts) + matches::UnionSplitMethodMatches + for (thisfullmatch, mt) in zip(matches.info.fullmatches, matches.info.mts) thisfullmatch || add_mt_backedge!(sv, mt, atype) end end diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 70318b9e1a979..727e015b67062 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -50,12 +50,13 @@ struct InliningCase end struct UnionSplit - fully_covered::Bool + handled_all_cases::Bool # All possible dispatches are included in the cases + fully_covered::Bool # All handled cases are fully covering atype::DataType cases::Vector{InliningCase} bbs::Vector{Int} - UnionSplit(fully_covered::Bool, atype::DataType, cases::Vector{InliningCase}) = - new(fully_covered, atype, cases, Int[]) + UnionSplit(handled_all_cases::Bool, fully_covered::Bool, atype::DataType, cases::Vector{InliningCase}) = + new(handled_all_cases, fully_covered, atype, cases, Int[]) end struct InliningEdgeTracker @@ -215,7 +216,7 @@ end function cfg_inline_unionsplit!(ir::IRCode, idx::Int, union_split::UnionSplit, state::CFGInliningState, params::OptimizationParams) - (; fully_covered, #=atype,=# cases, bbs) = union_split + (; handled_all_cases, fully_covered, #=atype,=# cases, bbs) = union_split inline_into_block!(state, block_for_inst(ir, idx)) from_bbs = Int[] delete!(state.split_targets, length(state.new_cfg_blocks)) @@ -235,7 +236,7 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int, union_split::UnionSplit, end end push!(from_bbs, length(state.new_cfg_blocks)) - if !(i == length(cases) && fully_covered) + if !(i == length(cases) && (handled_all_cases && fully_covered)) # This block will have the next condition or the final else case push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx))) push!(state.new_cfg_blocks[cond_bb].succs, length(state.new_cfg_blocks)) @@ -244,7 +245,10 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int, union_split::UnionSplit, end end # The edge from the fallback block. - fully_covered || push!(from_bbs, length(state.new_cfg_blocks)) + # NOTE This edge is only required for `!handled_all_cases` and not `!fully_covered`, + # since in the latter case we inline `Core.throw_methoderror` into the fallback + # block, which is must-throw, making the subsequent code path unreachable. + !handled_all_cases && push!(from_bbs, length(state.new_cfg_blocks)) # This block will be the block everyone returns to push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx), from_bbs, orig_succs)) join_bb = length(state.new_cfg_blocks) @@ -523,7 +527,7 @@ assuming their order stays the same post-discovery in `ml_matches`. function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, union_split::UnionSplit, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int,Int}}, interp::AbstractInterpreter) - (; fully_covered, atype, cases, bbs) = union_split + (; handled_all_cases, fully_covered, atype, cases, bbs) = union_split stmt, typ, line = compact.result[idx][:stmt], compact.result[idx][:type], compact.result[idx][:line] join_bb = bbs[end] pn = PhiNode() @@ -538,7 +542,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs:: cond = true nparams = fieldcount(atype) @assert nparams == fieldcount(mtype) - if !(i == ncases && fully_covered) + if !(i == ncases && fully_covered && handled_all_cases) for i = 1:nparams aft, mft = fieldtype(atype, i), fieldtype(mtype, i) # If this is always true, we don't need to check for it @@ -597,14 +601,18 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs:: end bb += 1 # We're now in the fall through block, decide what to do - if !fully_covered + if !handled_all_cases ssa = insert_node_here!(compact, NewInstruction(stmt, typ, line)) push!(pn.edges, bb) push!(pn.values, ssa) insert_node_here!(compact, NewInstruction(GotoNode(join_bb), Any, line)) finish_current_bb!(compact, 0) + elseif !fully_covered + insert_node_here!(compact, NewInstruction(Expr(:call, GlobalRef(Core, :throw_methoderror), argexprs...), Union{}, line)) + insert_node_here!(compact, NewInstruction(ReturnNode(), Union{}, line)) + finish_current_bb!(compact, 0) + ncases == 0 && return insert_node_here!(compact, NewInstruction(nothing, Any, line)) end - # We're now in the join block. return insert_node_here!(compact, NewInstruction(pn, typ, line)) end @@ -1348,10 +1356,6 @@ function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt32, sig # Too many applicable methods # Or there is a (partial?) ambiguity return nothing - elseif length(meth) == 0 - # No applicable methods; try next union split - handled_all_cases = false - continue end local split_fully_covered = false for (j, match) in enumerate(meth) @@ -1392,12 +1396,16 @@ function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt32, sig handled_all_cases &= handle_any_const_result!(cases, result, match, argtypes, info, flag, state; allow_typevars=true) end + if !fully_covered + atype = argtypes_to_type(sig.argtypes) + # We will emit an inline MethodError so we need a backedge to the MethodTable + add_uncovered_edges!(state.edges, info, atype) + end elseif !isempty(cases) # if we've not seen all candidates, union split is valid only for dispatch tuples filter!(case::InliningCase->isdispatchtuple(case.sig), cases) end - - return cases, (handled_all_cases & fully_covered), joint_effects + return cases, handled_all_cases, fully_covered, joint_effects end function handle_call!(todo::Vector{Pair{Int,Any}}, @@ -1405,9 +1413,9 @@ function handle_call!(todo::Vector{Pair{Int,Any}}, state::InliningState) cases = compute_inlining_cases(info, flag, sig, state) cases === nothing && return nothing - cases, all_covered, joint_effects = cases + cases, handled_all_cases, fully_covered, joint_effects = cases atype = argtypes_to_type(sig.argtypes) - handle_cases!(todo, ir, idx, stmt, atype, cases, all_covered, joint_effects) + handle_cases!(todo, ir, idx, stmt, atype, cases, handled_all_cases, fully_covered, joint_effects) end function handle_match!(cases::Vector{InliningCase}, @@ -1496,19 +1504,19 @@ function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallIn end function handle_cases!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr, - @nospecialize(atype), cases::Vector{InliningCase}, all_covered::Bool, + @nospecialize(atype), cases::Vector{InliningCase}, handled_all_cases::Bool, fully_covered::Bool, joint_effects::Effects) # If we only have one case and that case is fully covered, we may either # be able to do the inlining now (for constant cases), or push it directly # onto the todo list - if all_covered && length(cases) == 1 + if fully_covered && handled_all_cases && length(cases) == 1 handle_single_case!(todo, ir, idx, stmt, cases[1].item) - elseif length(cases) > 0 + elseif length(cases) > 0 || handled_all_cases isa(atype, DataType) || return nothing for case in cases isa(case.sig, DataType) || return nothing end - push!(todo, idx=>UnionSplit(all_covered, atype, cases)) + push!(todo, idx=>UnionSplit(handled_all_cases, fully_covered, atype, cases)) else add_flag!(ir[SSAValue(idx)], flags_for_effects(joint_effects)) end diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl index 69d2ac7ae45a0..33fca90b6261e 100644 --- a/base/compiler/stmtinfo.jl +++ b/base/compiler/stmtinfo.jl @@ -33,10 +33,13 @@ not a call to a generic function. """ struct MethodMatchInfo <: CallInfo results::MethodLookupResult + mt::MethodTable + fullmatch::Bool end nsplit_impl(info::MethodMatchInfo) = 1 getsplit_impl(info::MethodMatchInfo, idx::Int) = (@assert idx == 1; info.results) getresult_impl(::MethodMatchInfo, ::Int) = nothing +add_uncovered_edges_impl(edges::Vector{Any}, info::MethodMatchInfo, @nospecialize(atype)) = (!info.fullmatch && push!(edges, info.mt, atype); ) """ info::UnionSplitInfo <: CallInfo @@ -48,20 +51,27 @@ each partition (`info.matches::Vector{MethodMatchInfo}`). This info is illegal on any statement that is not a call to a generic function. """ struct UnionSplitInfo <: CallInfo - matches::Vector{MethodMatchInfo} + matches::Vector{MethodLookupResult} + mts::Vector{MethodTable} + fullmatches::Vector{Bool} end nmatches(info::MethodMatchInfo) = length(info.results) function nmatches(info::UnionSplitInfo) n = 0 for mminfo in info.matches - n += nmatches(mminfo) + n += length(mminfo) end return n end nsplit_impl(info::UnionSplitInfo) = length(info.matches) -getsplit_impl(info::UnionSplitInfo, idx::Int) = getsplit_impl(info.matches[idx], 1) +getsplit_impl(info::UnionSplitInfo, idx::Int) = info.matches[idx] getresult_impl(::UnionSplitInfo, ::Int) = nothing +function add_uncovered_edges_impl(edges::Vector{Any}, info::UnionSplitInfo, @nospecialize(atype)) + for (mt, fullmatch) in zip(info.mts, info.fullmatches) + !fullmatch && push!(edges, mt, atype) + end +end abstract type ConstResult end @@ -105,6 +115,7 @@ end nsplit_impl(info::ConstCallInfo) = nsplit(info.call) getsplit_impl(info::ConstCallInfo, idx::Int) = getsplit(info.call, idx) getresult_impl(info::ConstCallInfo, idx::Int) = info.results[idx] +add_uncovered_edges_impl(edges::Vector{Any}, info::ConstCallInfo, @nospecialize(atype)) = add_uncovered_edges!(edges, info.call, atype) """ info::MethodResultPure <: CallInfo diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index 64a93bd07c2fa..6bb73ded8660d 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -2983,9 +2983,9 @@ function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any}, # also need an edge to the method table in case something gets # added that did not intersect with any existing method if isa(matches, MethodMatches) - matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype) + fully_covering(matches) || add_mt_backedge!(sv, matches.info.mt, atype) else - for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts) + for (thisfullmatch, mt) in zip(matches.info.fullmatches, matches.info.mts) thisfullmatch || add_mt_backedge!(sv, mt, atype) end end @@ -3001,8 +3001,7 @@ function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any}, add_backedge!(sv, edge) end - if isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) : - (!all(matches.fullmatches) || any_ambig(matches)) + if !fully_covering(matches) || any_ambig(matches) # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature. rt = Bool end diff --git a/base/compiler/types.jl b/base/compiler/types.jl index f315b7968fd9b..015b1dbc00a6f 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -450,10 +450,16 @@ abstract type CallInfo end nsplit(info::CallInfo) = nsplit_impl(info)::Union{Nothing,Int} getsplit(info::CallInfo, idx::Int) = getsplit_impl(info, idx)::MethodLookupResult +add_uncovered_edges!(edges::Vector{Any}, info::CallInfo, @nospecialize(atype)) = add_uncovered_edges_impl(edges, info, atype) + getresult(info::CallInfo, idx::Int) = getresult_impl(info, idx) +# must implement `nsplit`, `getsplit`, and `add_uncovered_edges!` to opt in to inlining nsplit_impl(::CallInfo) = nothing getsplit_impl(::CallInfo, ::Int) = error("unexpected call into `getsplit`") +add_uncovered_edges_impl(edges::Vector{Any}, info::CallInfo, @nospecialize(atype)) = error("unexpected call into `add_uncovered_edges!`") + +# must implement `getresult` to opt in to extended lattice return information getresult_impl(::CallInfo, ::Int) = nothing @specialize diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl index d95354cefa80c..e92b67f980942 100644 --- a/test/compiler/AbstractInterpreter.jl +++ b/test/compiler/AbstractInterpreter.jl @@ -409,6 +409,7 @@ end CC.nsplit_impl(info::NoinlineCallInfo) = CC.nsplit(info.info) CC.getsplit_impl(info::NoinlineCallInfo, idx::Int) = CC.getsplit(info.info, idx) CC.getresult_impl(info::NoinlineCallInfo, idx::Int) = CC.getresult(info.info, idx) +CC.add_uncovered_edges_impl(edges::Vector{Any}, info::NoinlineCallInfo, @nospecialize(atype)) = CC.add_uncovered_edges!(edges, info.info, atype) function CC.abstract_call(interp::NoinlineInterpreter, arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int) @@ -431,6 +432,8 @@ end @inline function inlined_usually(x, y, z) return x * y + z end +foo_split(x::Float64) = 1 +foo_split(x::Int) = 2 # check if the inlining algorithm works as expected let src = code_typed1((Float64,Float64,Float64)) do x, y, z @@ -444,6 +447,7 @@ let NoinlineModule = Module() main_func(x, y, z) = inlined_usually(x, y, z) @eval NoinlineModule noinline_func(x, y, z) = $inlined_usually(x, y, z) @eval OtherModule other_func(x, y, z) = $inlined_usually(x, y, z) + @eval NoinlineModule bar_split_error() = $foo_split(Core.compilerbarrier(:type, nothing)) interp = NoinlineInterpreter(Set((NoinlineModule,))) @@ -473,6 +477,11 @@ let NoinlineModule = Module() @test count(isinvoke(:inlined_usually), src.code) == 0 @test count(iscall((src, inlined_usually)), src.code) == 0 end + + let src = code_typed1(NoinlineModule.bar_split_error) + @test count(iscall((src, foo_split)), src.code) == 0 + @test count(iscall((src, Core.throw_methoderror)), src.code) > 0 + end end # Make sure that Core.Compiler has enough NamedTuple infrastructure diff --git a/test/compiler/EscapeAnalysis/EscapeAnalysis.jl b/test/compiler/EscapeAnalysis/EscapeAnalysis.jl index 31c21f7228014..8c3e065818208 100644 --- a/test/compiler/EscapeAnalysis/EscapeAnalysis.jl +++ b/test/compiler/EscapeAnalysis/EscapeAnalysis.jl @@ -2240,13 +2240,13 @@ end # accounts for ThrownEscape via potential MethodError # no method error -@noinline identity_if_string(x::SafeRef) = (println("preventing inlining"); nothing) +@noinline identity_if_string(x::SafeRef{<:AbstractString}) = (println("preventing inlining"); nothing) let result = code_escapes((SafeRef{String},)) do x identity_if_string(x) end @test has_no_escape(ignore_argescape(result.state[Argument(2)])) end -let result = code_escapes((Union{SafeRef{String},Nothing},)) do x +let result = code_escapes((SafeRef,)) do x identity_if_string(x) end i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.stmt)) diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index a8b5fd66dcd0d..80c8ddbb08c69 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -876,7 +876,7 @@ let src = code_typed1((Any,)) do x abstract_unionsplit_fallback(x) end @test count(isinvoke(:abstract_unionsplit_fallback), src.code) == 2 - @test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch + @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error end let src = code_typed1((Union{Type,Number},)) do x abstract_unionsplit_fallback(x) @@ -912,7 +912,7 @@ let src = code_typed1((Any,)) do x @test count(iscall((src, typeof)), src.code) == 2 @test count(isinvoke(:println), src.code) == 0 @test count(iscall((src, println)), src.code) == 0 - @test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch + @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error end let src = code_typed1((Union{Type,Number},)) do x abstract_unionsplit_fallback(false, x) @@ -960,8 +960,8 @@ let # aggressive inlining of single, abstract method match end |> only |> first # both callsites should be inlined @test count(isinvoke(:has_free_typevars), src.code) == 2 - # `isGoodType(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted - @test count(iscall((src,isGoodType)), src.code) == 1 + # `isGoodType(y::Any)` isn't fully covered, so the fallback is a method error + @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error end @inline isGoodType2(cnd, @nospecialize x::Type) = @@ -973,8 +973,8 @@ let # aggressive inlining of single, abstract method match (with constant-prop'e # both callsite should be inlined with constant-prop'ed result @test count(isinvoke(:isType), src.code) == 2 @test count(isinvoke(:has_free_typevars), src.code) == 0 - # `isGoodType(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted - @test count(iscall((src,isGoodType2)), src.code) == 1 + # `isGoodType(y::Any)` isn't fully covered, thus a MethodError gets inserted + @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error end @noinline function checkBadType!(@nospecialize x::Type) @@ -989,8 +989,8 @@ let # aggressive static dispatch of single, abstract method match end |> only |> first # both callsites should be resolved statically @test count(isinvoke(:checkBadType!), src.code) == 2 - # `checkBadType!(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted - @test count(iscall((src,checkBadType!)), src.code) == 1 + # `checkBadType!(y::Any)` isn't fully covered, thus a MethodError gets inserted + @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error end @testset "late_inline_special_case!" begin @@ -2004,7 +2004,7 @@ f48397(::Tuple{String,String}) = :ok let src = code_typed1((Union{Bool,Tuple{String,Any}},)) do x f48397(x) end - @test any(iscall((src, f48397)), src.code) + @test any(iscall((src, Core.throw_methoderror)), src.code) # fallback method error) end g48397::Union{Bool,Tuple{String,Any}} = ("48397", 48397) let res = @test_throws MethodError let @@ -2175,11 +2175,6 @@ let src = code_typed1() do @test count(isinvoke(:iterate), src.code) == 0 end -# JuliaLang/julia#53062: proper `joint_effects` for call with empty method matches -let ir = first(only(Base.code_ircode(setproperty!, (Base.RefValue{Int},Symbol,Base.RefValue{Int})))) - i = findfirst(iscall((ir, convert)), ir.stmts.stmt)::Int - @test iszero(ir.stmts.flag[i] & Core.Compiler.IR_FLAG_NOTHROW) -end function issue53062(cond) x = Ref{Int}(0) if cond @@ -2214,3 +2209,19 @@ let ir = Base.code_ircode((Issue52644,); optimize_until="Inlining") do t @test irfunc(Issue52644(Tuple{})) === :DataType @test_throws MethodError irfunc(Issue52644(Tuple{<:Integer})) end + +foo_split(x::Float64) = 1 +foo_split(x::Int) = 2 +bar_inline_error() = foo_split(nothing) +bar_split_error() = foo_split(Core.compilerbarrier(:type,nothing)) + +let src = code_typed1(bar_inline_error, Tuple{}) + # Should inline method errors + @test count(iscall((src, foo_split)), src.code) == 0 + @test count(iscall((src, Core.throw_methoderror)), src.code) > 0 +end +let src = code_typed1(bar_split_error, Tuple{}) + # Should inline method errors + @test count(iscall((src, foo_split)), src.code) == 0 + @test count(iscall((src, Core.throw_methoderror)), src.code) > 0 +end diff --git a/test/threads_exec.jl b/test/threads_exec.jl index 595f8991d58d7..ac54dd009390c 100644 --- a/test/threads_exec.jl +++ b/test/threads_exec.jl @@ -1235,6 +1235,7 @@ end @testset "throw=true" begin tasks, event = create_tasks() push!(tasks, Threads.@spawn error("Error")) + wait(tasks[end]; throw=false) @test_throws CompositeException begin waitany(convert_tasks(tasks_type, tasks); throw=true) From 48ddd2dc2859f41fc1a82876cc5654fb31d7b2ba Mon Sep 17 00:00:00 2001 From: norci Date: Wed, 18 Sep 2024 01:14:35 +0800 Subject: [PATCH 54/57] Fix shell `cd` error when working dir has been deleted (#41244) root cause: if current dir has been deleted, then pwd() will throw an IOError: pwd(): no such file or directory (ENOENT) --------- Co-authored-by: Ian Butterworth --- base/client.jl | 13 +++++++++---- test/file.jl | 20 ++++++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/base/client.jl b/base/client.jl index 2ca88c40aeb7e..a04556507d5dc 100644 --- a/base/client.jl +++ b/base/client.jl @@ -41,7 +41,6 @@ function repl_cmd(cmd, out) if isempty(cmd.exec) throw(ArgumentError("no cmd to execute")) elseif cmd.exec[1] == "cd" - new_oldpwd = pwd() if length(cmd.exec) > 2 throw(ArgumentError("cd method only takes one argument")) elseif length(cmd.exec) == 2 @@ -52,11 +51,17 @@ function repl_cmd(cmd, out) end dir = ENV["OLDPWD"] end - cd(dir) else - cd() + dir = homedir() end - ENV["OLDPWD"] = new_oldpwd + try + ENV["OLDPWD"] = pwd() + catch ex + ex isa IOError || rethrow() + # if current dir has been deleted, then pwd() will throw an IOError: pwd(): no such file or directory (ENOENT) + delete!(ENV, "OLDPWD") + end + cd(dir) println(out, pwd()) else @static if !Sys.iswindows() diff --git a/test/file.jl b/test/file.jl index de6d488056a02..de258c92e02bc 100644 --- a/test/file.jl +++ b/test/file.jl @@ -1908,6 +1908,26 @@ end end end +@testset "pwd tests" begin + mktempdir() do dir + cd(dir) do + withenv("OLDPWD" => nothing) do + io = IOBuffer() + Base.repl_cmd(@cmd("cd"), io) + Base.repl_cmd(@cmd("cd -"), io) + @test realpath(pwd()) == realpath(dir) + if !Sys.iswindows() + # Delete the working directory and check we can cd out of it + # Cannot delete the working directory on Windows + rm(dir) + @test_throws Base._UVError("pwd()", Base.UV_ENOENT) pwd() + Base.repl_cmd(@cmd("cd \\~"), io) + end + end + end + end +end + @testset "readdir tests" begin ≛(a, b) = sort(a) == sort(b) mktempdir() do dir From 00739173311983479fee3f5f826e4b763872860a Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Tue, 17 Sep 2024 19:47:48 -0400 Subject: [PATCH 55/57] codegen: fix bits compare for UnionAll (#55770) Fixes #55768 in two parts: one is making the type computation in emit_bits_compare agree with the parent function and two is not using the optimized egal code for UnionAll kinds, which is different from how the egal code itself works for kinds. --- src/codegen.cpp | 21 ++++++++++++--------- test/compiler/codegen.jl | 5 +++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 73a5f844b31da..6ae4f56a53ee2 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -3484,25 +3484,26 @@ static size_t emit_masked_bits_compare(callback &emit_desc, jl_datatype_t *aty, static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2) { ++EmittedBitsCompares; + jl_value_t *argty = (arg1.constant ? jl_typeof(arg1.constant) : arg1.typ); bool isboxed; Type *at = julia_type_to_llvm(ctx, arg1.typ, &isboxed); - assert(jl_is_datatype(arg1.typ) && arg1.typ == arg2.typ && !isboxed); + assert(jl_is_datatype(arg1.typ) && arg1.typ == (arg2.constant ? jl_typeof(arg2.constant) : arg2.typ) && !isboxed); if (type_is_ghost(at)) return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) { Type *at_int = INTT(at, ctx.emission_context.DL); - Value *varg1 = emit_unbox(ctx, at_int, arg1, arg1.typ); - Value *varg2 = emit_unbox(ctx, at_int, arg2, arg2.typ); + Value *varg1 = emit_unbox(ctx, at_int, arg1, argty); + Value *varg2 = emit_unbox(ctx, at_int, arg2, argty); return ctx.builder.CreateICmpEQ(varg1, varg2); } if (at->isVectorTy()) { - jl_svec_t *types = ((jl_datatype_t*)arg1.typ)->types; + jl_svec_t *types = ((jl_datatype_t*)argty)->types; Value *answer = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1); - Value *varg1 = emit_unbox(ctx, at, arg1, arg1.typ); - Value *varg2 = emit_unbox(ctx, at, arg2, arg2.typ); + Value *varg1 = emit_unbox(ctx, at, arg1, argty); + Value *varg2 = emit_unbox(ctx, at, arg2, argty); for (size_t i = 0, l = jl_svec_len(types); i < l; i++) { jl_value_t *fldty = jl_svecref(types, i); Value *subAns, *fld1, *fld2; @@ -3517,7 +3518,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a } if (at->isAggregateType()) { // Struct or Array - jl_datatype_t *sty = (jl_datatype_t*)arg1.typ; + jl_datatype_t *sty = (jl_datatype_t*)argty; size_t sz = jl_datatype_size(sty); if (sz > 512 && !sty->layout->flags.haspadding && sty->layout->flags.isbitsegal) { Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) : @@ -3721,8 +3722,10 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva if (jl_type_intersection(rt1, rt2) == (jl_value_t*)jl_bottom_type) // types are disjoint (exhaustive test) return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); - bool justbits1 = jl_is_concrete_immutable(rt1); - bool justbits2 = jl_is_concrete_immutable(rt2); + // can compare any concrete immutable by bits, except for UnionAll + // which has a special non-bits based egal + bool justbits1 = jl_is_concrete_immutable(rt1) && !jl_is_kind(rt1); + bool justbits2 = jl_is_concrete_immutable(rt2) && !jl_is_kind(rt2); if (justbits1 || justbits2) { // whether this type is unique'd by value return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] () -> Value* { jl_datatype_t *typ = (jl_datatype_t*)(justbits1 ? rt1 : rt2); diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl index 0260113044a3b..07308713bb789 100644 --- a/test/compiler/codegen.jl +++ b/test/compiler/codegen.jl @@ -999,3 +999,8 @@ for (T, StructName) in ((Int128, :Issue55558), (UInt128, :UIssue55558)) @test sizeof($(StructName)) == 48 broken=broken_i128 end end + +@noinline Base.@nospecializeinfer f55768(@nospecialize z::UnionAll) = z === Vector +@test f55768(Vector) +@test f55768(Vector{T} where T) +@test !f55768(Vector{S} where S) From 95a32db42c3cca841006836ddd88c18aac3afb5a Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Wed, 18 Sep 2024 17:51:34 -0300 Subject: [PATCH 56/57] use libuv to measure maxrss (#55806) Libuv has a wrapper around rusage on Unix (and its equivalent on Windows). We should probably use it. --- src/sys.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/src/sys.c b/src/sys.c index 712d232da363a..b54edc32b32b6 100644 --- a/src/sys.c +++ b/src/sys.c @@ -772,26 +772,11 @@ JL_DLLEXPORT jl_sym_t *jl_get_ARCH(void) JL_NOTSAFEPOINT JL_DLLEXPORT size_t jl_maxrss(void) { -#if defined(_OS_WINDOWS_) - PROCESS_MEMORY_COUNTERS counter; - GetProcessMemoryInfo( GetCurrentProcess( ), &counter, sizeof(counter) ); - return (size_t)counter.PeakWorkingSetSize; - -// FIXME: `rusage` is available on OpenBSD, DragonFlyBSD and NetBSD as well. -// All of them return `ru_maxrss` in kilobytes. -#elif defined(_OS_LINUX_) || defined(_OS_DARWIN_) || defined (_OS_FREEBSD_) || defined (_OS_OPENBSD_) - struct rusage rusage; - getrusage( RUSAGE_SELF, &rusage ); - -#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) || defined (_OS_OPENBSD_) - return (size_t)(rusage.ru_maxrss * 1024); -#else - return (size_t)rusage.ru_maxrss; -#endif - -#else - return (size_t)0; -#endif + uv_rusage_t rusage; + if (uv_getrusage(&rusage) == 0) { + return rusage.ru_maxrss * 1024; + } + return 0; } // Simple `rand()` like function, with global seed and added thread-safety From e4c8d4f7976162dcf5eebc15d93d2408cb6d0666 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Wed, 18 Sep 2024 17:49:15 -0400 Subject: [PATCH 57/57] REPL: use atreplinit to change the active module during precompilation (#55805) --- stdlib/REPL/src/precompile.jl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/stdlib/REPL/src/precompile.jl b/stdlib/REPL/src/precompile.jl index 6bcec6415ba96..f7961a205e0b1 100644 --- a/stdlib/REPL/src/precompile.jl +++ b/stdlib/REPL/src/precompile.jl @@ -37,9 +37,21 @@ function repl_workload() UP_ARROW = "\e[A" DOWN_ARROW = "\e[B" + # This is notified as soon as the first prompt appears + repl_init_event = Base.Event() + + atreplinit() do repl + # Main is closed so we can't evaluate in it, but atreplinit runs at + # a time that repl.mistate === nothing so REPL.activate fails. So do + # it async and wait for the first prompt to know its ready. + t = @async begin + wait(repl_init_event) + REPL.activate(REPL.Precompile; interactive_utils=false) + end + Base.errormonitor(t) + end + repl_script = """ - import REPL - REPL.activate(REPL.Precompile; interactive_utils=false) # Main is closed so we can't evaluate in it 2+2 print("") printstyled("a", "b") @@ -62,7 +74,6 @@ function repl_workload() [][1] Base.Iterators.minimum cd("complete_path\t\t$CTRL_C - REPL.activate(; interactive_utils=false) println("done") """ @@ -159,6 +170,7 @@ function repl_workload() occursin(HELP_PROMPT, strbuf) && break sleep(0.1) end + notify(repl_init_event) check_errors(strbuf) end write(debug_output, "\n#### COMPLETED - Closing REPL ####\n")